@iam-protocol/pulse-sdk 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +52 -2
- package/dist/index.d.ts +52 -2
- package/dist/index.js +545 -67
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +540 -67
- package/dist/index.mjs.map +1 -1
- package/package.json +2 -1
- package/src/config.ts +1 -1
- package/src/extraction/kinematic.ts +171 -1
- package/src/extraction/lpc.ts +215 -0
- package/src/extraction/speaker.ts +361 -0
- package/src/hashing/simhash.ts +1 -1
- package/src/index.ts +2 -0
- package/src/pulse.ts +16 -5
- package/test/integration.test.ts +2 -2
- package/src/extraction/mfcc.ts +0 -113
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Speaker-dependent audio feature extraction.
|
|
3
|
+
*
|
|
4
|
+
* Extracts features that characterize HOW someone speaks (prosody, vocal physiology)
|
|
5
|
+
* rather than WHAT they say (phonetic content). These features are stable across
|
|
6
|
+
* different utterances from the same speaker.
|
|
7
|
+
*
|
|
8
|
+
* Output: 44 values
|
|
9
|
+
* F0 statistics (5) + F0 delta (4) + jitter (4) + shimmer (4) +
|
|
10
|
+
* HNR statistics (5) + formant ratios (8) + LTAS (8) + voicing ratio (1) +
|
|
11
|
+
* amplitude statistics (5)
|
|
12
|
+
*/
|
|
13
|
+
import type { AudioCapture } from "../sensor/types";
|
|
14
|
+
import { condense, entropy } from "./statistics";
|
|
15
|
+
import { extractFormantRatios } from "./lpc";
|
|
16
|
+
|
|
17
|
+
const FRAME_SIZE = 512; // ~32ms at 16kHz, power of 2 for FFT
|
|
18
|
+
const HOP_SIZE = 160; // ~10ms hop
|
|
19
|
+
const SPEAKER_FEATURE_COUNT = 44;
|
|
20
|
+
|
|
21
|
+
// Lazily-loaded third-party modules (dynamic imports keep the SDK browser-compatible).
// A YIN detector is bound to one sample rate when it is constructed, so detectors
// are cached per sample rate instead of as a single shared instance.
const pitchDetectors = new Map<number, (buf: Float32Array) => number | null>();
let meydaModule: any = null;

/**
 * Return a cached YIN pitch detector configured for `sampleRate`.
 *
 * @param sampleRate Sample rate of the audio the detector will analyse, in Hz.
 *   Defaults to 16000; non-finite or non-positive values fall back to the default.
 * @returns A function mapping one audio frame to a pitch in Hz, or `null`.
 */
async function getPitchDetector(
  sampleRate = 16000
): Promise<(buf: Float32Array) => number | null> {
  const rate = Number.isFinite(sampleRate) && sampleRate > 0 ? sampleRate : 16000;

  const cached = pitchDetectors.get(rate);
  if (cached) return cached;

  const PitchFinder = await import("pitchfinder");
  const detector: (buf: Float32Array) => number | null = PitchFinder.YIN({ sampleRate: rate });
  pitchDetectors.set(rate, detector);
  return detector;
}

/**
 * Load Meyda on first use and cache the module.
 *
 * @returns The module's default export when present (otherwise the module
 *   itself), or `null` when the import fails.
 */
async function getMeyda(): Promise<any> {
  if (!meydaModule) {
    try {
      meydaModule = await import("meyda");
    } catch {
      // Best-effort: callers treat `null` as "Meyda unavailable".
      return null;
    }
  }
  return meydaModule.default ?? meydaModule;
}

/**
 * Detect the F0 (fundamental frequency) contour and RMS amplitude per frame.
 *
 * Frames are FRAME_SIZE samples long and advance by HOP_SIZE samples.
 *
 * @param samples Mono audio samples.
 * @param sampleRate Sample rate of `samples` in Hz; used to configure the pitch detector.
 * @returns `f0` has one entry per frame (0 marks an unvoiced frame), `amplitudes`
 *   has one RMS value per frame, and `periods` holds `1 / f0` for voiced frames only.
 *   All three are empty when `samples` is shorter than one frame.
 */
async function detectF0Contour(
  samples: Float32Array,
  sampleRate: number
): Promise<{ f0: number[]; amplitudes: number[]; periods: number[] }> {
  const f0: number[] = [];
  const amplitudes: number[] = [];
  const periods: number[] = [];

  const numFrames = Math.floor((samples.length - FRAME_SIZE) / HOP_SIZE) + 1;
  // Nothing to analyse: skip loading the pitch library altogether.
  if (numFrames <= 0) return { f0, amplitudes, periods };

  const detect = await getPitchDetector(sampleRate);

  for (let i = 0; i < numFrames; i++) {
    const start = i * HOP_SIZE;
    // Zero-copy view; the frame is only read.
    const frame = samples.subarray(start, start + FRAME_SIZE);

    // Only pitches strictly between 50 Hz and 600 Hz count as voiced.
    const pitch = detect(frame);
    if (pitch && pitch > 50 && pitch < 600) {
      f0.push(pitch);
      periods.push(1 / pitch);
    } else {
      f0.push(0); // unvoiced frame
    }

    // RMS amplitude of the frame.
    let sumSquares = 0;
    for (let j = 0; j < frame.length; j++) {
      const sample = frame[j] ?? 0;
      sumSquares += sample * sample;
    }
    amplitudes.push(Math.sqrt(sumSquares / frame.length));
  }

  return { f0, amplitudes, periods };
}
|
|
80
|
+
|
|
81
|
+
/**
 * Relative cycle-to-cycle perturbation measures of a series.
 *
 * Shared by jitter (pitch periods) and shimmer (amplitudes), which apply the
 * same four formulas. Every measure is normalised by the series mean.
 *
 * @param series Values of consecutive voiced frames.
 * @returns `[local, 3-point, 5-point, difference-of-differences]`; all zeros
 *   when there are fewer than 3 values or the mean is 0.
 */
function perturbationMeasures(series: readonly number[]): number[] {
  const n = series.length;
  if (n < 3) return [0, 0, 0, 0];

  let total = 0;
  for (const value of series) total += value;
  const mean = total / n;
  if (mean === 0) return [0, 0, 0, 0];

  // Local: mean absolute difference between consecutive values.
  let localSum = 0;
  for (let i = 1; i < n; i++) {
    localSum += Math.abs(series[i]! - series[i - 1]!);
  }

  // 3-point quotient (RAP / APQ3) and difference of differences (DDP / DDA)
  // both run over the interior points, so they share one pass.
  let threePointSum = 0;
  let diffOfDiffSum = 0;
  for (let i = 1; i < n - 1; i++) {
    const prev = series[i - 1]!;
    const curr = series[i]!;
    const next = series[i + 1]!;
    threePointSum += Math.abs(curr - (prev + curr + next) / 3);
    diffOfDiffSum += Math.abs(next - curr - (curr - prev));
  }

  // 5-point quotient (PPQ5 / APQ5); needs at least 5 values, otherwise 0.
  let fivePointSum = 0;
  let fivePointCount = 0;
  for (let i = 2; i < n - 2; i++) {
    const avg5 =
      (series[i - 2]! + series[i - 1]! + series[i]! + series[i + 1]! + series[i + 2]!) / 5;
    fivePointSum += Math.abs(series[i]! - avg5);
    fivePointCount++;
  }

  return [
    localSum / (n - 1) / mean,
    threePointSum / (n - 2) / mean,
    fivePointCount > 0 ? fivePointSum / fivePointCount / mean : 0,
    diffOfDiffSum / (n - 2) / mean,
  ];
}

/**
 * Compute jitter measures from the pitch period contour.
 * Jitter = cycle-to-cycle perturbation of the fundamental period.
 *
 * @param periods Pitch periods; non-positive entries are ignored.
 * @returns `[local, RAP, PPQ5, DDP]`, or four zeros with fewer than 3 positive periods.
 */
function computeJitter(periods: number[]): number[] {
  return perturbationMeasures(periods.filter((p) => p > 0));
}

/**
 * Compute shimmer measures from per-frame amplitudes.
 * Shimmer = cycle-to-cycle amplitude perturbation.
 *
 * @param amplitudes Amplitude per frame.
 * @param f0 F0 per frame, index-aligned with `amplitudes`; only frames with `f0 > 0` are used.
 * @returns `[local, APQ3, APQ5, DDA]`, or four zeros with fewer than 3 voiced
 *   frames or a zero mean amplitude.
 */
function computeShimmer(amplitudes: number[], f0: number[]): number[] {
  return perturbationMeasures(amplitudes.filter((_, i) => (f0[i] ?? 0) > 0));
}
|
|
177
|
+
|
|
178
|
+
/**
 * Compute the Harmonic-to-Noise Ratio (dB) of voiced frames using autocorrelation.
 *
 * For each frame with `f0 > 0`, the correlation `r` at a lag of one pitch period
 * is taken relative to the energy of the un-shifted segment, clamped to
 * [0.001, 0.999], and converted with `10 * log10(r / (1 - r))`.
 *
 * @param samples Mono audio samples.
 * @param sampleRate Sample rate of `samples` in Hz.
 * @param f0Contour F0 per frame (0 = unvoiced), using the same framing as this function.
 * @returns One HNR value per usable voiced frame. Frames are skipped when they are
 *   unvoiced, when the period does not fit inside the frame, or when the segment
 *   has no energy, so the result may be shorter than `f0Contour`.
 */
function computeHNR(
  samples: Float32Array,
  sampleRate: number,
  f0Contour: number[]
): number[] {
  const hnr: number[] = [];
  const numFrames = Math.floor((samples.length - FRAME_SIZE) / HOP_SIZE) + 1;
  const limit = Math.min(numFrames, f0Contour.length);

  for (let i = 0; i < limit; i++) {
    const f0 = f0Contour[i]!;
    if (f0 <= 0) continue; // unvoiced frame

    const start = i * HOP_SIZE;
    // Zero-copy view; the frame is only read.
    const frame = samples.subarray(start, start + FRAME_SIZE);
    const period = Math.round(sampleRate / f0);
    if (period <= 0 || period >= frame.length) continue;

    // Correlation at the fundamental period vs. energy of the un-shifted segment.
    let num = 0;
    let den = 0;
    const span = frame.length - period;
    for (let j = 0; j < span; j++) {
      const sample = frame[j] ?? 0;
      num += sample * (frame[j + period] ?? 0);
      den += sample * sample;
    }
    if (den <= 0) continue;

    // Clamp so the log ratio stays finite.
    const clampedR = Math.max(0.001, Math.min(0.999, num / den));
    hnr.push(10 * Math.log10(clampedR / (1 - clampedR)));
  }

  return hnr;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Compute LTAS (Long-Term Average Spectrum) features using Meyda.
 *
 * Extracts spectral centroid, rolloff, flatness and spread for every frame and
 * summarises each as mean + sample variance.
 *
 * Non-finite per-frame values are discarded: `typeof NaN === "number"`, so a
 * plain type check would let a single NaN frame turn the mean and variance
 * into NaN.
 *
 * @param samples Mono audio samples.
 * @param sampleRate Sample rate of `samples` in Hz.
 * @returns 8 values: `[centroid mean, centroid variance, rolloff mean, rolloff variance,
 *   flatness mean, flatness variance, spread mean, spread variance]`; all zeros
 *   when Meyda cannot be loaded.
 */
async function computeLTAS(
  samples: Float32Array,
  sampleRate: number
): Promise<number[]> {
  const Meyda = await getMeyda();
  if (!Meyda) return new Array(8).fill(0);

  const featureNames = ["spectralCentroid", "spectralRolloff", "spectralFlatness", "spectralSpread"];
  const series: number[][] = featureNames.map(() => []);
  const numFrames = Math.floor((samples.length - FRAME_SIZE) / HOP_SIZE) + 1;

  for (let i = 0; i < numFrames; i++) {
    const start = i * HOP_SIZE;
    // Fresh fixed-size buffer per frame (zero-padded if the view is ever shorter).
    const paddedFrame = new Float32Array(FRAME_SIZE);
    paddedFrame.set(samples.subarray(start, start + FRAME_SIZE));

    // NOTE(review): confirm that Meyda.extract honours this third options
    // argument; the call is kept exactly as before.
    const extracted = Meyda.extract(featureNames, paddedFrame, {
      sampleRate,
      bufferSize: FRAME_SIZE,
    });
    if (!extracted) continue;

    featureNames.forEach((name, k) => {
      const value = extracted[name];
      if (typeof value === "number" && Number.isFinite(value)) series[k]!.push(value);
    });
  }

  const mean = (values: number[]): number =>
    values.length > 0 ? values.reduce((acc, x) => acc + x, 0) / values.length : 0;

  // Sample variance (n - 1 denominator); 0 with fewer than 2 values.
  const sampleVariance = (values: number[]): number => {
    if (values.length < 2) return 0;
    const mu = mean(values);
    return values.reduce((acc, x) => acc + (x - mu) * (x - mu), 0) / (values.length - 1);
  };

  return series.flatMap((values) => [mean(values), sampleVariance(values)]);
}
|
|
268
|
+
|
|
269
|
+
/**
 * First-order difference of a time series: element `k` of the result is
 * `values[k + 1] - values[k]`, so the output is one element shorter than the
 * input (and empty for inputs of length 0 or 1).
 */
function derivative(values: number[]): number[] {
  const stepCount = Math.max(0, values.length - 1);
  return Array.from({ length: stepCount }, (_, k) => values[k + 1]! - values[k]!);
}
|
|
279
|
+
|
|
280
|
+
/**
 * Build the speaker-dependent feature vector for one audio capture.
 *
 * The vector concatenates, in this order: F0 statistics (5), F0 delta
 * statistics (4), jitter (4), shimmer (4), HNR statistics (5), formant-ratio
 * statistics (8), LTAS (8), voicing ratio (1) and amplitude statistics (5),
 * 44 values in total.
 *
 * When the capture yields fewer than 5 analysis frames, a warning is logged
 * and a vector of SPEAKER_FEATURE_COUNT zeros is returned.
 *
 * @param audio Captured samples together with their sample rate.
 * @returns The 44-value feature vector.
 */
export async function extractSpeakerFeatures(audio: AudioCapture): Promise<number[]> {
  const { samples, sampleRate } = audio;

  const frameCount = Math.floor((samples.length - FRAME_SIZE) / HOP_SIZE) + 1;
  if (frameCount < 5) {
    console.warn(`[IAM SDK] Too few audio frames (${frameCount}). Speaker features will be zeros.`);
    return new Array(SPEAKER_FEATURE_COUNT).fill(0);
  }

  // The four moments reported for every summarised series.
  const moments = (values: number[]): number[] => {
    const summary = condense(values);
    return [summary.mean, summary.variance, summary.skewness, summary.kurtosis];
  };

  // Pitch contour and per-frame amplitude.
  const contour = await detectF0Contour(samples, sampleRate);
  const voicedPitches = contour.f0.filter((hz) => hz > 0);
  const voicingFeatures = [voicedPitches.length / contour.f0.length]; // 1

  // F0 moments + entropy over voiced frames.
  const f0Features = [...moments(voicedPitches), entropy(voicedPitches)]; // 5

  // Moments of the frame-to-frame F0 change.
  const f0DeltaFeatures = moments(derivative(voicedPitches)); // 4

  // Period and amplitude perturbation.
  const jitterFeatures = computeJitter(contour.periods); // 4
  const shimmerFeatures = computeShimmer(contour.amplitudes, contour.f0); // 4

  // Harmonic-to-noise ratio moments + entropy.
  const hnrSeries = computeHNR(samples, sampleRate, contour.f0);
  const hnrFeatures = [...moments(hnrSeries), entropy(hnrSeries)]; // 5

  // Moments of the two formant-ratio series.
  const ratios = extractFormantRatios(samples, sampleRate, FRAME_SIZE, HOP_SIZE);
  const formantFeatures = [...moments(ratios.f1f2), ...moments(ratios.f2f3)]; // 8

  // Long-term average spectrum.
  const ltasFeatures = await computeLTAS(samples, sampleRate); // 8

  // Amplitude moments + entropy over all frames.
  const ampFeatures = [...moments(contour.amplitudes), entropy(contour.amplitudes)]; // 5

  // 5 + 4 + 4 + 4 + 5 + 8 + 8 + 1 + 5 = 44
  return [
    ...f0Features,
    ...f0DeltaFeatures,
    ...jitterFeatures,
    ...shimmerFeatures,
    ...hnrFeatures,
    ...formantFeatures,
    ...ltasFeatures,
    ...voicingFeatures,
    ...ampFeatures,
  ];
}
|
|
360
|
+
|
|
361
|
+
export { SPEAKER_FEATURE_COUNT };
|
package/src/hashing/simhash.ts
CHANGED
|
@@ -52,7 +52,7 @@ function getHyperplanes(dimension: number): number[][] {
|
|
|
52
52
|
* Uses deterministic random hyperplanes seeded from the protocol constant.
|
|
53
53
|
* Similar feature vectors produce fingerprints with low Hamming distance.
|
|
54
54
|
*/
|
|
55
|
-
const EXPECTED_FEATURE_DIMENSION =
|
|
55
|
+
const EXPECTED_FEATURE_DIMENSION = 134; // 44 speaker + 54 motion/mouse + 36 touch
|
|
56
56
|
|
|
57
57
|
export function simhash(features: number[]): TemporalFingerprint {
|
|
58
58
|
if (features.length === 0) {
|
package/src/index.ts
CHANGED
|
@@ -19,6 +19,8 @@ export {
|
|
|
19
19
|
// Feature extraction
|
|
20
20
|
export type { StatsSummary, FeatureVector, FusedFeatureVector } from "./extraction/types";
|
|
21
21
|
export { mean, variance, skewness, kurtosis, condense, entropy, autocorrelation, fuseFeatures } from "./extraction/statistics";
|
|
22
|
+
export { extractSpeakerFeatures, SPEAKER_FEATURE_COUNT } from "./extraction/speaker";
|
|
23
|
+
export { extractMotionFeatures, extractTouchFeatures, extractMouseDynamics } from "./extraction/kinematic";
|
|
22
24
|
|
|
23
25
|
// Proof generation
|
|
24
26
|
export type { SolanaProof, CircuitInput, ProofResult } from "./proof/types";
|
package/src/pulse.ts
CHANGED
|
@@ -9,13 +9,14 @@ import type { StoredVerificationData } from "./identity/types";
|
|
|
9
9
|
import { captureAudio } from "./sensor/audio";
|
|
10
10
|
import { captureMotion } from "./sensor/motion";
|
|
11
11
|
import { captureTouch } from "./sensor/touch";
|
|
12
|
-
import {
|
|
12
|
+
import { extractSpeakerFeatures, SPEAKER_FEATURE_COUNT } from "./extraction/speaker";
|
|
13
13
|
import {
|
|
14
14
|
extractMotionFeatures,
|
|
15
15
|
extractTouchFeatures,
|
|
16
|
+
extractMouseDynamics,
|
|
16
17
|
} from "./extraction/kinematic";
|
|
17
18
|
import { fuseFeatures } from "./extraction/statistics";
|
|
18
|
-
import { simhash } from "./hashing/simhash";
|
|
19
|
+
import { simhash, hammingDistance } from "./hashing/simhash";
|
|
19
20
|
import { generateTBH, bigintToBytes32 } from "./hashing/poseidon";
|
|
20
21
|
import { prepareCircuitInput, generateProof } from "./proof/prover";
|
|
21
22
|
import { serializeProof } from "./proof/serializer";
|
|
@@ -34,9 +35,14 @@ type ResolvedConfig = Required<Pick<PulseConfig, "cluster" | "threshold">> &
|
|
|
34
35
|
*/
|
|
35
36
|
async function extractFeatures(data: SensorData): Promise<number[]> {
|
|
36
37
|
const audioFeatures = data.audio
|
|
37
|
-
? await
|
|
38
|
-
: new Array(
|
|
39
|
-
|
|
38
|
+
? await extractSpeakerFeatures(data.audio)
|
|
39
|
+
: new Array(SPEAKER_FEATURE_COUNT).fill(0);
|
|
40
|
+
|
|
41
|
+
const hasMotion = data.motion.length >= MIN_MOTION_SAMPLES;
|
|
42
|
+
const motionFeatures = hasMotion
|
|
43
|
+
? extractMotionFeatures(data.motion)
|
|
44
|
+
: extractMouseDynamics(data.touch);
|
|
45
|
+
|
|
40
46
|
const touchFeatures = extractTouchFeatures(data.touch);
|
|
41
47
|
return fuseFeatures(audioFeatures, motionFeatures, touchFeatures);
|
|
42
48
|
}
|
|
@@ -107,6 +113,11 @@ async function processSensorData(
|
|
|
107
113
|
commitmentBytes: bigintToBytes32(BigInt(previousData.commitment)),
|
|
108
114
|
};
|
|
109
115
|
|
|
116
|
+
const distance = hammingDistance(fingerprint, previousData.fingerprint);
|
|
117
|
+
console.log(
|
|
118
|
+
`[IAM SDK] Re-verification: Hamming distance = ${distance} / 256 bits (threshold = ${config.threshold})`
|
|
119
|
+
);
|
|
120
|
+
|
|
110
121
|
const circuitInput = prepareCircuitInput(
|
|
111
122
|
tbh,
|
|
112
123
|
previousTBH,
|
package/test/integration.test.ts
CHANGED
|
@@ -29,8 +29,8 @@ describe.skipIf(!circuitArtifactsExist)(
|
|
|
29
29
|
"integration: full crypto pipeline",
|
|
30
30
|
() => {
|
|
31
31
|
it("generates a valid proof from mock features end-to-end", async () => {
|
|
32
|
-
// 1. Create mock feature vector (~
|
|
33
|
-
const features = Array.from({ length:
|
|
32
|
+
// 1. Create mock feature vector (~134 random values: 44 speaker + 54 motion/mouse + 36 touch)
|
|
33
|
+
const features = Array.from({ length: 134 }, (_, i) =>
|
|
34
34
|
Math.sin(i * 0.3) * Math.cos(i * 0.7)
|
|
35
35
|
);
|
|
36
36
|
|
package/src/extraction/mfcc.ts
DELETED
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
import type { AudioCapture } from "../sensor/types";
|
|
2
|
-
import { condense, entropy } from "./statistics";
|
|
3
|
-
|
|
4
|
-
// Frame parameters matching the research paper spec
|
|
5
|
-
const FRAME_SIZE = 512; // ~32ms at 16kHz (must be power of 2 for Meyda FFT)
|
|
6
|
-
const HOP_SIZE = 160; // 10ms hop
|
|
7
|
-
const NUM_MFCC = 13;
|
|
8
|
-
|
|
9
|
-
// Dynamic import cache for Meyda (works in both browser and Node.js)
|
|
10
|
-
let meydaModule: any = null;
|
|
11
|
-
|
|
12
|
-
async function getMeyda(): Promise<any> {
|
|
13
|
-
if (!meydaModule) {
|
|
14
|
-
try {
|
|
15
|
-
meydaModule = await import("meyda");
|
|
16
|
-
} catch {
|
|
17
|
-
return null;
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
return meydaModule.default ?? meydaModule;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* Extract MFCC features from audio data.
|
|
25
|
-
* Computes 13 MFCCs per frame, plus delta and delta-delta coefficients,
|
|
26
|
-
* then condenses each coefficient's time series into 4 statistics.
|
|
27
|
-
*
|
|
28
|
-
* Returns: 13 coefficients × 3 (raw + delta + delta-delta) × 4 stats + 13 entropy values = 169 values
|
|
29
|
-
*/
|
|
30
|
-
export async function extractMFCC(audio: AudioCapture): Promise<number[]> {
|
|
31
|
-
const { samples, sampleRate } = audio;
|
|
32
|
-
|
|
33
|
-
const Meyda = await getMeyda();
|
|
34
|
-
if (!Meyda) {
|
|
35
|
-
// Meyda genuinely unavailable — this is a real problem, not a silent fallback
|
|
36
|
-
console.warn("[IAM SDK] Meyda library failed to load. Audio features will be zeros.");
|
|
37
|
-
return new Array(NUM_MFCC * 3 * 4 + NUM_MFCC).fill(0);
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
// Extract MFCCs per frame
|
|
41
|
-
const numFrames = Math.floor((samples.length - FRAME_SIZE) / HOP_SIZE) + 1;
|
|
42
|
-
if (numFrames < 3) {
|
|
43
|
-
console.warn(`[IAM SDK] Too few audio frames (${numFrames}). Need at least 3.`);
|
|
44
|
-
return new Array(NUM_MFCC * 3 * 4 + NUM_MFCC).fill(0);
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
const mfccFrames: number[][] = [];
|
|
48
|
-
|
|
49
|
-
for (let i = 0; i < numFrames; i++) {
|
|
50
|
-
const start = i * HOP_SIZE;
|
|
51
|
-
const frame = samples.slice(start, start + FRAME_SIZE);
|
|
52
|
-
|
|
53
|
-
// Pad if frame is shorter than expected
|
|
54
|
-
const paddedFrame = new Float32Array(FRAME_SIZE);
|
|
55
|
-
paddedFrame.set(frame);
|
|
56
|
-
|
|
57
|
-
const features = Meyda.extract(["mfcc"], paddedFrame, {
|
|
58
|
-
sampleRate,
|
|
59
|
-
bufferSize: FRAME_SIZE,
|
|
60
|
-
numberOfMFCCCoefficients: NUM_MFCC,
|
|
61
|
-
});
|
|
62
|
-
|
|
63
|
-
if (features?.mfcc) {
|
|
64
|
-
mfccFrames.push(features.mfcc);
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
if (mfccFrames.length < 3) return new Array(NUM_MFCC * 3 * 4 + NUM_MFCC).fill(0);
|
|
69
|
-
|
|
70
|
-
// Compute delta (1st derivative) and delta-delta (2nd derivative)
|
|
71
|
-
const deltaFrames = computeDeltas(mfccFrames);
|
|
72
|
-
const deltaDeltaFrames = computeDeltas(deltaFrames);
|
|
73
|
-
|
|
74
|
-
// Condense each coefficient across all frames into 4 statistics
|
|
75
|
-
const features: number[] = [];
|
|
76
|
-
|
|
77
|
-
for (let c = 0; c < NUM_MFCC; c++) {
|
|
78
|
-
const raw = mfccFrames.map((f) => f[c] ?? 0);
|
|
79
|
-
const stats = condense(raw);
|
|
80
|
-
features.push(stats.mean, stats.variance, stats.skewness, stats.kurtosis);
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
for (let c = 0; c < NUM_MFCC; c++) {
|
|
84
|
-
const delta = deltaFrames.map((f) => f[c] ?? 0);
|
|
85
|
-
const stats = condense(delta);
|
|
86
|
-
features.push(stats.mean, stats.variance, stats.skewness, stats.kurtosis);
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
for (let c = 0; c < NUM_MFCC; c++) {
|
|
90
|
-
const dd = deltaDeltaFrames.map((f) => f[c] ?? 0);
|
|
91
|
-
const stats = condense(dd);
|
|
92
|
-
features.push(stats.mean, stats.variance, stats.skewness, stats.kurtosis);
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
// Entropy per MFCC coefficient
|
|
96
|
-
for (let c = 0; c < NUM_MFCC; c++) {
|
|
97
|
-
const raw = mfccFrames.map((f) => f[c] ?? 0);
|
|
98
|
-
features.push(entropy(raw));
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
return features;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
/** Compute delta coefficients (frame-to-frame differences) */
|
|
105
|
-
function computeDeltas(frames: number[][]): number[][] {
|
|
106
|
-
const deltas: number[][] = [];
|
|
107
|
-
for (let i = 1; i < frames.length; i++) {
|
|
108
|
-
const prev = frames[i - 1]!;
|
|
109
|
-
const curr = frames[i]!;
|
|
110
|
-
deltas.push(curr.map((v, j) => v - (prev[j] ?? 0)));
|
|
111
|
-
}
|
|
112
|
-
return deltas;
|
|
113
|
-
}
|