@iam-protocol/pulse-sdk 0.2.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,361 @@
1
+ /**
2
+ * Speaker-dependent audio feature extraction.
3
+ *
4
+ * Extracts features that characterize HOW someone speaks (prosody, vocal physiology)
5
+ * rather than WHAT they say (phonetic content). These features are stable across
6
+ * different utterances from the same speaker.
7
+ *
8
+ * Output: 44 values
9
+ * F0 statistics (5) + F0 delta (4) + jitter (4) + shimmer (4) +
10
+ * HNR statistics (5) + formant ratios (8) + LTAS (8) + voicing ratio (1) +
11
+ * amplitude statistics (5)
12
+ */
13
+ import type { AudioCapture } from "../sensor/types";
14
+ import { condense, entropy } from "./statistics";
15
+ import { extractFormantRatios } from "./lpc";
16
+
17
// Analysis frame parameters. Sized for 16 kHz input audio; FRAME_SIZE must
// stay a power of 2 because downstream FFT-based extraction requires it.
const FRAME_SIZE = 512; // ~32ms at 16kHz, power of 2 for FFT
const HOP_SIZE = 160; // ~10ms hop
// Total length of the vector returned by extractSpeakerFeatures:
// 5 F0 + 4 F0-delta + 4 jitter + 4 shimmer + 5 HNR + 8 formant + 8 LTAS + 1 voicing + 5 amplitude.
const SPEAKER_FEATURE_COUNT = 44;
20
+
21
+ // Dynamic imports for browser compatibility
22
+ let pitchDetector: ((buf: Float32Array) => number | null) | null = null;
23
+ let meydaModule: any = null;
24
+
25
+ async function getPitchDetector(): Promise<(buf: Float32Array) => number | null> {
26
+ if (!pitchDetector) {
27
+ const PitchFinder = await import("pitchfinder");
28
+ pitchDetector = PitchFinder.YIN({ sampleRate: 16000 });
29
+ }
30
+ return pitchDetector;
31
+ }
32
+
33
+ async function getMeyda(): Promise<any> {
34
+ if (!meydaModule) {
35
+ try {
36
+ meydaModule = await import("meyda");
37
+ } catch {
38
+ return null;
39
+ }
40
+ }
41
+ return meydaModule.default ?? meydaModule;
42
+ }
43
+
44
+ /**
45
+ * Detect F0 (fundamental frequency) contour and amplitude peaks per frame.
46
+ */
47
+ async function detectF0Contour(
48
+ samples: Float32Array,
49
+ sampleRate: number
50
+ ): Promise<{ f0: number[]; amplitudes: number[]; periods: number[] }> {
51
+ const detect = await getPitchDetector();
52
+ const f0: number[] = [];
53
+ const amplitudes: number[] = [];
54
+ const periods: number[] = [];
55
+ const numFrames = Math.floor((samples.length - FRAME_SIZE) / HOP_SIZE) + 1;
56
+
57
+ for (let i = 0; i < numFrames; i++) {
58
+ const start = i * HOP_SIZE;
59
+ const frame = samples.slice(start, start + FRAME_SIZE);
60
+
61
+ // F0 detection
62
+ const pitch = detect(frame);
63
+ if (pitch && pitch > 50 && pitch < 600) {
64
+ f0.push(pitch);
65
+ periods.push(1 / pitch);
66
+ } else {
67
+ f0.push(0); // unvoiced frame
68
+ }
69
+
70
+ // RMS amplitude per frame
71
+ let sum = 0;
72
+ for (let j = 0; j < frame.length; j++) {
73
+ sum += (frame[j] ?? 0) * (frame[j] ?? 0);
74
+ }
75
+ amplitudes.push(Math.sqrt(sum / frame.length));
76
+ }
77
+
78
+ return { f0, amplitudes, periods };
79
+ }
80
+
81
+ /**
82
+ * Compute jitter measures from pitch period contour.
83
+ * Jitter = cycle-to-cycle perturbation of the fundamental period.
84
+ */
85
+ function computeJitter(periods: number[]): number[] {
86
+ const voiced = periods.filter((p) => p > 0);
87
+ if (voiced.length < 3) return [0, 0, 0, 0];
88
+
89
+ const meanPeriod = voiced.reduce((a, b) => a + b, 0) / voiced.length;
90
+ if (meanPeriod === 0) return [0, 0, 0, 0];
91
+
92
+ // Jitter (local): average absolute difference between consecutive periods
93
+ let localSum = 0;
94
+ for (let i = 1; i < voiced.length; i++) {
95
+ localSum += Math.abs(voiced[i]! - voiced[i - 1]!);
96
+ }
97
+ const jitterLocal = localSum / (voiced.length - 1) / meanPeriod;
98
+
99
+ // RAP: Relative Average Perturbation (3-point running average)
100
+ let rapSum = 0;
101
+ for (let i = 1; i < voiced.length - 1; i++) {
102
+ const avg3 = (voiced[i - 1]! + voiced[i]! + voiced[i + 1]!) / 3;
103
+ rapSum += Math.abs(voiced[i]! - avg3);
104
+ }
105
+ const jitterRAP = voiced.length > 2 ? rapSum / (voiced.length - 2) / meanPeriod : 0;
106
+
107
+ // PPQ5: Five-Point Period Perturbation Quotient
108
+ let ppq5Sum = 0;
109
+ let ppq5Count = 0;
110
+ for (let i = 2; i < voiced.length - 2; i++) {
111
+ const avg5 = (voiced[i - 2]! + voiced[i - 1]! + voiced[i]! + voiced[i + 1]! + voiced[i + 2]!) / 5;
112
+ ppq5Sum += Math.abs(voiced[i]! - avg5);
113
+ ppq5Count++;
114
+ }
115
+ const jitterPPQ5 = ppq5Count > 0 ? ppq5Sum / ppq5Count / meanPeriod : 0;
116
+
117
+ // DDP: Difference of Differences of Periods
118
+ let ddpSum = 0;
119
+ for (let i = 1; i < voiced.length - 1; i++) {
120
+ const d1 = voiced[i]! - voiced[i - 1]!;
121
+ const d2 = voiced[i + 1]! - voiced[i]!;
122
+ ddpSum += Math.abs(d2 - d1);
123
+ }
124
+ const jitterDDP = voiced.length > 2 ? ddpSum / (voiced.length - 2) / meanPeriod : 0;
125
+
126
+ return [jitterLocal, jitterRAP, jitterPPQ5, jitterDDP];
127
+ }
128
+
129
+ /**
130
+ * Compute shimmer measures from amplitude peaks.
131
+ * Shimmer = cycle-to-cycle amplitude perturbation.
132
+ */
133
+ function computeShimmer(amplitudes: number[], f0: number[]): number[] {
134
+ // Use amplitudes only at voiced frames
135
+ const voicedAmps = amplitudes.filter((_, i) => f0[i]! > 0);
136
+ if (voicedAmps.length < 3) return [0, 0, 0, 0];
137
+
138
+ const meanAmp = voicedAmps.reduce((a, b) => a + b, 0) / voicedAmps.length;
139
+ if (meanAmp === 0) return [0, 0, 0, 0];
140
+
141
+ // Shimmer (local)
142
+ let localSum = 0;
143
+ for (let i = 1; i < voicedAmps.length; i++) {
144
+ localSum += Math.abs(voicedAmps[i]! - voicedAmps[i - 1]!);
145
+ }
146
+ const shimmerLocal = localSum / (voicedAmps.length - 1) / meanAmp;
147
+
148
+ // APQ3: 3-point Amplitude Perturbation Quotient
149
+ let apq3Sum = 0;
150
+ for (let i = 1; i < voicedAmps.length - 1; i++) {
151
+ const avg3 = (voicedAmps[i - 1]! + voicedAmps[i]! + voicedAmps[i + 1]!) / 3;
152
+ apq3Sum += Math.abs(voicedAmps[i]! - avg3);
153
+ }
154
+ const shimmerAPQ3 = voicedAmps.length > 2 ? apq3Sum / (voicedAmps.length - 2) / meanAmp : 0;
155
+
156
+ // APQ5
157
+ let apq5Sum = 0;
158
+ let apq5Count = 0;
159
+ for (let i = 2; i < voicedAmps.length - 2; i++) {
160
+ const avg5 = (voicedAmps[i - 2]! + voicedAmps[i - 1]! + voicedAmps[i]! + voicedAmps[i + 1]! + voicedAmps[i + 2]!) / 5;
161
+ apq5Sum += Math.abs(voicedAmps[i]! - avg5);
162
+ apq5Count++;
163
+ }
164
+ const shimmerAPQ5 = apq5Count > 0 ? apq5Sum / apq5Count / meanAmp : 0;
165
+
166
+ // DDA: Difference of Differences of Amplitudes
167
+ let ddaSum = 0;
168
+ for (let i = 1; i < voicedAmps.length - 1; i++) {
169
+ const d1 = voicedAmps[i]! - voicedAmps[i - 1]!;
170
+ const d2 = voicedAmps[i + 1]! - voicedAmps[i]!;
171
+ ddaSum += Math.abs(d2 - d1);
172
+ }
173
+ const shimmerDDA = voicedAmps.length > 2 ? ddaSum / (voicedAmps.length - 2) / meanAmp : 0;
174
+
175
+ return [shimmerLocal, shimmerAPQ3, shimmerAPQ5, shimmerDDA];
176
+ }
177
+
178
+ /**
179
+ * Compute Harmonic-to-Noise Ratio per frame using autocorrelation.
180
+ */
181
+ function computeHNR(
182
+ samples: Float32Array,
183
+ sampleRate: number,
184
+ f0Contour: number[]
185
+ ): number[] {
186
+ const hnr: number[] = [];
187
+ const numFrames = Math.floor((samples.length - FRAME_SIZE) / HOP_SIZE) + 1;
188
+
189
+ for (let i = 0; i < numFrames && i < f0Contour.length; i++) {
190
+ const f0 = f0Contour[i]!;
191
+ if (f0 <= 0) continue; // Skip unvoiced frames
192
+
193
+ const start = i * HOP_SIZE;
194
+ const frame = samples.slice(start, start + FRAME_SIZE);
195
+ const period = Math.round(sampleRate / f0);
196
+
197
+ if (period <= 0 || period >= frame.length) continue;
198
+
199
+ // Autocorrelation at the fundamental period
200
+ let num = 0;
201
+ let den = 0;
202
+ for (let j = 0; j < frame.length - period; j++) {
203
+ num += (frame[j] ?? 0) * (frame[j + period] ?? 0);
204
+ den += (frame[j] ?? 0) * (frame[j] ?? 0);
205
+ }
206
+
207
+ if (den > 0) {
208
+ const r = num / den;
209
+ const clampedR = Math.max(0.001, Math.min(0.999, r));
210
+ hnr.push(10 * Math.log10(clampedR / (1 - clampedR)));
211
+ }
212
+ }
213
+
214
+ return hnr;
215
+ }
216
+
217
+ /**
218
+ * Compute LTAS (Long-Term Average Spectrum) features using Meyda.
219
+ * Returns 8 values: spectral centroid, rolloff, flatness, spread — each mean + variance.
220
+ */
221
+ async function computeLTAS(
222
+ samples: Float32Array,
223
+ sampleRate: number
224
+ ): Promise<number[]> {
225
+ const Meyda = await getMeyda();
226
+ if (!Meyda) return new Array(8).fill(0);
227
+
228
+ const centroids: number[] = [];
229
+ const rolloffs: number[] = [];
230
+ const flatnesses: number[] = [];
231
+ const spreads: number[] = [];
232
+ const numFrames = Math.floor((samples.length - FRAME_SIZE) / HOP_SIZE) + 1;
233
+
234
+ for (let i = 0; i < numFrames; i++) {
235
+ const start = i * HOP_SIZE;
236
+ const frame = samples.slice(start, start + FRAME_SIZE);
237
+ const paddedFrame = new Float32Array(FRAME_SIZE);
238
+ paddedFrame.set(frame);
239
+
240
+ const features = Meyda.extract(
241
+ ["spectralCentroid", "spectralRolloff", "spectralFlatness", "spectralSpread"],
242
+ paddedFrame,
243
+ { sampleRate, bufferSize: FRAME_SIZE }
244
+ );
245
+
246
+ if (features) {
247
+ if (typeof features.spectralCentroid === "number") centroids.push(features.spectralCentroid);
248
+ if (typeof features.spectralRolloff === "number") rolloffs.push(features.spectralRolloff);
249
+ if (typeof features.spectralFlatness === "number") flatnesses.push(features.spectralFlatness);
250
+ if (typeof features.spectralSpread === "number") spreads.push(features.spectralSpread);
251
+ }
252
+ }
253
+
254
+ const m = (arr: number[]) => arr.length > 0 ? arr.reduce((a, b) => a + b, 0) / arr.length : 0;
255
+ const v = (arr: number[]) => {
256
+ if (arr.length < 2) return 0;
257
+ const mu = m(arr);
258
+ return arr.reduce((sum, x) => sum + (x - mu) * (x - mu), 0) / (arr.length - 1);
259
+ };
260
+
261
+ return [
262
+ m(centroids), v(centroids),
263
+ m(rolloffs), v(rolloffs),
264
+ m(flatnesses), v(flatnesses),
265
+ m(spreads), v(spreads),
266
+ ];
267
+ }
268
+
269
+ /**
270
+ * Compute derivative (frame-to-frame differences) of a time series.
271
+ */
272
+ function derivative(values: number[]): number[] {
273
+ const d: number[] = [];
274
+ for (let i = 1; i < values.length; i++) {
275
+ d.push(values[i]! - values[i - 1]!);
276
+ }
277
+ return d;
278
+ }
279
+
280
+ /**
281
+ * Extract speaker-dependent audio features.
282
+ *
283
+ * Captures physiological vocal characteristics (F0, jitter, shimmer, HNR, formant
284
+ * ratios) that are stable across different utterances from the same speaker.
285
+ * Content-independent by design — different phrases produce similar feature values.
286
+ *
287
+ * Returns 44 values.
288
+ */
289
+ export async function extractSpeakerFeatures(audio: AudioCapture): Promise<number[]> {
290
+ const { samples, sampleRate } = audio;
291
+
292
+ const numFrames = Math.floor((samples.length - FRAME_SIZE) / HOP_SIZE) + 1;
293
+ if (numFrames < 5) {
294
+ console.warn(`[IAM SDK] Too few audio frames (${numFrames}). Speaker features will be zeros.`);
295
+ return new Array(SPEAKER_FEATURE_COUNT).fill(0);
296
+ }
297
+
298
+ // 1. F0 detection + amplitude contour
299
+ const { f0, amplitudes, periods } = await detectF0Contour(samples, sampleRate);
300
+
301
+ const voicedF0 = f0.filter((v) => v > 0);
302
+ const voicedRatio = voicedF0.length / f0.length;
303
+
304
+ // 2. F0 statistics (5 values)
305
+ const f0Stats = condense(voicedF0);
306
+ const f0Entropy = entropy(voicedF0);
307
+ const f0Features = [f0Stats.mean, f0Stats.variance, f0Stats.skewness, f0Stats.kurtosis, f0Entropy];
308
+
309
+ // 3. F0 delta statistics (4 values)
310
+ const f0Delta = derivative(voicedF0);
311
+ const f0DeltaStats = condense(f0Delta);
312
+ const f0DeltaFeatures = [f0DeltaStats.mean, f0DeltaStats.variance, f0DeltaStats.skewness, f0DeltaStats.kurtosis];
313
+
314
+ // 4. Jitter (4 values)
315
+ const jitterFeatures = computeJitter(periods);
316
+
317
+ // 5. Shimmer (4 values)
318
+ const shimmerFeatures = computeShimmer(amplitudes, f0);
319
+
320
+ // 6. HNR statistics (5 values)
321
+ const hnrValues = computeHNR(samples, sampleRate, f0);
322
+ const hnrStats = condense(hnrValues);
323
+ const hnrEntropy = entropy(hnrValues);
324
+ const hnrFeatures = [hnrStats.mean, hnrStats.variance, hnrStats.skewness, hnrStats.kurtosis, hnrEntropy];
325
+
326
+ // 7. Formant ratios (8 values)
327
+ const { f1f2, f2f3 } = extractFormantRatios(samples, sampleRate, FRAME_SIZE, HOP_SIZE);
328
+ const f1f2Stats = condense(f1f2);
329
+ const f2f3Stats = condense(f2f3);
330
+ const formantFeatures = [
331
+ f1f2Stats.mean, f1f2Stats.variance, f1f2Stats.skewness, f1f2Stats.kurtosis,
332
+ f2f3Stats.mean, f2f3Stats.variance, f2f3Stats.skewness, f2f3Stats.kurtosis,
333
+ ];
334
+
335
+ // 8. LTAS (8 values)
336
+ const ltasFeatures = await computeLTAS(samples, sampleRate);
337
+
338
+ // 9. Voicing ratio (1 value)
339
+ const voicingFeatures = [voicedRatio];
340
+
341
+ // 10. Amplitude statistics (5 values)
342
+ const ampStats = condense(amplitudes);
343
+ const ampEntropy = entropy(amplitudes);
344
+ const ampFeatures = [ampStats.mean, ampStats.variance, ampStats.skewness, ampStats.kurtosis, ampEntropy];
345
+
346
+ const features = [
347
+ ...f0Features, // 5
348
+ ...f0DeltaFeatures, // 4
349
+ ...jitterFeatures, // 4
350
+ ...shimmerFeatures, // 4
351
+ ...hnrFeatures, // 5
352
+ ...formantFeatures, // 8
353
+ ...ltasFeatures, // 8
354
+ ...voicingFeatures, // 1
355
+ ...ampFeatures, // 5
356
+ ]; // = 44
357
+
358
+ return features;
359
+ }
360
+
361
+ export { SPEAKER_FEATURE_COUNT };
@@ -52,7 +52,7 @@ function getHyperplanes(dimension: number): number[][] {
52
52
  * Uses deterministic random hyperplanes seeded from the protocol constant.
53
53
  * Similar feature vectors produce fingerprints with low Hamming distance.
54
54
  */
55
- const EXPECTED_FEATURE_DIMENSION = 259; // 169 audio + 54 motion + 36 touch
55
+ const EXPECTED_FEATURE_DIMENSION = 134; // 44 speaker + 54 motion/mouse + 36 touch
56
56
 
57
57
  export function simhash(features: number[]): TemporalFingerprint {
58
58
  if (features.length === 0) {
package/src/index.ts CHANGED
@@ -19,6 +19,8 @@ export {
19
19
  // Feature extraction
20
20
  export type { StatsSummary, FeatureVector, FusedFeatureVector } from "./extraction/types";
21
21
  export { mean, variance, skewness, kurtosis, condense, entropy, autocorrelation, fuseFeatures } from "./extraction/statistics";
22
+ export { extractSpeakerFeatures, SPEAKER_FEATURE_COUNT } from "./extraction/speaker";
23
+ export { extractMotionFeatures, extractTouchFeatures, extractMouseDynamics } from "./extraction/kinematic";
22
24
 
23
25
  // Proof generation
24
26
  export type { SolanaProof, CircuitInput, ProofResult } from "./proof/types";
package/src/pulse.ts CHANGED
@@ -9,13 +9,14 @@ import type { StoredVerificationData } from "./identity/types";
9
9
  import { captureAudio } from "./sensor/audio";
10
10
  import { captureMotion } from "./sensor/motion";
11
11
  import { captureTouch } from "./sensor/touch";
12
- import { extractMFCC } from "./extraction/mfcc";
12
+ import { extractSpeakerFeatures, SPEAKER_FEATURE_COUNT } from "./extraction/speaker";
13
13
  import {
14
14
  extractMotionFeatures,
15
15
  extractTouchFeatures,
16
+ extractMouseDynamics,
16
17
  } from "./extraction/kinematic";
17
18
  import { fuseFeatures } from "./extraction/statistics";
18
- import { simhash } from "./hashing/simhash";
19
+ import { simhash, hammingDistance } from "./hashing/simhash";
19
20
  import { generateTBH, bigintToBytes32 } from "./hashing/poseidon";
20
21
  import { prepareCircuitInput, generateProof } from "./proof/prover";
21
22
  import { serializeProof } from "./proof/serializer";
@@ -34,9 +35,14 @@ type ResolvedConfig = Required<Pick<PulseConfig, "cluster" | "threshold">> &
34
35
  */
35
36
  async function extractFeatures(data: SensorData): Promise<number[]> {
36
37
  const audioFeatures = data.audio
37
- ? await extractMFCC(data.audio)
38
- : new Array(169).fill(0);
39
- const motionFeatures = extractMotionFeatures(data.motion);
38
+ ? await extractSpeakerFeatures(data.audio)
39
+ : new Array(SPEAKER_FEATURE_COUNT).fill(0);
40
+
41
+ const hasMotion = data.motion.length >= MIN_MOTION_SAMPLES;
42
+ const motionFeatures = hasMotion
43
+ ? extractMotionFeatures(data.motion)
44
+ : extractMouseDynamics(data.touch);
45
+
40
46
  const touchFeatures = extractTouchFeatures(data.touch);
41
47
  return fuseFeatures(audioFeatures, motionFeatures, touchFeatures);
42
48
  }
@@ -107,6 +113,11 @@ async function processSensorData(
107
113
  commitmentBytes: bigintToBytes32(BigInt(previousData.commitment)),
108
114
  };
109
115
 
116
+ const distance = hammingDistance(fingerprint, previousData.fingerprint);
117
+ console.log(
118
+ `[IAM SDK] Re-verification: Hamming distance = ${distance} / 256 bits (threshold = ${config.threshold})`
119
+ );
120
+
110
121
  const circuitInput = prepareCircuitInput(
111
122
  tbh,
112
123
  previousTBH,
@@ -29,8 +29,8 @@ describe.skipIf(!circuitArtifactsExist)(
29
29
  "integration: full crypto pipeline",
30
30
  () => {
31
31
  it("generates a valid proof from mock features end-to-end", async () => {
32
- // 1. Create mock feature vector (~259 random values: 169 audio + 54 motion + 36 touch)
33
- const features = Array.from({ length: 259 }, (_, i) =>
32
+ // 1. Create mock feature vector (~134 random values: 44 speaker + 54 motion/mouse + 36 touch)
33
+ const features = Array.from({ length: 134 }, (_, i) =>
34
34
  Math.sin(i * 0.3) * Math.cos(i * 0.7)
35
35
  );
36
36
 
@@ -1,113 +0,0 @@
1
- import type { AudioCapture } from "../sensor/types";
2
- import { condense, entropy } from "./statistics";
3
-
4
- // Frame parameters matching the research paper spec
5
- const FRAME_SIZE = 512; // ~32ms at 16kHz (must be power of 2 for Meyda FFT)
6
- const HOP_SIZE = 160; // 10ms hop
7
- const NUM_MFCC = 13;
8
-
9
- // Dynamic import cache for Meyda (works in both browser and Node.js)
10
- let meydaModule: any = null;
11
-
12
- async function getMeyda(): Promise<any> {
13
- if (!meydaModule) {
14
- try {
15
- meydaModule = await import("meyda");
16
- } catch {
17
- return null;
18
- }
19
- }
20
- return meydaModule.default ?? meydaModule;
21
- }
22
-
23
- /**
24
- * Extract MFCC features from audio data.
25
- * Computes 13 MFCCs per frame, plus delta and delta-delta coefficients,
26
- * then condenses each coefficient's time series into 4 statistics.
27
- *
28
- * Returns: 13 coefficients × 3 (raw + delta + delta-delta) × 4 stats + 13 entropy values = 169 values
29
- */
30
- export async function extractMFCC(audio: AudioCapture): Promise<number[]> {
31
- const { samples, sampleRate } = audio;
32
-
33
- const Meyda = await getMeyda();
34
- if (!Meyda) {
35
- // Meyda genuinely unavailable — this is a real problem, not a silent fallback
36
- console.warn("[IAM SDK] Meyda library failed to load. Audio features will be zeros.");
37
- return new Array(NUM_MFCC * 3 * 4 + NUM_MFCC).fill(0);
38
- }
39
-
40
- // Extract MFCCs per frame
41
- const numFrames = Math.floor((samples.length - FRAME_SIZE) / HOP_SIZE) + 1;
42
- if (numFrames < 3) {
43
- console.warn(`[IAM SDK] Too few audio frames (${numFrames}). Need at least 3.`);
44
- return new Array(NUM_MFCC * 3 * 4 + NUM_MFCC).fill(0);
45
- }
46
-
47
- const mfccFrames: number[][] = [];
48
-
49
- for (let i = 0; i < numFrames; i++) {
50
- const start = i * HOP_SIZE;
51
- const frame = samples.slice(start, start + FRAME_SIZE);
52
-
53
- // Pad if frame is shorter than expected
54
- const paddedFrame = new Float32Array(FRAME_SIZE);
55
- paddedFrame.set(frame);
56
-
57
- const features = Meyda.extract(["mfcc"], paddedFrame, {
58
- sampleRate,
59
- bufferSize: FRAME_SIZE,
60
- numberOfMFCCCoefficients: NUM_MFCC,
61
- });
62
-
63
- if (features?.mfcc) {
64
- mfccFrames.push(features.mfcc);
65
- }
66
- }
67
-
68
- if (mfccFrames.length < 3) return new Array(NUM_MFCC * 3 * 4 + NUM_MFCC).fill(0);
69
-
70
- // Compute delta (1st derivative) and delta-delta (2nd derivative)
71
- const deltaFrames = computeDeltas(mfccFrames);
72
- const deltaDeltaFrames = computeDeltas(deltaFrames);
73
-
74
- // Condense each coefficient across all frames into 4 statistics
75
- const features: number[] = [];
76
-
77
- for (let c = 0; c < NUM_MFCC; c++) {
78
- const raw = mfccFrames.map((f) => f[c] ?? 0);
79
- const stats = condense(raw);
80
- features.push(stats.mean, stats.variance, stats.skewness, stats.kurtosis);
81
- }
82
-
83
- for (let c = 0; c < NUM_MFCC; c++) {
84
- const delta = deltaFrames.map((f) => f[c] ?? 0);
85
- const stats = condense(delta);
86
- features.push(stats.mean, stats.variance, stats.skewness, stats.kurtosis);
87
- }
88
-
89
- for (let c = 0; c < NUM_MFCC; c++) {
90
- const dd = deltaDeltaFrames.map((f) => f[c] ?? 0);
91
- const stats = condense(dd);
92
- features.push(stats.mean, stats.variance, stats.skewness, stats.kurtosis);
93
- }
94
-
95
- // Entropy per MFCC coefficient
96
- for (let c = 0; c < NUM_MFCC; c++) {
97
- const raw = mfccFrames.map((f) => f[c] ?? 0);
98
- features.push(entropy(raw));
99
- }
100
-
101
- return features;
102
- }
103
-
104
- /** Compute delta coefficients (frame-to-frame differences) */
105
- function computeDeltas(frames: number[][]): number[][] {
106
- const deltas: number[][] = [];
107
- for (let i = 1; i < frames.length; i++) {
108
- const prev = frames[i - 1]!;
109
- const curr = frames[i]!;
110
- deltas.push(curr.map((v, j) => v - (prev[j] ?? 0)));
111
- }
112
- return deltas;
113
- }