@entros/pulse-sdk 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -47,6 +47,25 @@ function sdkWarn(...args) {
47
47
 
48
48
  // src/sensor/audio.ts
49
49
  var TARGET_SAMPLE_RATE = 16e3;
50
+ var TARGET_CAPTURE_RMS = 0.05;
51
+ var MIN_RMS_FOR_NORMALIZATION = 1e-4;
52
+ var MAX_NORMALIZATION_GAIN = 50;
53
+ function normalizeCaptureRMS(samples) {
54
+ if (samples.length === 0) return samples;
55
+ let sumSq = 0;
56
+ for (let i = 0; i < samples.length; i++) {
57
+ const s = samples[i];
58
+ sumSq += s * s;
59
+ }
60
+ const rms = Math.sqrt(sumSq / samples.length);
61
+ if (rms < MIN_RMS_FOR_NORMALIZATION) return samples;
62
+ const gain = Math.min(TARGET_CAPTURE_RMS / rms, MAX_NORMALIZATION_GAIN);
63
+ const out = new Float32Array(samples.length);
64
+ for (let i = 0; i < samples.length; i++) {
65
+ out[i] = Math.max(-1, Math.min(1, samples[i] * gain));
66
+ }
67
+ return out;
68
+ }
50
69
  async function captureAudio(options = {}) {
51
70
  const {
52
71
  signal,
@@ -134,8 +153,9 @@ async function captureAudio(options = {}) {
134
153
  samples.set(chunk, offset);
135
154
  offset += chunk.length;
136
155
  }
156
+ const normalized = normalizeCaptureRMS(samples);
137
157
  resolve({
138
- samples,
158
+ samples: normalized,
139
159
  sampleRate: capturedSampleRate,
140
160
  duration: totalLength / capturedSampleRate
141
161
  });
@@ -310,6 +330,9 @@ function variance(values, mu) {
310
330
  for (const v of values) sum += (v - m) ** 2;
311
331
  return sum / (values.length - 1);
312
332
  }
333
+ var SKEWNESS_BOUND = 20;
334
+ var KURTOSIS_LOWER = 0;
335
+ var KURTOSIS_UPPER = 50;
313
336
  function skewness(values) {
314
337
  if (values.length < 3) return 0;
315
338
  const n = values.length;
@@ -318,7 +341,8 @@ function skewness(values) {
318
341
  if (s === 0) return 0;
319
342
  let sum = 0;
320
343
  for (const v of values) sum += ((v - m) / s) ** 3;
321
- return n / ((n - 1) * (n - 2)) * sum;
344
+ const raw = n / ((n - 1) * (n - 2)) * sum;
345
+ return Math.max(-SKEWNESS_BOUND, Math.min(SKEWNESS_BOUND, raw));
322
346
  }
323
347
  function kurtosis(values) {
324
348
  if (values.length < 4) return 0;
@@ -329,7 +353,7 @@ function kurtosis(values) {
329
353
  let sum = 0;
330
354
  for (const v of values) sum += (v - m) ** 4 / s2 ** 2;
331
355
  const k = n * (n + 1) / ((n - 1) * (n - 2) * (n - 3)) * sum - 3 * (n - 1) ** 2 / ((n - 2) * (n - 3));
332
- return k;
356
+ return Math.max(KURTOSIS_LOWER, Math.min(KURTOSIS_UPPER, k));
333
357
  }
334
358
  function condense(values) {
335
359
  const m = mean(values);
@@ -590,9 +614,20 @@ function extractLpcAnalysis(samples, sampleRate, frameSize, hopSize, lpcOrder =
590
614
 
591
615
  // src/extraction/mfcc.ts
592
616
  var NUM_MFCC_COEFFICIENTS = 13;
617
+ var MFCC_DROP_LEADING = 1;
618
+ var NUM_USED_MFCC = NUM_MFCC_COEFFICIENTS - MFCC_DROP_LEADING;
593
619
  var DELTA_REGRESSION_HALF_WIDTH = 2;
594
- var MFCC_FEATURE_COUNT = NUM_MFCC_COEFFICIENTS * 4 + // mean, var, skew, kurt per coefficient
595
- NUM_MFCC_COEFFICIENTS * 2;
620
+ var MFCC_FEATURE_COUNT = NUM_USED_MFCC * 4 + // mean, var, skew, kurt per coefficient
621
+ NUM_USED_MFCC * 2;
622
+ function applyPreEmphasis(samples) {
623
+ const out = new Float32Array(samples.length);
624
+ if (samples.length === 0) return out;
625
+ out[0] = samples[0];
626
+ for (let i = 1; i < samples.length; i++) {
627
+ out[i] = samples[i] - 0.97 * samples[i - 1];
628
+ }
629
+ return out;
630
+ }
596
631
  function computeDelta(series, halfWidth) {
597
632
  const n = series.length;
598
633
  const out = new Array(n);
@@ -642,15 +677,16 @@ async function extractMfccFeatures(samples, sampleRate, frameSize, hopSize) {
642
677
  return new Array(MFCC_FEATURE_COUNT).fill(0);
643
678
  }
644
679
  const mfccTracks = Array.from(
645
- { length: NUM_MFCC_COEFFICIENTS },
680
+ { length: NUM_USED_MFCC },
646
681
  () => []
647
682
  );
648
683
  const frame = new Float32Array(frameSize);
649
684
  Meyda.bufferSize = frameSize;
650
685
  Meyda.sampleRate = sampleRate;
686
+ const emphasized = applyPreEmphasis(samples);
651
687
  for (let i = 0; i < numFrames; i++) {
652
688
  const start = i * hopSize;
653
- frame.set(samples.subarray(start, start + frameSize), 0);
689
+ frame.set(emphasized.subarray(start, start + frameSize), 0);
654
690
  const result = Meyda.extract("mfcc", frame);
655
691
  if (!Array.isArray(result) || result.length !== NUM_MFCC_COEFFICIENTS) {
656
692
  continue;
@@ -663,21 +699,21 @@ async function extractMfccFeatures(samples, sampleRate, frameSize, hopSize) {
663
699
  }
664
700
  }
665
701
  if (!allFinite) continue;
666
- for (let c = 0; c < NUM_MFCC_COEFFICIENTS; c++) {
667
- mfccTracks[c].push(result[c]);
702
+ for (let c = 0; c < NUM_USED_MFCC; c++) {
703
+ mfccTracks[c].push(result[c + MFCC_DROP_LEADING]);
668
704
  }
669
705
  }
670
706
  const out = [];
671
707
  out.length = MFCC_FEATURE_COUNT;
672
708
  let writeIdx = 0;
673
- for (let c = 0; c < NUM_MFCC_COEFFICIENTS; c++) {
709
+ for (let c = 0; c < NUM_USED_MFCC; c++) {
674
710
  const stats = condense(mfccTracks[c]);
675
711
  out[writeIdx++] = stats.mean;
676
712
  out[writeIdx++] = stats.variance;
677
713
  out[writeIdx++] = stats.skewness;
678
714
  out[writeIdx++] = stats.kurtosis;
679
715
  }
680
- for (let c = 0; c < NUM_MFCC_COEFFICIENTS; c++) {
716
+ for (let c = 0; c < NUM_USED_MFCC; c++) {
681
717
  const delta = computeDelta(mfccTracks[c], DELTA_REGRESSION_HALF_WIDTH);
682
718
  const muDelta = mean(delta);
683
719
  out[writeIdx++] = muDelta;
@@ -1196,9 +1232,9 @@ async function extractSpeakerFeaturesDetailed(audio) {
1196
1232
  for (let i = 0; i < numFrames; i++) {
1197
1233
  const start = i * hopSize;
1198
1234
  let sum = 0;
1199
- const end = Math.min(start + frameSize, samples.length);
1235
+ const end = Math.min(start + frameSize, normalizedSamples.length);
1200
1236
  for (let j = start; j < end; j++) {
1201
- sum += (samples[j] ?? 0) * (samples[j] ?? 0);
1237
+ sum += (normalizedSamples[j] ?? 0) * (normalizedSamples[j] ?? 0);
1202
1238
  }
1203
1239
  amplitudes.push(Math.sqrt(sum / (end - start)));
1204
1240
  }
@@ -1307,15 +1343,15 @@ async function extractSpeakerFeaturesDetailed(audio) {
1307
1343
  ...ampFeatures,
1308
1344
  // 5 [39..44] AMPLITUDE
1309
1345
  ...mfccFeatures,
1310
- // 78 [44..122] MFCC + delta-MFCC
1346
+ // 72 [44..116] MFCC + delta-MFCC (MFCC[0] dropped)
1311
1347
  ...lpcStats,
1312
- // 24 [122..146] LPC coefficient stats
1348
+ // 24 [116..140] LPC coefficient stats
1313
1349
  ...formantTrajectoryFeatures,
1314
- // 16 [146..162] Formant absolutes + dynamics + bandwidths
1350
+ // 16 [140..156] Formant absolutes + dynamics + bandwidths
1315
1351
  ...voiceQualityFeatures,
1316
- // 9 [162..171] Voice quality
1352
+ // 9 [156..165] Voice quality
1317
1353
  ...pitchShapeFeatures
1318
- // 5 [171..176] Pitch contour shape DCT
1354
+ // 5 [165..170] Pitch contour shape DCT
1319
1355
  ];
1320
1356
  return { features, f0Contour: f0 };
1321
1357
  }
@@ -1841,8 +1877,61 @@ function extractMouseDynamics(samples) {
1841
1877
  angleAutoCorr[2] ?? 0,
1842
1878
  normalizedPathLength
1843
1879
  ];
1844
- const padding = MOUSE_DYNAMICS_FEATURE_COUNT - legacyMouseDynamics.length;
1845
- return padding > 0 ? [...legacyMouseDynamics, ...new Array(padding).fill(0)] : legacyMouseDynamics;
1880
+ const v2 = computeMouseV2(samples, vx, vy, accX, accY, speed, acc, jerk, directions);
1881
+ return [...legacyMouseDynamics, ...v2];
1882
+ }
1883
+ function computeMouseV2(samples, vx, vy, accX, accY, speed, acc, jerk, directions) {
1884
+ const out = [];
1885
+ const covPairs = [
1886
+ [vx, vy],
1887
+ [vx, accX],
1888
+ [vx, accY],
1889
+ [vy, accX],
1890
+ [vy, accY],
1891
+ [accX, accY]
1892
+ ];
1893
+ for (const [a, b] of covPairs) out.push(covariance(a, b));
1894
+ const sampleRate = sampleRateFromTimestamps(samples.map((s) => s.timestamp));
1895
+ const fftSize = nextPow2(Math.max(64, speed.length));
1896
+ const bands = [
1897
+ [0, 2],
1898
+ [2, 6],
1899
+ [6, 12],
1900
+ [12, 30]
1901
+ ];
1902
+ const speedSpectrum = realFFT(meanCenter(speed), fftSize);
1903
+ const accSpectrum = realFFT(meanCenter(acc), fftSize);
1904
+ const jerkSpectrum = realFFT(meanCenter(jerk), fftSize);
1905
+ for (const spectrum of [speedSpectrum, accSpectrum, jerkSpectrum]) {
1906
+ for (const [lo, hi] of bands) {
1907
+ out.push(bandEnergy(spectrum.real, spectrum.imag, sampleRate, lo, hi));
1908
+ }
1909
+ }
1910
+ const tremor = peakInBand(
1911
+ speedSpectrum.real,
1912
+ speedSpectrum.imag,
1913
+ sampleRate,
1914
+ 4,
1915
+ 12
1916
+ );
1917
+ out.push(tremor.freq, tremor.amplitude);
1918
+ const duration = captureDurationSec(samples);
1919
+ const reversalRates = [vx, vy, speed].map(
1920
+ (channel) => duration > 0 ? signChangeCount(derivative2(channel)) / duration : 0
1921
+ );
1922
+ out.push(mean(reversalRates), variance(reversalRates));
1923
+ let dirAccum = 0;
1924
+ for (let i = 1; i < directions.length; i++) {
1925
+ let diff = directions[i] - directions[i - 1];
1926
+ while (diff > Math.PI) diff -= 2 * Math.PI;
1927
+ while (diff < -Math.PI) diff += 2 * Math.PI;
1928
+ dirAccum += Math.abs(diff);
1929
+ }
1930
+ out.push(directions.length > 1 ? dirAccum / (directions.length - 1) : 0);
1931
+ for (const lag of [1, 5, 10, 25]) {
1932
+ out.push(autocorrelation(speed, lag));
1933
+ }
1934
+ return out.map((v) => Number.isFinite(v) ? v : 0);
1846
1935
  }
1847
1936
 
1848
1937
  // src/hashing/simhash.ts