@entros/pulse-sdk 1.5.3 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.d.mts +34 -8
- package/dist/index.d.ts +34 -8
- package/dist/index.js +882 -50
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +882 -50
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -47,6 +47,25 @@ function sdkWarn(...args) {
|
|
|
47
47
|
|
|
48
48
|
// src/sensor/audio.ts
|
|
49
49
|
var TARGET_SAMPLE_RATE = 16e3;
|
|
50
|
+
// RMS level that captured audio is scaled towards.
var TARGET_CAPTURE_RMS = 0.05;
// Captures whose RMS is below this are returned untouched instead of being amplified.
var MIN_RMS_FOR_NORMALIZATION = 1e-4;
// Upper limit on the gain applied to a quiet capture.
var MAX_NORMALIZATION_GAIN = 50;
/**
 * Scales a capture so that its RMS level approaches TARGET_CAPTURE_RMS.
 *
 * The gain is capped at MAX_NORMALIZATION_GAIN and every scaled sample is
 * clamped to [-1, 1]. The input is never modified; when scaling happens a new
 * Float32Array is returned.
 *
 * @param {ArrayLike<number>} samples - Audio samples.
 * @returns {ArrayLike<number>} The same `samples` object when it is empty, when
 *   its RMS is below MIN_RMS_FOR_NORMALIZATION, or when its RMS is not a finite
 *   number; otherwise a new Float32Array holding the scaled samples.
 */
function normalizeCaptureRMS(samples) {
  if (samples.length === 0) return samples;
  let sumSq = 0;
  for (let i = 0; i < samples.length; i++) {
    const s = samples[i];
    sumSq += s * s;
  }
  const rms = Math.sqrt(sumSq / samples.length);
  // A single NaN/Infinity sample makes the RMS non-finite; scaling by the
  // resulting NaN gain would turn every output sample into NaN, so bail out.
  if (!Number.isFinite(rms) || rms < MIN_RMS_FOR_NORMALIZATION) return samples;
  const gain = Math.min(TARGET_CAPTURE_RMS / rms, MAX_NORMALIZATION_GAIN);
  const out = new Float32Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    out[i] = Math.max(-1, Math.min(1, samples[i] * gain));
  }
  return out;
}
|
|
50
69
|
async function captureAudio(options = {}) {
|
|
51
70
|
const {
|
|
52
71
|
signal,
|
|
@@ -134,8 +153,9 @@ async function captureAudio(options = {}) {
|
|
|
134
153
|
samples.set(chunk, offset);
|
|
135
154
|
offset += chunk.length;
|
|
136
155
|
}
|
|
156
|
+
const normalized = normalizeCaptureRMS(samples);
|
|
137
157
|
resolve({
|
|
138
|
-
samples,
|
|
158
|
+
samples: normalized,
|
|
139
159
|
sampleRate: capturedSampleRate,
|
|
140
160
|
duration: totalLength / capturedSampleRate
|
|
141
161
|
});
|
|
@@ -310,6 +330,9 @@ function variance(values, mu) {
|
|
|
310
330
|
for (const v of values) sum += (v - m) ** 2;
|
|
311
331
|
return sum / (values.length - 1);
|
|
312
332
|
}
|
|
333
|
+
// Symmetric clamp for skewness(): its result is limited to [-SKEWNESS_BOUND, SKEWNESS_BOUND].
var SKEWNESS_BOUND = 20;
|
|
334
|
+
// Lower clamp applied to the value returned by kurtosis().
var KURTOSIS_LOWER = 0;
|
|
335
|
+
// Upper clamp applied to the value returned by kurtosis().
var KURTOSIS_UPPER = 50;
|
|
313
336
|
function skewness(values) {
|
|
314
337
|
if (values.length < 3) return 0;
|
|
315
338
|
const n = values.length;
|
|
@@ -318,7 +341,8 @@ function skewness(values) {
|
|
|
318
341
|
if (s === 0) return 0;
|
|
319
342
|
let sum = 0;
|
|
320
343
|
for (const v of values) sum += ((v - m) / s) ** 3;
|
|
321
|
-
|
|
344
|
+
const raw = n / ((n - 1) * (n - 2)) * sum;
|
|
345
|
+
return Math.max(-SKEWNESS_BOUND, Math.min(SKEWNESS_BOUND, raw));
|
|
322
346
|
}
|
|
323
347
|
function kurtosis(values) {
|
|
324
348
|
if (values.length < 4) return 0;
|
|
@@ -329,7 +353,7 @@ function kurtosis(values) {
|
|
|
329
353
|
let sum = 0;
|
|
330
354
|
for (const v of values) sum += (v - m) ** 4 / s2 ** 2;
|
|
331
355
|
const k = n * (n + 1) / ((n - 1) * (n - 2) * (n - 3)) * sum - 3 * (n - 1) ** 2 / ((n - 2) * (n - 3));
|
|
332
|
-
return k;
|
|
356
|
+
return Math.max(KURTOSIS_LOWER, Math.min(KURTOSIS_UPPER, k));
|
|
333
357
|
}
|
|
334
358
|
function condense(values) {
|
|
335
359
|
const m = mean(values);
|
|
@@ -491,44 +515,458 @@ function findRoots(coefficients, maxIterations = 50) {
|
|
|
491
515
|
}
|
|
492
516
|
return roots;
|
|
493
517
|
}
|
|
494
|
-
function
|
|
518
|
+
/**
 * Analyses a single frame: computes its LPC coefficients and, from the roots
 * of the LPC polynomial, the three lowest-frequency resonance candidates.
 *
 * A root is kept as a candidate only when its imaginary part is positive, its
 * frequency lies strictly between 200 and 5000, and its bandwidth is below 500
 * (same units as `sampleRate`).
 *
 * @param {ArrayLike<number>} frame - Frame samples handed to autocorrelate().
 * @param {number} sampleRate - Sample rate used to convert root angles to frequencies.
 * @param {number} [lpcOrder=12] - LPC order.
 * @returns {{lpcCoefficients: *, formants: (number[]|null), bandwidths: (number[]|null)}}
 *   `formants`/`bandwidths` are both null when fewer than three candidates survive.
 */
function extractFrameAnalysis(frame, sampleRate, lpcOrder = 12) {
  const autocorrelation = autocorrelate(frame, lpcOrder);
  const lpcCoefficients = levinsonDurbin(autocorrelation, lpcOrder);
  const polynomialRoots = findRoots(lpcCoefficients);

  const resonances = [];
  for (const [re, im] of polynomialRoots) {
    // Only one root of each conjugate pair is needed.
    if (im <= 0) continue;
    const freq = Math.atan2(im, re) / (2 * Math.PI) * sampleRate;
    const bandwidth = -sampleRate / (2 * Math.PI) * Math.log(Math.sqrt(re * re + im * im));
    const frequencyInRange = freq > 200 && freq < 5e3;
    if (frequencyInRange && bandwidth < 500) {
      resonances.push({ freq, bandwidth });
    }
  }
  resonances.sort((lhs, rhs) => lhs.freq - rhs.freq);

  const lowestThree = resonances.length < 3 ? null : resonances.slice(0, 3);
  return {
    lpcCoefficients,
    formants: lowestThree ? lowestThree.map((entry) => entry.freq) : null,
    bandwidths: lowestThree ? lowestThree.map((entry) => entry.bandwidth) : null
  };
}
|
|
511
|
-
function
|
|
547
|
+
/**
 * Slides a Hamming-windowed frame over `samples` and collects, per frame, the
 * LPC coefficients and (when available) the three formant candidates reported
 * by extractFrameAnalysis().
 *
 * @param {Float32Array} samples - Signal to analyse; must support `subarray`.
 * @param {number} sampleRate - Sample rate forwarded to extractFrameAnalysis().
 * @param {number} frameSize - Samples per frame.
 * @param {number} hopSize - Samples between the starts of consecutive frames.
 * @param {number} [lpcOrder=12] - LPC order; also the number of coefficient tracks.
 * @returns {{lpcCoefficients: number[][], f1: number[], f2: number[], f3: number[],
 *   b1: number[], b2: number[], b3: number[], f1f2: number[], f2f3: number[],
 *   numFramesAnalyzed: number}} Per-coefficient tracks (finite values only),
 *   formant and bandwidth tracks, formant ratio tracks, and the number of
 *   frames processed. All tracks are empty when the signal is shorter than one frame.
 */
function extractLpcAnalysis(samples, sampleRate, frameSize, hopSize, lpcOrder = 12) {
  const lpcCoefficients = Array.from({ length: lpcOrder }, () => []);
  const f1 = [];
  const f2 = [];
  const f3 = [];
  const b1 = [];
  const b2 = [];
  const b3 = [];
  const f1f2 = [];
  const f2f3 = [];
  const numFrames = Math.floor((samples.length - frameSize) / hopSize) + 1;
  let numFramesAnalyzed = 0;
  if (numFrames < 1) {
    return { lpcCoefficients, f1, f2, f3, b1, b2, b3, f1f2, f2f3, numFramesAnalyzed: 0 };
  }

  // The Hamming window depends only on frameSize, so compute it once instead
  // of evaluating Math.cos for every sample of every frame.
  const hamming = new Float64Array(frameSize);
  for (let j = 0; j < frameSize; j++) {
    hamming[j] = 0.54 - 0.46 * Math.cos(2 * Math.PI * j / (frameSize - 1));
  }

  // Scratch buffer reused for every frame.
  const windowed = new Float32Array(frameSize);
  for (let i = 0; i < numFrames; i++) {
    const start = i * hopSize;
    const frame = samples.subarray(start, start + frameSize);
    for (let j = 0; j < frameSize; j++) {
      windowed[j] = (frame[j] ?? 0) * hamming[j];
    }
    const analysis = extractFrameAnalysis(windowed, sampleRate, lpcOrder);
    numFramesAnalyzed++;

    // Non-finite coefficients are dropped, so tracks may differ in length.
    for (let c = 0; c < lpcOrder; c++) {
      const coeff = analysis.lpcCoefficients[c];
      if (Number.isFinite(coeff)) {
        lpcCoefficients[c].push(coeff);
      }
    }

    if (analysis.formants && analysis.bandwidths) {
      const [F1, F2, F3] = analysis.formants;
      const [B1, B2, B3] = analysis.bandwidths;
      f1.push(F1);
      f2.push(F2);
      f3.push(F3);
      b1.push(B1);
      b2.push(B2);
      b3.push(B3);
      if (F2 > 0) f1f2.push(F1 / F2);
      if (F3 > 0) f2f3.push(F2 / F3);
    }
  }
  return { lpcCoefficients, f1, f2, f3, b1, b2, b3, f1f2, f2f3, numFramesAnalyzed };
}
|
|
614
|
+
|
|
615
|
+
// src/extraction/mfcc.ts
|
|
616
|
+
// Number of coefficients expected from each "mfcc" extraction; frames that return a different count are skipped.
var NUM_MFCC_COEFFICIENTS = 13;
|
|
617
|
+
// Number of leading coefficients discarded from every frame.
var MFCC_DROP_LEADING = 1;
|
|
618
|
+
// Coefficients actually kept per frame (one track each).
var NUM_USED_MFCC = NUM_MFCC_COEFFICIENTS - MFCC_DROP_LEADING;
|
|
619
|
+
// Half-width handed to computeDelta() when deriving the delta tracks.
var DELTA_REGRESSION_HALF_WIDTH = 2;
|
|
620
|
+
// Length of the vector returned by extractMfccFeatures() (72 with the values above).
var MFCC_FEATURE_COUNT = NUM_USED_MFCC * 4 + // mean, var, skew, kurt per coefficient
|
|
621
|
+
NUM_USED_MFCC * 2; // mean and variance of the delta track per coefficient
|
|
622
|
+
/**
 * Applies a first-order pre-emphasis filter, y[i] = x[i] - 0.97 * x[i - 1],
 * leaving the first sample unchanged.
 *
 * @param {ArrayLike<number>} samples - Input signal; not modified.
 * @returns {Float32Array} A new array of the same length as `samples`.
 */
function applyPreEmphasis(samples) {
  return Float32Array.from(
    samples,
    (current, index) => index === 0 ? current : current - 0.97 * samples[index - 1]
  );
}
|
|
631
|
+
/**
 * Computes a regression-style delta (slope estimate) for every position of a
 * series using symmetric lags 1..halfWidth.
 *
 * Near the ends of the series, any lag whose forward or backward neighbour
 * falls outside the series is left out of both the numerator and the
 * normaliser. When no lag remains the delta is 0.
 *
 * @param {ArrayLike<number>} series - Input values.
 * @param {number} halfWidth - Largest lag used on each side.
 * @returns {number[]} One delta per input position.
 */
function computeDelta(series, halfWidth) {
  const length = series.length;
  // Equals 2 * (1^2 + 2^2 + ... + halfWidth^2).
  const fullNormaliser = halfWidth * (halfWidth + 1) * (2 * halfWidth + 1) / 3;
  const deltas = new Array(length);
  for (let center = 0; center < length; center++) {
    let weightedDiff = 0;
    let normaliser = fullNormaliser;
    for (let lag = 1; lag <= halfWidth; lag++) {
      const ahead = center + lag;
      const behind = center - lag;
      if (ahead < length && behind >= 0) {
        weightedDiff += lag * (series[ahead] - series[behind]);
      } else {
        // This lag is unavailable here: remove its share of the normaliser.
        normaliser -= 2 * lag * lag;
      }
    }
    deltas[center] = normaliser <= 0 ? 0 : weightedDiff / normaliser;
  }
  return deltas;
}
|
|
655
|
+
// Cached result of the dynamic "meyda" import; stays null until an import succeeds.
var meydaModule = null;
/**
 * Lazily imports the optional "meyda" package and caches the loaded module.
 *
 * @returns {Promise<*>} The module's default export when it has one, otherwise
 *   the module namespace; null when the import fails. A failed import is not
 *   cached, so the next call tries again.
 */
async function getMeyda() {
  if (meydaModule) {
    return meydaModule.default ?? meydaModule;
  }
  try {
    meydaModule = await import("meyda");
  } catch {
    // Best effort: callers handle the null themselves.
    return null;
  }
  return meydaModule.default ?? meydaModule;
}
|
|
666
|
+
/**
 * Builds the MFCC feature vector for a signal.
 *
 * The signal is pre-emphasised, cut into frames and passed to Meyda's "mfcc"
 * extractor. The leading MFCC_DROP_LEADING coefficient(s) of each frame are
 * discarded. For every remaining coefficient track the output holds four
 * condensed statistics (mean, variance, skewness, kurtosis), followed by the
 * mean and variance of each track's delta series.
 *
 * Sets `Meyda.bufferSize` and `Meyda.sampleRate` on the shared Meyda object.
 *
 * @param {Float32Array} samples - Signal to analyse.
 * @param {number} sampleRate - Sample rate handed to Meyda.
 * @param {number} frameSize - Samples per frame.
 * @param {number} hopSize - Samples between frame starts.
 * @returns {Promise<number[]>} MFCC_FEATURE_COUNT values; all zeros when the
 *   arguments are unusable, Meyda cannot be loaded, or fewer than 5 frames fit.
 */
async function extractMfccFeatures(samples, sampleRate, frameSize, hopSize) {
  const zeroVector = () => new Array(MFCC_FEATURE_COUNT).fill(0);

  const rateIsUsable = Number.isFinite(sampleRate) && sampleRate > 0;
  if (!rateIsUsable || samples.length === 0 || frameSize <= 0 || hopSize <= 0) {
    return zeroVector();
  }
  const Meyda = await getMeyda();
  if (!Meyda) {
    sdkWarn("[Entros SDK] Meyda unavailable; MFCC features will be zeros.");
    return zeroVector();
  }
  const frameCount = Math.floor((samples.length - frameSize) / hopSize) + 1;
  if (frameCount < 5) {
    return zeroVector();
  }

  const tracks = [];
  for (let c = 0; c < NUM_USED_MFCC; c++) tracks.push([]);
  const buffer = new Float32Array(frameSize);
  Meyda.bufferSize = frameSize;
  Meyda.sampleRate = sampleRate;
  const emphasized = applyPreEmphasis(samples);

  for (let index = 0; index < frameCount; index++) {
    const offset = index * hopSize;
    buffer.set(emphasized.subarray(offset, offset + frameSize), 0);
    const coefficients = Meyda.extract("mfcc", buffer);
    if (!Array.isArray(coefficients) || coefficients.length !== NUM_MFCC_COEFFICIENTS) {
      continue;
    }
    // A frame with any non-finite coefficient is dropped as a whole so that
    // all tracks stay the same length.
    let frameIsClean = true;
    for (const value of coefficients) {
      if (!Number.isFinite(value)) {
        frameIsClean = false;
        break;
      }
    }
    if (!frameIsClean) continue;
    tracks.forEach((track, c) => {
      track.push(coefficients[c + MFCC_DROP_LEADING]);
    });
  }

  const features = [];
  for (const track of tracks) {
    const summary = condense(track);
    features.push(summary.mean, summary.variance, summary.skewness, summary.kurtosis);
  }
  for (const track of tracks) {
    const deltas = computeDelta(track, DELTA_REGRESSION_HALF_WIDTH);
    const deltaMean = mean(deltas);
    features.push(deltaMean, variance(deltas, deltaMean));
  }
  return features;
}
|
|
531
724
|
|
|
725
|
+
// src/extraction/voice-quality.ts
|
|
726
|
+
// Length of the vector returned by extractVoiceQualityFeatures().
var VOICE_QUALITY_FEATURE_COUNT = 9;
|
|
727
|
+
// Upper edge of the low sub-band used by subbandRatios().
var LOW_BAND_HZ = 1e3;
|
|
728
|
+
// Upper edge of the mid sub-band used by subbandRatios().
var MID_BAND_HZ = 3e3;
|
|
729
|
+
// Upper edge of the high sub-band used by subbandRatios().
var HIGH_BAND_HZ = 8e3;
|
|
730
|
+
/**
 * Derives the quefrency index range searched for the cepstral peak.
 *
 * @param {number} sampleRate - Sample rate of the analysed signal.
 * @returns {{qMin: number, qMax: number}} `qMin` is floor(sampleRate / 400),
 *   but never less than 2; `qMax` is floor(sampleRate / 60).
 */
function cppQuefrencyRange(sampleRate) {
  const shortestPeriod = Math.floor(sampleRate / 400);
  const longestPeriod = Math.floor(sampleRate / 60);
  return {
    qMin: Math.max(2, shortestPeriod),
    qMax: longestPeriod
  };
}
|
|
736
|
+
// Cached result of the dynamic "meyda" import; stays null until an import succeeds.
var meydaModule2 = null;
/**
 * Lazily imports the optional "meyda" package and caches the loaded module.
 *
 * @returns {Promise<*>} The module's default export when it has one, otherwise
 *   the module namespace; null when the import fails. A failed import is not
 *   cached, so the next call tries again.
 */
async function getMeyda2() {
  if (meydaModule2) {
    return meydaModule2.default ?? meydaModule2;
  }
  try {
    meydaModule2 = await import("meyda");
  } catch {
    // Best effort: callers handle the null themselves.
    return null;
  }
  return meydaModule2.default ?? meydaModule2;
}
|
|
747
|
+
/**
 * Measures how far the largest cepstral value stands above a straight line
 * fitted through the cepstrum, within the range given by cppQuefrencyRange().
 *
 * The cepstrum is obtained as a cosine transform of the natural log of the
 * power spectrum (values floored at 1e-12), evaluated only inside that range.
 *
 * @param {ArrayLike<number>} powerSpectrum - Power per frequency bin.
 * @param {number} sampleRate - Sample rate used to derive the quefrency range.
 * @returns {number} Peak value minus the fitted line at the peak position;
 *   0 when the spectrum has fewer than 8 bins, the range does not fit, a log
 *   value is not finite, or the line fit is degenerate.
 */
function cepstralPeakProminence(powerSpectrum, sampleRate) {
  const binCount = powerSpectrum.length;
  if (binCount < 8) return 0;
  const { qMin, qMax } = cppQuefrencyRange(sampleRate);
  if (qMax >= binCount || qMax <= qMin) return 0;

  const POWER_FLOOR = 1e-12;
  const logSpectrum = new Array(binCount);
  for (let bin = 0; bin < binCount; bin++) {
    const logValue = Math.log(Math.max(powerSpectrum[bin], POWER_FLOOR));
    if (!Number.isFinite(logValue)) return 0;
    logSpectrum[bin] = logValue;
  }

  // Cosine transform of the log spectrum, restricted to [qMin, qMax].
  const bandLength = qMax - qMin + 1;
  const cepstrum = new Array(bandLength);
  const angleStep = Math.PI / binCount;
  for (let offset = 0; offset < bandLength; offset++) {
    const quefrency = qMin + offset;
    let acc = 0;
    for (let bin = 0; bin < binCount; bin++) {
      acc += logSpectrum[bin] * Math.cos(angleStep * (bin + 0.5) * quefrency);
    }
    cepstrum[offset] = acc;
  }

  // First occurrence of the largest value wins.
  let peakOffset = 0;
  for (let offset = 1; offset < bandLength; offset++) {
    if (cepstrum[offset] > cepstrum[peakOffset]) peakOffset = offset;
  }
  const peakValue = cepstrum[peakOffset];
  const peakQuefrency = qMin + peakOffset;

  // Least-squares line through (quefrency, cepstrum) over the whole band.
  let sumX = 0;
  let sumY = 0;
  let sumXX = 0;
  let sumXY = 0;
  cepstrum.forEach((y, offset) => {
    const x = qMin + offset;
    sumX += x;
    sumY += y;
    sumXX += x * x;
    sumXY += x * y;
  });
  const determinant = bandLength * sumXX - sumX * sumX;
  if (Math.abs(determinant) < 1e-12) return 0;
  const slope = (bandLength * sumXY - sumX * sumY) / determinant;
  const intercept = (sumY - slope * sumX) / bandLength;
  const baselineAtPeak = intercept + slope * peakQuefrency;
  return peakValue - baselineAtPeak;
}
|
|
800
|
+
/**
 * Estimates spectral tilt as the least-squares slope of log(power) against
 * log(bin index).
 *
 * Bins below max(1, floor(100 * 2 * (N - 1) / sampleRate)) are ignored, as are
 * bins whose power is below 1e-12 or whose logarithms are not finite.
 *
 * @param {ArrayLike<number>} powerSpectrum - Power per frequency bin.
 * @param {number} sampleRate - Sample rate used to locate the first bin.
 * @returns {number} The fitted slope; 0 when the spectrum has fewer than 8
 *   bins, fewer than 4 usable points remain, or the fit is degenerate.
 */
function spectralTilt(powerSpectrum, sampleRate) {
  const binCount = powerSpectrum.length;
  if (binCount < 8) return 0;

  const POWER_FLOOR = 1e-12;
  const firstBin = Math.max(1, Math.floor(100 * 2 * (binCount - 1) / sampleRate));
  let sumX = 0;
  let sumY = 0;
  let sumXX = 0;
  let sumXY = 0;
  let points = 0;
  for (let bin = firstBin; bin < binCount; bin++) {
    const power = powerSpectrum[bin];
    if (power < POWER_FLOOR) continue;
    const logBin = Math.log(bin);
    const logPower = Math.log(power);
    if (Number.isFinite(logBin) && Number.isFinite(logPower)) {
      sumX += logBin;
      sumY += logPower;
      sumXX += logBin * logBin;
      sumXY += logBin * logPower;
      points++;
    }
  }
  if (points < 4) return 0;

  const determinant = points * sumXX - sumX * sumX;
  return Math.abs(determinant) < 1e-12 ? 0 : (points * sumXY - sumX * sumY) / determinant;
}
|
|
827
|
+
/**
 * Returns the level difference, in dB, between the strongest bin near the
 * fundamental and the strongest bin near twice the fundamental.
 *
 * Each harmonic is searched within two bins either side of its rounded
 * position; bin 0 and bins beyond the spectrum are ignored.
 *
 * @param {ArrayLike<number>} powerSpectrum - Power per frequency bin.
 * @param {number} sampleRate - Sample rate used to map frequency to bins.
 * @param {number} f0 - Fundamental frequency.
 * @returns {number} 10 * log10(h1 / h2); 0 when `f0` is not a positive finite
 *   number, the spectrum has fewer than 8 bins, or either peak is missing or
 *   not strictly positive.
 */
function h1MinusH2(powerSpectrum, sampleRate, f0) {
  if (!Number.isFinite(f0) || f0 <= 0) return 0;
  const binCount = powerSpectrum.length;
  if (binCount < 8) return 0;

  const SEARCH_RADIUS = 2;
  const binsPerHz = 2 * (binCount - 1) / sampleRate;

  const strongestNear = (centerBin) => {
    let strongest = -Infinity;
    for (let bin = centerBin - SEARCH_RADIUS; bin <= centerBin + SEARCH_RADIUS; bin++) {
      if (bin <= 0 || bin >= binCount) continue;
      const power = powerSpectrum[bin];
      strongest = power > strongest ? power : strongest;
    }
    return strongest;
  };

  const firstHarmonic = strongestNear(Math.round(f0 * binsPerHz));
  const secondHarmonic = strongestNear(Math.round(2 * f0 * binsPerHz));
  const bothUsable = Number.isFinite(firstHarmonic) && Number.isFinite(secondHarmonic) && !(firstHarmonic <= 0) && !(secondHarmonic <= 0);
  if (!bothUsable) return 0;
  return 10 * Math.log10(firstHarmonic / secondHarmonic);
}
|
|
849
|
+
/**
 * Splits the spectrum's power (bin 0 excluded) into three consecutive bands
 * ending at LOW_BAND_HZ, MID_BAND_HZ and HIGH_BAND_HZ, and returns each band's
 * share of the total power.
 *
 * Power above the high band edge counts towards the total but towards no
 * band. Negative or non-finite bins are ignored entirely.
 *
 * @param {ArrayLike<number>} powerSpectrum - Power per frequency bin.
 * @param {number} sampleRate - Sample rate used to map band edges to bins.
 * @returns {number[]} [low, mid, high] shares; [0, 0, 0] when the spectrum has
 *   fewer than 4 bins or the total power is below 1e-12.
 */
function subbandRatios(powerSpectrum, sampleRate) {
  const binCount = powerSpectrum.length;
  if (binCount < 4) return [0, 0, 0];

  const binsPerHz = 2 * (binCount - 1) / sampleRate;
  const edgeBin = (hz) => Math.min(binCount - 1, Math.round(hz * binsPerHz));
  const upperEdges = [edgeBin(LOW_BAND_HZ), edgeBin(MID_BAND_HZ), edgeBin(HIGH_BAND_HZ)];

  const bandPower = [0, 0, 0];
  let totalPower = 0;
  for (let bin = 1; bin < binCount; bin++) {
    const power = powerSpectrum[bin];
    if (!Number.isFinite(power) || power < 0) continue;
    totalPower += power;
    // First band whose upper edge has not been passed yet.
    const band = upperEdges.findIndex((edge) => bin <= edge);
    if (band !== -1) bandPower[band] += power;
  }
  if (totalPower < 1e-12) return [0, 0, 0];
  return bandPower.map((power) => power / totalPower);
}
|
|
871
|
+
/**
 * Builds the voice-quality feature vector from per-frame power spectra.
 *
 * For every frame, Meyda's "powerSpectrum" is used to compute cepstral peak
 * prominence, spectral tilt, H1-H2 (only for frames whose `f0PerFrame` entry
 * is positive) and the three sub-band power ratios.
 *
 * Sets `Meyda.bufferSize` and `Meyda.sampleRate` on the shared Meyda object.
 *
 * @param {Float32Array} samples - Signal to analyse.
 * @param {number} sampleRate - Sample rate handed to Meyda and the measures.
 * @param {number} frameSize - Samples per frame.
 * @param {number} hopSize - Samples between frame starts.
 * @param {ArrayLike<number>} f0PerFrame - Fundamental frequency per frame
 *   index; missing entries are treated as 0.
 * @returns {Promise<number[]>} [cppMean, cppVar, tiltMean, tiltVar, h1h2Mean,
 *   h1h2Var, lowMean, midMean, highMean]; all zeros when the arguments are
 *   unusable, Meyda cannot be loaded, or fewer than 5 frames fit.
 */
async function extractVoiceQualityFeatures(samples, sampleRate, frameSize, hopSize, f0PerFrame) {
  const zeroVector = () => new Array(VOICE_QUALITY_FEATURE_COUNT).fill(0);

  const rateIsUsable = Number.isFinite(sampleRate) && sampleRate > 0;
  if (!rateIsUsable || samples.length === 0 || frameSize <= 0 || hopSize <= 0) {
    return zeroVector();
  }
  const Meyda = await getMeyda2();
  if (!Meyda) {
    sdkWarn("[Entros SDK] Meyda unavailable; voice quality features will be zeros.");
    return zeroVector();
  }
  const frameCount = Math.floor((samples.length - frameSize) / hopSize) + 1;
  if (frameCount < 5) {
    return zeroVector();
  }

  const series = { cpp: [], tilt: [], h1h2: [], low: [], mid: [], high: [] };
  const buffer = new Float32Array(frameSize);
  Meyda.bufferSize = frameSize;
  Meyda.sampleRate = sampleRate;

  for (let index = 0; index < frameCount; index++) {
    const offset = index * hopSize;
    buffer.set(samples.subarray(offset, offset + frameSize), 0);
    const power = Meyda.extract("powerSpectrum", buffer);
    if (!power || power.length === 0) continue;

    const cpp = cepstralPeakProminence(power, sampleRate);
    if (Number.isFinite(cpp)) series.cpp.push(cpp);

    const tilt = spectralTilt(power, sampleRate);
    if (Number.isFinite(tilt)) series.tilt.push(tilt);

    const frameF0 = f0PerFrame[index] ?? 0;
    if (frameF0 > 0) {
      const h1h2 = h1MinusH2(power, sampleRate, frameF0);
      if (Number.isFinite(h1h2)) series.h1h2.push(h1h2);
    }

    const [lowShare, midShare, highShare] = subbandRatios(power, sampleRate);
    series.low.push(lowShare);
    series.mid.push(midShare);
    series.high.push(highShare);
  }

  const meanAndVariance = (values) => {
    const mu = mean(values);
    return [mu, variance(values, mu)];
  };
  return [
    ...meanAndVariance(series.cpp),
    ...meanAndVariance(series.tilt),
    ...meanAndVariance(series.h1h2),
    mean(series.low),
    mean(series.mid),
    mean(series.high)
  ];
}
|
|
934
|
+
|
|
935
|
+
// src/extraction/dct.ts
|
|
936
|
+
/**
 * Computes the first coefficients of an unnormalised DCT-II:
 * X[k] = sum over n of x[n] * cos(pi / N * (n + 0.5) * k).
 *
 * @param {ArrayLike<number>} input - Input sequence of length N.
 * @param {number} numCoefficients - Number of coefficients wanted; negative
 *   values are treated as 0.
 * @returns {number[]} Exactly max(0, numCoefficients) values. Coefficients at
 *   index N and above (and all of them for empty input) are 0.
 */
function dctII(input, numCoefficients) {
  const length = input.length;
  const requested = Math.max(0, numCoefficients);
  const coefficients = new Array(requested).fill(0);
  if (length === 0 || requested === 0) {
    return coefficients;
  }

  const angleStep = Math.PI / length;
  const computed = Math.min(requested, length);
  let k = 0;
  while (k < computed) {
    let acc = 0;
    for (let n = 0; n < length; n++) {
      acc += input[n] * Math.cos(angleStep * (n + 0.5) * k);
    }
    coefficients[k] = acc;
    k++;
  }
  return coefficients;
}
|
|
952
|
+
/**
 * Summarises the shape of a pitch contour with a few DCT coefficients.
 *
 * Only finite, positive contour values are used. They are mean-centred,
 * transformed with dctII(), and each coefficient is scaled by 1 / sqrt(count).
 *
 * @param {Iterable<number>} contour - Pitch value per frame.
 * @param {number} [numCoefficients=5] - Number of coefficients to return.
 * @returns {number[]} `numCoefficients` values; an empty array when
 *   `numCoefficients` is not positive, and all zeros when fewer than
 *   2 * numCoefficients usable contour values exist.
 */
function pitchContourShape(contour, numCoefficients = 5) {
  if (numCoefficients <= 0) return [];

  const voiced = [...contour].filter((value) => Number.isFinite(value) && value > 0);
  if (voiced.length < numCoefficients * 2) {
    return new Array(numCoefficients).fill(0);
  }

  const total = voiced.reduce((acc, value) => acc + value, 0);
  const average = total / voiced.length;
  const centered = voiced.map((value) => value - average);
  const scale = 1 / Math.sqrt(centered.length);
  return dctII(centered, numCoefficients).map((coefficient) => coefficient * scale);
}
|
|
968
|
+
// Number of pitch-contour DCT coefficients requested from pitchContourShape() for the speaker feature vector.
var PITCH_CONTOUR_SHAPE_FEATURE_COUNT = 5;
|
|
969
|
+
|
|
532
970
|
// src/yield.ts
|
|
533
971
|
function yieldToMainThread() {
|
|
534
972
|
return new Promise((resolve) => {
|
|
@@ -560,10 +998,13 @@ function getFrameSize(sampleRate) {
|
|
|
560
998
|
/**
 * Returns the analysis hop size: 1% of the sample rate, rounded, and never
 * less than one sample.
 *
 * @param {number} sampleRate - Sample rate of the signal.
 * @returns {number} Hop size in samples.
 */
function getHopSize(sampleRate) {
  const onePercent = Math.round(sampleRate * 0.01);
  return onePercent < 1 ? 1 : onePercent;
}
|
|
563
|
-
var
|
|
1001
|
+
// Size of the leading part of the speaker vector: F0 stats through amplitude, indices [0..44).
var LEGACY_SPEAKER_FEATURE_COUNT = 44;
|
|
1002
|
+
// Mean and variance for each of the 12 LPC coefficient tracks.
var LPC_COEFFICIENT_STATS = 12 * 2;
|
|
1003
|
+
// Number of entries in the formant trajectory group (formant stats, formant deltas, bandwidth stats).
var FORMANT_TRAJECTORY_FEATURE_COUNT = 16;
|
|
1004
|
+
// Total speaker vector length: the sum of all feature groups (170 with the values in this file).
var SPEAKER_FEATURE_COUNT = LEGACY_SPEAKER_FEATURE_COUNT + MFCC_FEATURE_COUNT + LPC_COEFFICIENT_STATS + FORMANT_TRAJECTORY_FEATURE_COUNT + VOICE_QUALITY_FEATURE_COUNT + PITCH_CONTOUR_SHAPE_FEATURE_COUNT;
|
|
564
1005
|
var pitchDetector = null;
|
|
565
1006
|
var pitchDetectorRate = 0;
|
|
566
|
-
var
|
|
1007
|
+
var meydaModule3 = null;
|
|
567
1008
|
async function getPitchDetector(sampleRate) {
|
|
568
1009
|
if (!pitchDetector || pitchDetectorRate !== sampleRate) {
|
|
569
1010
|
const PitchFinder = await import("pitchfinder");
|
|
@@ -572,15 +1013,15 @@ async function getPitchDetector(sampleRate) {
|
|
|
572
1013
|
}
|
|
573
1014
|
return pitchDetector;
|
|
574
1015
|
}
|
|
575
|
-
async function
|
|
576
|
-
if (!
|
|
1016
|
+
/**
 * Lazily imports the optional "meyda" package and caches the loaded module in
 * `meydaModule3`.
 *
 * @returns {Promise<*>} The module's default export when it has one, otherwise
 *   the module namespace; null when the import fails. A failed import is not
 *   cached, so the next call tries again.
 */
async function getMeyda3() {
  if (meydaModule3) {
    return meydaModule3.default ?? meydaModule3;
  }
  try {
    meydaModule3 = await import("meyda");
  } catch {
    // Best effort: callers handle the null themselves.
    return null;
  }
  return meydaModule3.default ?? meydaModule3;
}
|
|
585
1026
|
var F0_YIELD_EVERY_N_FRAMES = 16;
|
|
586
1027
|
async function detectF0Contour(samples, sampleRate) {
|
|
@@ -710,8 +1151,10 @@ function computeHNR(samples, sampleRate, f0Contour) {
|
|
|
710
1151
|
async function computeLTAS(samples, sampleRate) {
|
|
711
1152
|
const frameSize = getFrameSize(sampleRate);
|
|
712
1153
|
const hopSize = getHopSize(sampleRate);
|
|
713
|
-
const Meyda = await
|
|
1154
|
+
const Meyda = await getMeyda3();
|
|
714
1155
|
if (!Meyda) return new Array(8).fill(0);
|
|
1156
|
+
Meyda.bufferSize = frameSize;
|
|
1157
|
+
Meyda.sampleRate = sampleRate;
|
|
715
1158
|
const centroids = [];
|
|
716
1159
|
const rolloffs = [];
|
|
717
1160
|
const flatnesses = [];
|
|
@@ -723,8 +1166,7 @@ async function computeLTAS(samples, sampleRate) {
|
|
|
723
1166
|
paddedFrame.set(samples.subarray(start, start + frameSize), 0);
|
|
724
1167
|
const features = Meyda.extract(
|
|
725
1168
|
["spectralCentroid", "spectralRolloff", "spectralFlatness", "spectralSpread"],
|
|
726
|
-
paddedFrame
|
|
727
|
-
{ sampleRate, bufferSize: frameSize }
|
|
1169
|
+
paddedFrame
|
|
728
1170
|
);
|
|
729
1171
|
if (features) {
|
|
730
1172
|
if (Number.isFinite(features.spectralCentroid)) centroids.push(features.spectralCentroid);
|
|
@@ -790,9 +1232,9 @@ async function extractSpeakerFeaturesDetailed(audio) {
|
|
|
790
1232
|
for (let i = 0; i < numFrames; i++) {
|
|
791
1233
|
const start = i * hopSize;
|
|
792
1234
|
let sum = 0;
|
|
793
|
-
const end = Math.min(start + frameSize,
|
|
1235
|
+
const end = Math.min(start + frameSize, normalizedSamples.length);
|
|
794
1236
|
for (let j = start; j < end; j++) {
|
|
795
|
-
sum += (
|
|
1237
|
+
sum += (normalizedSamples[j] ?? 0) * (normalizedSamples[j] ?? 0);
|
|
796
1238
|
}
|
|
797
1239
|
amplitudes.push(Math.sqrt(sum / (end - start)));
|
|
798
1240
|
}
|
|
@@ -811,9 +1253,9 @@ async function extractSpeakerFeaturesDetailed(audio) {
|
|
|
811
1253
|
const hnrEntropy = entropy(hnrValues);
|
|
812
1254
|
const hnrFeatures = [hnrStats.mean, hnrStats.variance, hnrStats.skewness, hnrStats.kurtosis, hnrEntropy];
|
|
813
1255
|
await yieldToMainThread();
|
|
814
|
-
const
|
|
815
|
-
const f1f2Stats = condense(f1f2);
|
|
816
|
-
const f2f3Stats = condense(f2f3);
|
|
1256
|
+
const lpc = extractLpcAnalysis(normalizedSamples, sampleRate, frameSize, hopSize);
|
|
1257
|
+
const f1f2Stats = condense(lpc.f1f2);
|
|
1258
|
+
const f2f3Stats = condense(lpc.f2f3);
|
|
817
1259
|
const formantFeatures = [
|
|
818
1260
|
f1f2Stats.mean,
|
|
819
1261
|
f1f2Stats.variance,
|
|
@@ -830,25 +1272,86 @@ async function extractSpeakerFeaturesDetailed(audio) {
|
|
|
830
1272
|
const ampStats = condense(amplitudes);
|
|
831
1273
|
const ampEntropy = entropy(amplitudes);
|
|
832
1274
|
const ampFeatures = [ampStats.mean, ampStats.variance, ampStats.skewness, ampStats.kurtosis, ampEntropy];
|
|
1275
|
+
await yieldToMainThread();
|
|
1276
|
+
const mfccFeatures = await extractMfccFeatures(
|
|
1277
|
+
normalizedSamples,
|
|
1278
|
+
sampleRate,
|
|
1279
|
+
frameSize,
|
|
1280
|
+
hopSize
|
|
1281
|
+
);
|
|
1282
|
+
const lpcStats = [];
|
|
1283
|
+
for (let c = 0; c < 12; c++) {
|
|
1284
|
+
const track = lpc.lpcCoefficients[c] ?? [];
|
|
1285
|
+
const mu = mean(track);
|
|
1286
|
+
lpcStats.push(mu, variance(track, mu));
|
|
1287
|
+
}
|
|
1288
|
+
const f1Stats = { mean: mean(lpc.f1), var: variance(lpc.f1) };
|
|
1289
|
+
const f2Stats = { mean: mean(lpc.f2), var: variance(lpc.f2) };
|
|
1290
|
+
const f3Stats = { mean: mean(lpc.f3), var: variance(lpc.f3) };
|
|
1291
|
+
const f1Delta = derivative(lpc.f1);
|
|
1292
|
+
const f2Delta = derivative(lpc.f2);
|
|
1293
|
+
const f3Delta = derivative(lpc.f3);
|
|
1294
|
+
const f1DeltaMu = mean(f1Delta);
|
|
1295
|
+
const f2DeltaMu = mean(f2Delta);
|
|
1296
|
+
const f3DeltaMu = mean(f3Delta);
|
|
1297
|
+
const b1Mu = mean(lpc.b1);
|
|
1298
|
+
const b2Mu = mean(lpc.b2);
|
|
1299
|
+
const formantTrajectoryFeatures = [
|
|
1300
|
+
f1Stats.mean,
|
|
1301
|
+
f1Stats.var,
|
|
1302
|
+
f2Stats.mean,
|
|
1303
|
+
f2Stats.var,
|
|
1304
|
+
f3Stats.mean,
|
|
1305
|
+
f3Stats.var,
|
|
1306
|
+
f1DeltaMu,
|
|
1307
|
+
variance(f1Delta, f1DeltaMu),
|
|
1308
|
+
f2DeltaMu,
|
|
1309
|
+
variance(f2Delta, f2DeltaMu),
|
|
1310
|
+
f3DeltaMu,
|
|
1311
|
+
variance(f3Delta, f3DeltaMu),
|
|
1312
|
+
b1Mu,
|
|
1313
|
+
variance(lpc.b1, b1Mu),
|
|
1314
|
+
b2Mu,
|
|
1315
|
+
variance(lpc.b2, b2Mu)
|
|
1316
|
+
];
|
|
1317
|
+
await yieldToMainThread();
|
|
1318
|
+
const voiceQualityFeatures = await extractVoiceQualityFeatures(
|
|
1319
|
+
normalizedSamples,
|
|
1320
|
+
sampleRate,
|
|
1321
|
+
frameSize,
|
|
1322
|
+
hopSize,
|
|
1323
|
+
f0
|
|
1324
|
+
);
|
|
1325
|
+
const pitchShapeFeatures = pitchContourShape(f0, PITCH_CONTOUR_SHAPE_FEATURE_COUNT);
|
|
833
1326
|
const features = [
|
|
834
1327
|
...f0Features,
|
|
835
|
-
// 5
|
|
1328
|
+
// 5 [0..5] F0_STATS
|
|
836
1329
|
...f0DeltaFeatures,
|
|
837
|
-
// 4
|
|
1330
|
+
// 4 [5..9] F0_DELTA
|
|
838
1331
|
...jitterFeatures,
|
|
839
|
-
// 4
|
|
1332
|
+
// 4 [9..13] JITTER
|
|
840
1333
|
...shimmerFeatures,
|
|
841
|
-
// 4
|
|
1334
|
+
// 4 [13..17] SHIMMER
|
|
842
1335
|
...hnrFeatures,
|
|
843
|
-
// 5
|
|
1336
|
+
// 5 [17..22] HNR
|
|
844
1337
|
...formantFeatures,
|
|
845
|
-
// 8
|
|
1338
|
+
// 8 [22..30] FORMANT_RATIOS
|
|
846
1339
|
...ltasFeatures,
|
|
847
|
-
// 8
|
|
1340
|
+
// 8 [30..38] LTAS
|
|
848
1341
|
...voicingFeatures,
|
|
849
|
-
// 1
|
|
850
|
-
...ampFeatures
|
|
851
|
-
// 5
|
|
1342
|
+
// 1 [38] VOICING_RATIO
|
|
1343
|
+
...ampFeatures,
|
|
1344
|
+
// 5 [39..44] AMPLITUDE
|
|
1345
|
+
...mfccFeatures,
|
|
1346
|
+
// 72 [44..116] MFCC + delta-MFCC (MFCC[0] dropped)
|
|
1347
|
+
...lpcStats,
|
|
1348
|
+
// 24 [116..140] LPC coefficient stats
|
|
1349
|
+
...formantTrajectoryFeatures,
|
|
1350
|
+
// 16 [140..156] Formant absolutes + dynamics + bandwidths
|
|
1351
|
+
...voiceQualityFeatures,
|
|
1352
|
+
// 9 [156..165] Voice quality
|
|
1353
|
+
...pitchShapeFeatures
|
|
1354
|
+
// 5 [165..170] Pitch contour shape DCT
|
|
852
1355
|
];
|
|
853
1356
|
return { features, f0Contour: f0 };
|
|
854
1357
|
}
|
|
@@ -857,7 +1360,102 @@ async function extractSpeakerFeatures(audio) {
|
|
|
857
1360
|
return features;
|
|
858
1361
|
}
|
|
859
1362
|
|
|
1363
|
+
// src/extraction/fft.ts
|
|
1364
|
+
/**
 * Returns the smallest power of two that is greater than or equal to `n`,
 * with a minimum of 2.
 *
 * Doubling uses multiplication rather than a left shift: `<<` works on 32-bit
 * integers, so for n > 2^30 the shifted value would wrap to a negative number
 * and then to 0, and the loop would never finish.
 *
 * @param {number} n - Requested minimum size.
 * @returns {number} A power of two >= max(n, 2). NaN yields 2.
 */
function nextPow2(n) {
  if (n <= 2) return 2;
  let p = 2;
  while (p < n) p *= 2;
  return p;
}
|
|
1370
|
+
/**
 * Computes the forward FFT of a real signal with an iterative radix-2
 * algorithm (bit-reversal reordering followed by butterfly stages).
 *
 * The input is zero-padded, or truncated, to `size` samples. The result is
 * not scaled.
 *
 * @param {ArrayLike<number>} input - Real-valued samples.
 * @param {number} size - Transform length; must be a positive power of two.
 * @returns {{real: number[], imag: number[]}} Real and imaginary parts, each of
 *   length `size`.
 * @throws {Error} When `size` is not a positive power of two.
 */
function realFFT(input, size) {
  const isPowerOfTwo = size > 0 && (size & size - 1) === 0;
  if (!isPowerOfTwo) {
    throw new Error(`FFT size must be a positive power of two, got ${size}`);
  }

  const real = new Array(size);
  const imag = new Array(size).fill(0);
  for (let i = 0; i < size; i++) {
    real[i] = i < input.length ? input[i] ?? 0 : 0;
  }

  // Bit-reversal reordering. The imaginary part is all zeros at this point,
  // so only the real part needs swapping.
  let reversed = 0;
  for (let i = 1; i < size; i++) {
    let bit = size >> 1;
    while (reversed & bit) {
      reversed ^= bit;
      bit >>= 1;
    }
    reversed ^= bit;
    if (i < reversed) {
      [real[i], real[reversed]] = [real[reversed], real[i]];
    }
  }

  // Butterfly stages. Butterflies within a stage touch disjoint pairs, so
  // each twiddle factor is computed once and applied across all chunks.
  for (let half = 1; half < size; half <<= 1) {
    const span = half << 1;
    const phaseStep = -Math.PI / half;
    for (let k = 0; k < half; k++) {
      const phase = phaseStep * k;
      const wr = Math.cos(phase);
      const wi = Math.sin(phase);
      for (let chunk = 0; chunk < size; chunk += span) {
        const top = chunk + k;
        const bottom = top + half;
        const ar = real[top];
        const ai = imag[top];
        const br = real[bottom];
        const bi = imag[bottom];
        const tr = wr * br - wi * bi;
        const ti = wr * bi + wi * br;
        real[top] = ar + tr;
        imag[top] = ai + ti;
        real[bottom] = ar - tr;
        imag[bottom] = ai - ti;
      }
    }
  }
  return { real, imag };
}
|
|
1412
|
+
/**
 * Sums the squared magnitudes of the FFT bins whose frequency lies in
 * [fLow, fHigh) and divides the sum by N^2, where N is the FFT length.
 *
 * Only bins up to N / 2 are considered.
 *
 * @param {ArrayLike<number>} real - Real parts of the FFT output.
 * @param {ArrayLike<number>} imag - Imaginary parts of the FFT output.
 * @param {number} sampleRate - Sample rate of the transformed signal.
 * @param {number} fLow - Lower band edge (inclusive).
 * @param {number} fHigh - Upper band edge (exclusive).
 * @returns {number} Scaled band energy; 0 for an empty spectrum, an unusable
 *   sample rate, or a band with fLow >= fHigh or fLow < 0.
 */
function bandEnergy(real, imag, sampleRate, fLow, fHigh) {
  const size = real.length;
  const rateUnusable = !Number.isFinite(sampleRate) || sampleRate <= 0;
  if (size === 0 || rateUnusable || fLow >= fHigh || fLow < 0) {
    return 0;
  }

  const hzPerBin = sampleRate / size;
  const firstBin = Math.max(0, Math.ceil(fLow / hzPerBin));
  // The tiny offset keeps a bin sitting exactly on fHigh out of the band.
  const lastBin = Math.min(Math.floor(size / 2), Math.floor((fHigh - 1e-9) / hzPerBin));

  let total = 0;
  let bin = firstBin;
  while (bin <= lastBin) {
    const re = real[bin] ?? 0;
    const im = imag[bin] ?? 0;
    total += re * re + im * im;
    bin++;
  }
  return total / (size * size);
}
|
|
1428
|
+
/**
 * Finds the strongest spectrum bin inside the band [fLow, fHigh).
 *
 * Bins are compared by squared magnitude; on a tie the lowest-frequency bin
 * wins. The reported `amplitude` is that squared magnitude scaled by 1 / N^2,
 * where N is `real.length`.
 *
 * @param {ArrayLike<number>} real - Real parts of the spectrum.
 * @param {ArrayLike<number>} imag - Imaginary parts of the spectrum.
 * @param {number} sampleRate - Sampling rate (Hz) of the transformed signal.
 * @param {number} fLow - Lower band edge in Hz (inclusive).
 * @param {number} fHigh - Upper band edge in Hz (exclusive).
 * @returns {{freq: number, amplitude: number}} Peak frequency in Hz and its
 *   scaled power; both are 0 when the inputs are unusable or the band
 *   contains no bin.
 */
function peakInBand(real, imag, sampleRate, fLow, fHigh) {
  const binCount = real.length;
  if (binCount === 0) return { freq: 0, amplitude: 0 };
  if (!Number.isFinite(sampleRate) || sampleRate <= 0) return { freq: 0, amplitude: 0 };
  if (fLow >= fHigh || fLow < 0) return { freq: 0, amplitude: 0 };

  const hzPerBin = sampleRate / binCount;
  const firstBin = Math.max(0, Math.ceil(fLow / hzPerBin));
  // The small epsilon keeps a bin that sits exactly on fHigh out of the band.
  const lastBin = Math.min(
    Math.floor(binCount / 2),
    Math.floor((fHigh - 1e-9) / hzPerBin)
  );

  let peakBin = -1;
  let peakPower = -Infinity;
  let bin = firstBin;
  while (bin <= lastBin) {
    const re = real[bin] ?? 0;
    const im = imag[bin] ?? 0;
    const power = re * re + im * im;
    // Strict comparison: the first bin to reach the maximum is kept.
    if (power > peakPower) {
      peakPower = power;
      peakBin = bin;
    }
    bin += 1;
  }

  if (peakBin < 0) return { freq: 0, amplitude: 0 };
  return { freq: peakBin * hzPerBin, amplitude: peakPower / (binCount * binCount) };
}
|
|
1450
|
+
|
|
860
1451
|
// src/extraction/kinematic.ts
|
|
1452
|
+
// Feature-vector layout constants for the kinematic extractors.

// Motion block: the legacy statistics followed by the values appended by
// computeMotionV2.
var MOTION_LEGACY_COUNT = 54;
var MOTION_V2_ADDITIONS = 27;
var MOTION_FEATURE_COUNT = MOTION_LEGACY_COUNT + MOTION_V2_ADDITIONS;

// Touch block: the legacy statistics followed by the values appended by
// computeTouchV2.
var TOUCH_LEGACY_COUNT = 36;
var TOUCH_V2_ADDITIONS = 21;
var TOUCH_FEATURE_COUNT = TOUCH_LEGACY_COUNT + TOUCH_V2_ADDITIONS;

// Mouse dynamics are sized identically to the motion block (the SDK logs the
// two under one "Motion/Mouse" range).
var MOUSE_DYNAMICS_FEATURE_COUNT = MOTION_FEATURE_COUNT;
|
|
861
1459
|
function extractAccelerationMagnitude(samples, targetFrameCount) {
|
|
862
1460
|
if (samples.length < 2 || targetFrameCount < 2) return [];
|
|
863
1461
|
const magnitudes = samples.map((s) => Math.sqrt(s.ax * s.ax + s.ay * s.ay + s.az * s.az));
|
|
@@ -875,7 +1473,7 @@ function extractAccelerationMagnitude(samples, targetFrameCount) {
|
|
|
875
1473
|
return out;
|
|
876
1474
|
}
|
|
877
1475
|
function extractMotionFeatures(samples) {
|
|
878
|
-
if (samples.length < 5) return new Array(
|
|
1476
|
+
if (samples.length < 5) return new Array(MOTION_FEATURE_COUNT).fill(0);
|
|
879
1477
|
const axes = {
|
|
880
1478
|
ax: samples.map((s) => s.ax),
|
|
881
1479
|
ay: samples.map((s) => s.ay),
|
|
@@ -910,10 +1508,68 @@ function extractMotionFeatures(samples) {
|
|
|
910
1508
|
}
|
|
911
1509
|
features.push(windowVariances.length >= 2 ? variance(windowVariances) : 0);
|
|
912
1510
|
}
|
|
1511
|
+
features.push(...computeMotionV2(axes, samples));
|
|
913
1512
|
return features;
|
|
914
1513
|
}
|
|
1514
|
+
/**
 * Builds the v2 motion features appended after the legacy motion statistics.
 *
 * Output order (27 values):
 *   - 6 covariances: ax/gy, ay/gx, az/gz, ax/az, ay/az, gx/gy
 *   - 12 band energies: for each of ax, ay, az, the bands
 *     [0,2), [2,6), [6,12), [12,30) Hz
 *   - 2 tremor values: peak frequency and peak power of the acceleration
 *     magnitude spectrum inside 4-12 Hz
 *   - 2 reversal statistics: mean and variance over ax/ay/az of the
 *     sign-change rate of each axis' derivative (changes per second)
 *   - 1 mean gyroscope magnitude
 *   - 4 autocorrelations of the acceleration magnitude at lags 1, 5, 10, 25
 *
 * Non-finite results (NaN / +-Infinity) are replaced with 0, exactly as
 * computeMouseV2 does for the mouse variant of this feature block, so a
 * single bad sample field cannot leak NaN into the vector that gets hashed.
 *
 * @param {{ax: number[], ay: number[], az: number[], gx: number[], gy: number[], gz: number[]}} axes
 *   Per-axis series extracted from `samples`.
 * @param {Array<{ax: number, ay: number, az: number, gx: number, gy: number, gz: number, timestamp: number}>} samples
 *   Raw motion samples; `timestamp` is in milliseconds.
 * @returns {number[]} The v2 motion features, all finite.
 */
function computeMotionV2(axes, samples) {
  const out = [];

  // Cross-axis coupling between accelerometer and gyroscope channels.
  const covPairs = [
    [axes.ax, axes.gy],
    [axes.ay, axes.gx],
    [axes.az, axes.gz],
    [axes.ax, axes.az],
    [axes.ay, axes.az],
    [axes.gx, axes.gy]
  ];
  for (const [a, b] of covPairs) out.push(covariance(a, b));

  // Spectral features: every series is mean-centred before the FFT and
  // transformed at a power-of-two size of at least 64.
  const sampleRate = sampleRateFromTimestamps(samples.map((s) => s.timestamp));
  const fftSize = nextPow2(Math.max(64, axes.ax.length));
  const bands = [
    [0, 2],
    [2, 6],
    [6, 12],
    [12, 30]
  ];
  const accelSpectra = [axes.ax, axes.ay, axes.az].map(
    (axis) => realFFT(meanCenter(axis), fftSize)
  );
  for (const spectrum of accelSpectra) {
    for (const [lo, hi] of bands) {
      out.push(bandEnergy(spectrum.real, spectrum.imag, sampleRate, lo, hi));
    }
  }

  // Dominant 4-12 Hz component of the acceleration magnitude.
  const magnitude = samples.map(
    (s) => Math.sqrt(s.ax * s.ax + s.ay * s.ay + s.az * s.az)
  );
  const magSpectrum = realFFT(meanCenter(magnitude), fftSize);
  const tremor = peakInBand(
    magSpectrum.real,
    magSpectrum.imag,
    sampleRate,
    4,
    12
  );
  out.push(tremor.freq, tremor.amplitude);

  // Direction reversals per second; 0 when the capture has no usable duration.
  const duration = captureDurationSec(samples);
  const reversalRates = [axes.ax, axes.ay, axes.az].map(
    (axis) => duration > 0 ? signChangeCount(derivative2(axis)) / duration : 0
  );
  out.push(mean(reversalRates), variance(reversalRates));

  // Mean gyroscope magnitude across the capture.
  let gyroSum = 0;
  for (let i = 0; i < samples.length; i++) {
    const gx = samples[i].gx;
    const gy = samples[i].gy;
    const gz = samples[i].gz;
    gyroSum += Math.sqrt(gx * gx + gy * gy + gz * gz);
  }
  out.push(samples.length > 0 ? gyroSum / samples.length : 0);

  for (const lag of [1, 5, 10, 25]) {
    out.push(autocorrelation(magnitude, lag));
  }

  // Keep the block consistent with computeMouseV2: never emit NaN/Infinity.
  return out.map((v) => Number.isFinite(v) ? v : 0);
}
|
|
915
1571
|
function extractTouchFeatures(samples) {
|
|
916
|
-
if (samples.length < 5) return new Array(
|
|
1572
|
+
if (samples.length < 5) return new Array(TOUCH_FEATURE_COUNT).fill(0);
|
|
917
1573
|
const x = samples.map((s) => s.x);
|
|
918
1574
|
const y = samples.map((s) => s.y);
|
|
919
1575
|
const pressure = samples.map((s) => s.pressure);
|
|
@@ -941,8 +1597,78 @@ function extractTouchFeatures(samples) {
|
|
|
941
1597
|
}
|
|
942
1598
|
features.push(windowVariances.length >= 2 ? variance(windowVariances) : 0);
|
|
943
1599
|
}
|
|
1600
|
+
features.push(...computeTouchV2(samples, vx, vy));
|
|
944
1601
|
return features;
|
|
945
1602
|
}
|
|
1603
|
+
/**
 * Builds the v2 touch features appended after the legacy touch statistics.
 *
 * Output order:
 *   - condensed statistics of the pressure derivative (every value returned
 *     by `condense`, in its key order)
 *   - mean and variance of the contact aspect ratio (width / height, 0 when
 *     height is not positive)
 *   - mean and variance of the derivative of the contact area (width * height)
 *   - mean, variance and skewness of the absolute heading change between
 *     consecutive velocity vectors (near-rest vectors are skipped)
 *   - autocorrelation of the speed series at lags 1, 3, 5
 *   - condensed statistics of the gaps between consecutive timestamps
 *   - straightness: end-to-end distance divided by the summed speed series
 *   - mean and variance of per-stroke path lengths
 *
 * Non-finite results (NaN / +-Infinity) are replaced with 0, exactly as
 * computeMouseV2 does for its feature block. Without this a sample that lacks
 * `width`/`height`/`pressure` would turn several features into NaN.
 *
 * @param {Array<{x: number, y: number, pressure: number, width: number, height: number, timestamp: number}>} samples
 *   Raw touch samples.
 * @param {number[]} vx - Velocity series along x, as computed by the caller.
 * @param {number[]} vy - Velocity series along y, as computed by the caller.
 * @returns {number[]} The v2 touch features, all finite.
 */
function computeTouchV2(samples, vx, vy) {
  const out = [];

  // Pressure dynamics.
  const pressure = samples.map((s) => s.pressure);
  const dPressure = derivative2(pressure);
  out.push(...Object.values(condense(dPressure)));

  // Contact geometry: aspect ratio and rate of change of the contact area.
  const aspect = samples.map((s) => {
    const h = s.height;
    return h > 0 ? s.width / h : 0;
  });
  out.push(mean(aspect), variance(aspect));
  const area = samples.map((s) => s.width * s.height);
  const dArea = derivative2(area);
  out.push(mean(dArea), variance(dArea));

  // Heading change between consecutive velocity vectors. A heading is
  // meaningless for a (near-)zero vector, so those pairs are skipped.
  const CURVATURE_REST_EPS = 1e-3;
  const curvatures = [];
  for (let i = 1; i < vx.length; i++) {
    const v1x = vx[i - 1] ?? 0;
    const v1y = vy[i - 1] ?? 0;
    const v2x = vx[i] ?? 0;
    const v2y = vy[i] ?? 0;
    if (Math.hypot(v1x, v1y) < CURVATURE_REST_EPS || Math.hypot(v2x, v2y) < CURVATURE_REST_EPS) {
      continue;
    }
    const a1 = Math.atan2(v1y, v1x);
    const a2 = Math.atan2(v2y, v2x);
    // Wrap the difference into [-PI, PI] so a turn across the +-PI seam is
    // not counted as a near-full rotation.
    let d = a2 - a1;
    while (d > Math.PI) d -= 2 * Math.PI;
    while (d < -Math.PI) d += 2 * Math.PI;
    curvatures.push(Math.abs(d));
  }
  const curvStats = condense(curvatures);
  out.push(curvStats.mean, curvStats.variance, curvStats.skewness);

  // Speed series and its short-lag autocorrelation.
  const speed = vx.map((dx2, i) => {
    const dy2 = vy[i] ?? 0;
    return Math.sqrt(dx2 * dx2 + dy2 * dy2);
  });
  for (const lag of [1, 3, 5]) out.push(autocorrelation(speed, lag));

  // Timing between consecutive samples.
  const gaps = [];
  for (let i = 1; i < samples.length; i++) {
    gaps.push((samples[i]?.timestamp ?? 0) - (samples[i - 1]?.timestamp ?? 0));
  }
  out.push(...Object.values(condense(gaps)));

  // Straightness of the whole gesture.
  const totalPath = speed.reduce((a, b) => a + b, 0);
  const dx = (samples[samples.length - 1]?.x ?? 0) - (samples[0]?.x ?? 0);
  const dy = (samples[samples.length - 1]?.y ?? 0) - (samples[0]?.y ?? 0);
  const straight = Math.sqrt(dx * dx + dy * dy);
  out.push(totalPath > 0 ? straight / totalPath : 0);

  const strokeLengths = perStrokePathLengths(speed);
  out.push(mean(strokeLengths), variance(strokeLengths));

  // Keep the block consistent with computeMouseV2: never emit NaN/Infinity.
  return out.map((v) => Number.isFinite(v) ? v : 0);
}
|
|
1654
|
+
/**
 * Splits a speed series into strokes and returns the summed speed of each.
 *
 * A stroke is a maximal run of consecutive entries at or above the pause
 * threshold (0.5); any entry below it ends the current stroke.
 *
 * @param {number[]} speed - Per-sample speed values.
 * @returns {number[]} One accumulated length per stroke, in order.
 */
function perStrokePathLengths(speed) {
  const PAUSE_THRESHOLD = 0.5;
  const strokes = [];
  let running = 0;
  let active = false;

  for (let idx = 0; idx < speed.length; idx++) {
    const step = speed[idx];
    if (step >= PAUSE_THRESHOLD) {
      running += step;
      active = true;
      continue;
    }
    if (!active) continue;
    // First slow entry after movement: close the stroke.
    strokes.push(running);
    running = 0;
    active = false;
  }

  // A stroke still open at the end of the series is flushed here.
  if (active && running > 0) strokes.push(running);
  return strokes;
}
|
|
946
1672
|
function derivative2(values) {
|
|
947
1673
|
const d = [];
|
|
948
1674
|
for (let i = 1; i < values.length; i++) {
|
|
@@ -950,8 +1676,53 @@ function derivative2(values) {
|
|
|
950
1676
|
}
|
|
951
1677
|
return d;
|
|
952
1678
|
}
|
|
1679
|
+
/**
 * Returns a copy of `values` with the arithmetic mean subtracted from every
 * entry.
 *
 * @param {number[]} values - Input series.
 * @returns {number[]} Mean-centred series; an empty array for empty input.
 */
function meanCenter(values) {
  const count = values.length;
  if (count === 0) return [];

  let total = 0;
  for (let idx = 0; idx < count; idx++) total += values[idx];
  const average = total / count;

  return values.map((entry) => entry - average);
}
|
|
1686
|
+
/**
 * Sample covariance (n - 1 denominator) of two series over their common
 * prefix. Missing entries are read as 0.
 *
 * @param {number[]} a - First series.
 * @param {number[]} b - Second series.
 * @returns {number} Covariance, or 0 when fewer than two paired values exist.
 */
function covariance(a, b) {
  const pairCount = Math.min(a.length, b.length);
  if (pairCount < 2) return 0;

  // First pass: means of both series.
  let totalA = 0;
  let totalB = 0;
  let idx = 0;
  while (idx < pairCount) {
    totalA += a[idx] ?? 0;
    totalB += b[idx] ?? 0;
    idx += 1;
  }
  const avgA = totalA / pairCount;
  const avgB = totalB / pairCount;

  // Second pass: accumulate the products of the deviations.
  let crossSum = 0;
  idx = 0;
  while (idx < pairCount) {
    const devA = (a[idx] ?? 0) - avgA;
    const devB = (b[idx] ?? 0) - avgB;
    crossSum += devA * devB;
    idx += 1;
  }
  return crossSum / (pairCount - 1);
}
|
|
1703
|
+
/**
 * Counts how often a series flips between positive and negative.
 *
 * Zeros are ignored: a flip is measured against the most recent non-zero
 * value, so `[1, 0, -1]` counts as one change.
 *
 * @param {Iterable<number>} values - Input series.
 * @returns {number} Number of sign changes.
 */
function signChangeCount(values) {
  let flips = 0;
  let previous = 0;
  for (const current of values) {
    const wentUp = current > 0 && previous < 0;
    const wentDown = current < 0 && previous > 0;
    if (wentUp || wentDown) flips += 1;
    // Only non-zero values become the new reference.
    if (current !== 0) previous = current;
  }
  return flips;
}
|
|
1713
|
+
/**
 * Estimates the average sampling rate from the first and last timestamps.
 *
 * @param {number[]} timestampsMs - Sample timestamps in milliseconds.
 * @returns {number} Samples per second, or 0 when there are fewer than two
 *   timestamps or the elapsed time is not a positive finite number.
 */
function sampleRateFromTimestamps(timestampsMs) {
  const count = timestampsMs.length;
  if (count < 2) return 0;

  const firstMs = timestampsMs[0] ?? 0;
  const lastMs = timestampsMs[count - 1] ?? 0;
  const elapsedMs = lastMs - firstMs;
  if (!Number.isFinite(elapsedMs) || elapsedMs <= 0) return 0;

  // count - 1 intervals were observed over elapsedMs milliseconds.
  const intervals = count - 1;
  return intervals * 1e3 / elapsedMs;
}
|
|
1719
|
+
/**
 * Length of a capture in seconds, measured from the first sample's
 * `timestamp` to the last one's (both in milliseconds; missing values are
 * read as 0).
 *
 * @param {Array<{timestamp: number}>} samples - Captured samples.
 * @returns {number} Duration in seconds, or 0 when there are fewer than two
 *   samples or the elapsed time is not a positive finite number.
 */
function captureDurationSec(samples) {
  const count = samples.length;
  if (count < 2) return 0;

  const startMs = samples[0]?.timestamp ?? 0;
  const endMs = samples[count - 1]?.timestamp ?? 0;
  const elapsedMs = endMs - startMs;
  if (!Number.isFinite(elapsedMs) || elapsedMs <= 0) return 0;

  return elapsedMs / 1e3;
}
|
|
953
1724
|
function extractMouseDynamics(samples) {
|
|
954
|
-
if (samples.length < 10) return new Array(
|
|
1725
|
+
if (samples.length < 10) return new Array(MOUSE_DYNAMICS_FEATURE_COUNT).fill(0);
|
|
955
1726
|
const x = samples.map((s) => s.x);
|
|
956
1727
|
const y = samples.map((s) => s.y);
|
|
957
1728
|
const pressure = samples.map((s) => s.pressure);
|
|
@@ -1050,7 +1821,7 @@ function extractMouseDynamics(samples) {
|
|
|
1050
1821
|
const pressureStats = condense(pressure);
|
|
1051
1822
|
const moveDurStats = condense(movementDurations);
|
|
1052
1823
|
const segLenStats = condense(segmentLengths);
|
|
1053
|
-
|
|
1824
|
+
const legacyMouseDynamics = [
|
|
1054
1825
|
curvatureStats.mean,
|
|
1055
1826
|
curvatureStats.variance,
|
|
1056
1827
|
curvatureStats.skewness,
|
|
@@ -1106,6 +1877,61 @@ function extractMouseDynamics(samples) {
|
|
|
1106
1877
|
angleAutoCorr[2] ?? 0,
|
|
1107
1878
|
normalizedPathLength
|
|
1108
1879
|
];
|
|
1880
|
+
const v2 = computeMouseV2(samples, vx, vy, accX, accY, speed, acc, jerk, directions);
|
|
1881
|
+
return [...legacyMouseDynamics, ...v2];
|
|
1882
|
+
}
|
|
1883
|
+
/**
 * Builds the v2 mouse-dynamics features appended after the legacy ones.
 *
 * Output order (27 values):
 *   - 6 covariances: vx/vy, vx/accX, vx/accY, vy/accX, vy/accY, accX/accY
 *   - 12 band energies: for each of speed, acc, jerk, the bands
 *     [0,2), [2,6), [6,12), [12,30) Hz
 *   - 2 tremor values: peak frequency and peak power of the speed spectrum
 *     inside 4-12 Hz
 *   - 2 reversal statistics: mean and variance over vx/vy/speed of the
 *     sign-change rate of each channel's derivative (changes per second)
 *   - 1 mean absolute change between consecutive directions
 *   - 4 autocorrelations of speed at lags 1, 5, 10, 25
 *
 * Any non-finite value is replaced with 0 before returning.
 *
 * @param {Array<{timestamp: number}>} samples - Raw pointer samples.
 * @param {number[]} vx - Velocity along x.
 * @param {number[]} vy - Velocity along y.
 * @param {number[]} accX - Acceleration along x.
 * @param {number[]} accY - Acceleration along y.
 * @param {number[]} speed - Speed series.
 * @param {number[]} acc - Acceleration series passed to the FFT.
 * @param {number[]} jerk - Jerk series passed to the FFT.
 * @param {number[]} directions - Movement directions as angles in radians.
 * @returns {number[]} The v2 mouse features, all finite.
 */
function computeMouseV2(samples, vx, vy, accX, accY, speed, acc, jerk, directions) {
  const features = [];

  // Pairwise coupling between velocity and acceleration components.
  features.push(
    covariance(vx, vy),
    covariance(vx, accX),
    covariance(vx, accY),
    covariance(vy, accX),
    covariance(vy, accY),
    covariance(accX, accY)
  );

  // Spectra of the mean-centred speed / acceleration / jerk series.
  const timestamps = samples.map((s) => s.timestamp);
  const sampleRate = sampleRateFromTimestamps(timestamps);
  const fftSize = nextPow2(Math.max(64, speed.length));
  const speedSpectrum = realFFT(meanCenter(speed), fftSize);
  const accSpectrum = realFFT(meanCenter(acc), fftSize);
  const jerkSpectrum = realFFT(meanCenter(jerk), fftSize);

  const bandEdges = [
    [0, 2],
    [2, 6],
    [6, 12],
    [12, 30]
  ];
  [speedSpectrum, accSpectrum, jerkSpectrum].forEach((spectrum) => {
    bandEdges.forEach(([lo, hi]) => {
      features.push(bandEnergy(spectrum.real, spectrum.imag, sampleRate, lo, hi));
    });
  });

  // Dominant 4-12 Hz component of the speed signal.
  const { freq: tremorFreq, amplitude: tremorAmplitude } = peakInBand(
    speedSpectrum.real,
    speedSpectrum.imag,
    sampleRate,
    4,
    12
  );
  features.push(tremorFreq, tremorAmplitude);

  // Direction reversals per second; 0 when the capture has no usable duration.
  const duration = captureDurationSec(samples);
  const reversalRates = [];
  for (const channel of [vx, vy, speed]) {
    if (duration > 0) {
      reversalRates.push(signChangeCount(derivative2(channel)) / duration);
    } else {
      reversalRates.push(0);
    }
  }
  features.push(mean(reversalRates), variance(reversalRates));

  // Mean absolute turn between consecutive directions, each difference
  // wrapped into [-PI, PI] first.
  let turnTotal = 0;
  for (let idx = 1; idx < directions.length; idx++) {
    let turn = directions[idx] - directions[idx - 1];
    while (turn > Math.PI) turn -= 2 * Math.PI;
    while (turn < -Math.PI) turn += 2 * Math.PI;
    turnTotal += Math.abs(turn);
  }
  const turnCount = directions.length - 1;
  features.push(directions.length > 1 ? turnTotal / turnCount : 0);

  [1, 5, 10, 25].forEach((lag) => {
    features.push(autocorrelation(speed, lag));
  });

  // Never emit NaN or Infinity.
  return features.map((value) => (Number.isFinite(value) ? value : 0));
}
|
|
1110
1936
|
|
|
1111
1937
|
// src/hashing/simhash.ts
|
|
@@ -1145,7 +1971,7 @@ function getHyperplanes(dimension) {
|
|
|
1145
1971
|
cachedDimension = dimension;
|
|
1146
1972
|
return planes;
|
|
1147
1973
|
}
|
|
1148
|
-
var EXPECTED_FEATURE_DIMENSION =
|
|
1974
|
+
var EXPECTED_FEATURE_DIMENSION = SPEAKER_FEATURE_COUNT + MOTION_FEATURE_COUNT + TOUCH_FEATURE_COUNT;
|
|
1149
1975
|
function simhash(features) {
|
|
1150
1976
|
if (features.length === 0) {
|
|
1151
1977
|
return new Array(FINGERPRINT_BITS).fill(0);
|
|
@@ -4286,9 +5112,12 @@ async function extractFingerprintAndValidate(sensorData, config, walletAddress,
|
|
|
4286
5112
|
f0Contour,
|
|
4287
5113
|
accelMagnitude
|
|
4288
5114
|
} = await extractFeatures(sensorData);
|
|
5115
|
+
const AUDIO_END = SPEAKER_FEATURE_COUNT;
|
|
5116
|
+
const MOTION_END = AUDIO_END + MOTION_FEATURE_COUNT;
|
|
5117
|
+
const TOUCH_END = MOTION_END + TOUCH_FEATURE_COUNT;
|
|
4289
5118
|
const nonZero = features.filter((v) => v !== 0).length;
|
|
4290
5119
|
sdkLog(
|
|
4291
|
-
`[Entros SDK] Feature vector: ${features.length} dimensions, ${nonZero} non-zero. Audio[0
|
|
5120
|
+
`[Entros SDK] Feature vector: ${features.length} dimensions, ${nonZero} non-zero. Audio[0..${AUDIO_END - 1}]: ${features.slice(0, AUDIO_END).filter((v) => v !== 0).length} non-zero. Motion/Mouse[${AUDIO_END}..${MOTION_END - 1}]: ${features.slice(AUDIO_END, MOTION_END).filter((v) => v !== 0).length} non-zero. Touch[${MOTION_END}..${TOUCH_END - 1}]: ${features.slice(MOTION_END, TOUCH_END).filter((v) => v !== 0).length} non-zero.`
|
|
4292
5121
|
);
|
|
4293
5122
|
const fingerprint = simhash(normalizedFeatures);
|
|
4294
5123
|
const tbh = await generateTBH(fingerprint);
|
|
@@ -4492,9 +5321,12 @@ async function processSensorData(sensorData, config, wallet, connection, onProgr
|
|
|
4492
5321
|
);
|
|
4493
5322
|
solanaProof = serializeProof(proof, publicSignals);
|
|
4494
5323
|
} catch (proofErr) {
|
|
4495
|
-
const
|
|
4496
|
-
const
|
|
4497
|
-
const
|
|
5324
|
+
const motionStart = SPEAKER_FEATURE_COUNT;
|
|
5325
|
+
const touchStart = motionStart + MOTION_FEATURE_COUNT;
|
|
5326
|
+
const touchEnd = touchStart + TOUCH_FEATURE_COUNT;
|
|
5327
|
+
const audioNZ = features.slice(0, motionStart).filter((v) => v !== 0).length;
|
|
5328
|
+
const motionNZ = features.slice(motionStart, touchStart).filter((v) => v !== 0).length;
|
|
5329
|
+
const touchNZ = features.slice(touchStart, touchEnd).filter((v) => v !== 0).length;
|
|
4498
5330
|
const rawAudio = sensorData.audio?.samples.length ?? 0;
|
|
4499
5331
|
const rawMotion = sensorData.motion.length;
|
|
4500
5332
|
const rawTouch = sensorData.touch.length;
|