@entros/pulse-sdk 1.5.3 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.d.mts +34 -8
- package/dist/index.d.ts +34 -8
- package/dist/index.js +882 -50
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +882 -50
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -138,6 +138,25 @@ function sdkWarn(...args) {
|
|
|
138
138
|
|
|
139
139
|
// src/sensor/audio.ts
|
|
140
140
|
var TARGET_SAMPLE_RATE = 16e3;
|
|
141
|
+
// RMS level that normalizeCaptureRMS scales a capture towards.
var TARGET_CAPTURE_RMS = 0.05;
// Captures with an RMS below this are returned untouched instead of amplified.
var MIN_RMS_FOR_NORMALIZATION = 1e-4;
// Upper bound on the gain normalizeCaptureRMS will apply.
var MAX_NORMALIZATION_GAIN = 50;
/**
 * Scale a captured sample buffer so that its RMS approaches TARGET_CAPTURE_RMS.
 *
 * The gain is capped at MAX_NORMALIZATION_GAIN and every scaled sample is
 * clamped to [-1, 1].
 *
 * @param {Float32Array} samples - Captured samples (indexable, with `length`).
 * @returns {Float32Array} A new Float32Array holding the scaled samples, or the
 *   input buffer itself (same reference) when it is empty, when its RMS is
 *   below MIN_RMS_FOR_NORMALIZATION, or when its RMS is not a finite number.
 */
function normalizeCaptureRMS(samples) {
  if (samples.length === 0) return samples;
  let sumSq = 0;
  for (let i = 0; i < samples.length; i++) {
    const s = samples[i];
    sumSq += s * s;
  }
  const rms = Math.sqrt(sumSq / samples.length);
  // A single NaN/Infinity sample makes rms non-finite; the derived gain would
  // then be NaN (or 0) and wipe out every sample, so pass the buffer through.
  if (!Number.isFinite(rms) || rms < MIN_RMS_FOR_NORMALIZATION) return samples;
  const gain = Math.min(TARGET_CAPTURE_RMS / rms, MAX_NORMALIZATION_GAIN);
  const out = new Float32Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    out[i] = Math.max(-1, Math.min(1, samples[i] * gain));
  }
  return out;
}
|
|
141
160
|
async function captureAudio(options = {}) {
|
|
142
161
|
const {
|
|
143
162
|
signal,
|
|
@@ -225,8 +244,9 @@ async function captureAudio(options = {}) {
|
|
|
225
244
|
samples.set(chunk, offset);
|
|
226
245
|
offset += chunk.length;
|
|
227
246
|
}
|
|
247
|
+
const normalized = normalizeCaptureRMS(samples);
|
|
228
248
|
resolve({
|
|
229
|
-
samples,
|
|
249
|
+
samples: normalized,
|
|
230
250
|
sampleRate: capturedSampleRate,
|
|
231
251
|
duration: totalLength / capturedSampleRate
|
|
232
252
|
});
|
|
@@ -401,6 +421,9 @@ function variance(values, mu) {
|
|
|
401
421
|
for (const v of values) sum += (v - m) ** 2;
|
|
402
422
|
return sum / (values.length - 1);
|
|
403
423
|
}
|
|
424
|
+
var SKEWNESS_BOUND = 20;
|
|
425
|
+
var KURTOSIS_LOWER = 0;
|
|
426
|
+
var KURTOSIS_UPPER = 50;
|
|
404
427
|
function skewness(values) {
|
|
405
428
|
if (values.length < 3) return 0;
|
|
406
429
|
const n = values.length;
|
|
@@ -409,7 +432,8 @@ function skewness(values) {
|
|
|
409
432
|
if (s === 0) return 0;
|
|
410
433
|
let sum = 0;
|
|
411
434
|
for (const v of values) sum += ((v - m) / s) ** 3;
|
|
412
|
-
|
|
435
|
+
const raw = n / ((n - 1) * (n - 2)) * sum;
|
|
436
|
+
return Math.max(-SKEWNESS_BOUND, Math.min(SKEWNESS_BOUND, raw));
|
|
413
437
|
}
|
|
414
438
|
function kurtosis(values) {
|
|
415
439
|
if (values.length < 4) return 0;
|
|
@@ -420,7 +444,7 @@ function kurtosis(values) {
|
|
|
420
444
|
let sum = 0;
|
|
421
445
|
for (const v of values) sum += (v - m) ** 4 / s2 ** 2;
|
|
422
446
|
const k = n * (n + 1) / ((n - 1) * (n - 2) * (n - 3)) * sum - 3 * (n - 1) ** 2 / ((n - 2) * (n - 3));
|
|
423
|
-
return k;
|
|
447
|
+
return Math.max(KURTOSIS_LOWER, Math.min(KURTOSIS_UPPER, k));
|
|
424
448
|
}
|
|
425
449
|
function condense(values) {
|
|
426
450
|
const m = mean(values);
|
|
@@ -582,44 +606,458 @@ function findRoots(coefficients, maxIterations = 50) {
|
|
|
582
606
|
}
|
|
583
607
|
return roots;
|
|
584
608
|
}
|
|
585
|
-
function
|
|
609
|
+
/**
 * Analyse one (already windowed) frame: compute its LPC coefficients and pick
 * the three lowest-frequency resonances from the roots of the LPC polynomial.
 *
 * Only roots with a positive imaginary part are considered, and a root is kept
 * when its frequency lies strictly between 200 and 5000 and its bandwidth is
 * below 500 (same units as `sampleRate`).
 *
 * @param {Float32Array} frame - Frame samples.
 * @param {number} sampleRate - Sample rate used to convert root angle to frequency.
 * @param {number} [lpcOrder=12] - LPC order handed to autocorrelate/levinsonDurbin.
 * @returns {{lpcCoefficients: number[], formants: number[]|null, bandwidths: number[]|null}}
 *   `formants`/`bandwidths` are three-element arrays sorted by ascending
 *   frequency, or both null when fewer than three candidates qualify.
 */
function extractFrameAnalysis(frame, sampleRate, lpcOrder = 12) {
  const autocorr = autocorrelate(frame, lpcOrder);
  const lpcCoefficients = levinsonDurbin(autocorr, lpcOrder);
  const poles = findRoots(lpcCoefficients);

  const resonances = [];
  for (const [re, im] of poles) {
    // Conjugate pairs describe the same resonance; keep the upper half-plane only.
    if (im > 0) {
      const freq = Math.atan2(im, re) / (2 * Math.PI) * sampleRate;
      const bandwidth = -sampleRate / (2 * Math.PI) * Math.log(Math.sqrt(re * re + im * im));
      const plausible = freq > 200 && freq < 5e3 && bandwidth < 500;
      if (plausible) {
        resonances.push({ freq, bandwidth });
      }
    }
  }
  resonances.sort((lhs, rhs) => lhs.freq - rhs.freq);

  if (resonances.length < 3) {
    return { lpcCoefficients, formants: null, bandwidths: null };
  }
  const lowestThree = resonances.slice(0, 3);
  return {
    lpcCoefficients,
    formants: lowestThree.map((r) => r.freq),
    bandwidths: lowestThree.map((r) => r.bandwidth)
  };
}
|
|
602
|
-
function
|
|
638
|
+
/**
 * Slide a Hamming-windowed frame over `samples` and collect per-frame LPC
 * coefficients, the first three formants, their bandwidths and the F1/F2 and
 * F2/F3 ratios.
 *
 * @param {Float32Array} samples - Input signal (must support `subarray`).
 * @param {number} sampleRate - Sample rate forwarded to extractFrameAnalysis.
 * @param {number} frameSize - Frame length in samples.
 * @param {number} hopSize - Step between consecutive frames in samples.
 * @param {number} [lpcOrder=12] - LPC order; also the number of coefficient tracks.
 * @returns {{lpcCoefficients: number[][], f1: number[], f2: number[], f3: number[],
 *   b1: number[], b2: number[], b3: number[], f1f2: number[], f2f3: number[],
 *   numFramesAnalyzed: number}} Per-coefficient tracks (finite values only),
 *   formant/bandwidth tracks (only frames where three formants were found) and
 *   the number of frames processed. All tracks are empty and
 *   `numFramesAnalyzed` is 0 when no full frame fits or when `frameSize` /
 *   `hopSize` is not a positive number.
 */
function extractLpcAnalysis(samples, sampleRate, frameSize, hopSize, lpcOrder = 12) {
  const lpcCoefficients = Array.from({ length: lpcOrder }, () => []);
  const f1 = [];
  const f2 = [];
  const f3 = [];
  const b1 = [];
  const b2 = [];
  const b3 = [];
  const f1f2 = [];
  const f2f3 = [];
  const buildResult = (numFramesAnalyzed) => ({
    lpcCoefficients,
    f1,
    f2,
    f3,
    b1,
    b2,
    b3,
    f1f2,
    f2f3,
    numFramesAnalyzed
  });
  // Same framing guard as the MFCC / voice-quality extractors: a hop of 0
  // yields an infinite frame count and a negative frame size cannot be
  // allocated, so treat both as "nothing to analyse".
  if (!(frameSize > 0) || !(hopSize > 0)) {
    return buildResult(0);
  }
  const numFrames = Math.floor((samples.length - frameSize) / hopSize) + 1;
  if (numFrames < 1) {
    return buildResult(0);
  }
  let numFramesAnalyzed = 0;
  // One scratch buffer reused for every frame.
  const windowed = new Float32Array(frameSize);
  for (let i = 0; i < numFrames; i++) {
    const start = i * hopSize;
    const frame = samples.subarray(start, start + frameSize);
    for (let j = 0; j < frameSize; j++) {
      // Hamming window.
      windowed[j] = (frame[j] ?? 0) * (0.54 - 0.46 * Math.cos(2 * Math.PI * j / (frameSize - 1)));
    }
    const analysis = extractFrameAnalysis(windowed, sampleRate, lpcOrder);
    numFramesAnalyzed++;
    for (let c = 0; c < lpcOrder; c++) {
      const coeff = analysis.lpcCoefficients[c];
      if (Number.isFinite(coeff)) {
        lpcCoefficients[c].push(coeff);
      }
    }
    if (analysis.formants && analysis.bandwidths) {
      const [F1, F2, F3] = analysis.formants;
      const [B1, B2, B3] = analysis.bandwidths;
      f1.push(F1);
      f2.push(F2);
      f3.push(F3);
      b1.push(B1);
      b2.push(B2);
      b3.push(B3);
      if (F2 > 0) f1f2.push(F1 / F2);
      if (F3 > 0) f2f3.push(F2 / F3);
    }
  }
  return buildResult(numFramesAnalyzed);
}
|
|
705
|
+
|
|
706
|
+
// src/extraction/mfcc.ts
|
|
707
|
+
var NUM_MFCC_COEFFICIENTS = 13;
|
|
708
|
+
var MFCC_DROP_LEADING = 1;
|
|
709
|
+
var NUM_USED_MFCC = NUM_MFCC_COEFFICIENTS - MFCC_DROP_LEADING;
|
|
710
|
+
var DELTA_REGRESSION_HALF_WIDTH = 2;
|
|
711
|
+
var MFCC_FEATURE_COUNT = NUM_USED_MFCC * 4 + // mean, var, skew, kurt per coefficient
|
|
712
|
+
NUM_USED_MFCC * 2;
|
|
713
|
+
/**
 * First-order pre-emphasis filter: y[0] = x[0], y[i] = x[i] - 0.97 * x[i - 1].
 *
 * @param {Float32Array} samples - Input signal.
 * @returns {Float32Array} A new buffer of the same length (empty for empty input).
 */
function applyPreEmphasis(samples) {
  const count = samples.length;
  const emphasized = new Float32Array(count);
  if (count === 0) return emphasized;
  let previous = samples[0];
  emphasized[0] = previous;
  for (let idx = 1; idx < count; idx++) {
    const current = samples[idx];
    emphasized[idx] = current - 0.97 * previous;
    previous = current;
  }
  return emphasized;
}
|
|
722
|
+
/**
 * Regression-based delta (time derivative) of a series.
 *
 * For each position t the numerator is sum_k k * (series[t + k] - series[t - k])
 * for k = 1..halfWidth and the denominator is 2 * sum_k k^2. A lag whose
 * neighbours fall outside the series contributes to neither sum; when no lag
 * is usable the delta is 0.
 *
 * @param {number[]} series - Input values.
 * @param {number} halfWidth - Number of lags on each side.
 * @returns {number[]} Delta series with the same length as `series`.
 */
function computeDelta(series, halfWidth) {
  const frameCount = series.length;
  // Closed form of 2 * (1^2 + 2^2 + ... + halfWidth^2).
  const fullWindowDenom = halfWidth * (halfWidth + 1) * (2 * halfWidth + 1) / 3;
  return Array.from({ length: frameCount }, (_, t) => {
    let weightedDiff = 0;
    let denom = fullWindowDenom;
    for (let lag = 1; lag <= halfWidth; lag++) {
      const ahead = t + lag;
      const behind = t - lag;
      if (ahead < frameCount && behind >= 0) {
        weightedDiff += lag * (series[ahead] - series[behind]);
      } else {
        // Drop this lag's weight so edge frames are not biased towards zero.
        denom -= 2 * lag * lag;
      }
    }
    return denom <= 0 ? 0 : weightedDiff / denom;
  });
}
|
|
746
|
+
// Cached result of the dynamic "meyda" import; null until the first successful load.
var meydaModule = null;
/**
 * Lazily load the "meyda" module and cache it for later calls.
 *
 * @returns {Promise<object|null>} The module's default export when it has one,
 *   otherwise the module namespace; null when the import fails (the failure is
 *   not cached, so a later call tries again).
 */
async function getMeyda() {
  if (meydaModule) {
    return meydaModule.default ?? meydaModule;
  }
  try {
    meydaModule = await import("meyda");
  } catch {
    return null;
  }
  return meydaModule.default ?? meydaModule;
}
|
|
757
|
+
/**
 * Build the MFCC block of the feature vector.
 *
 * The signal is pre-emphasised, split into frames, and Meyda's "mfcc"
 * extractor is run on each frame. The first MFCC_DROP_LEADING coefficients of
 * every frame are discarded. Output layout: for each retained coefficient
 * mean, variance, skewness, kurtosis (from condense), followed by, for each
 * retained coefficient, mean and variance of its delta track.
 *
 * @param {Float32Array} samples - Input signal.
 * @param {number} sampleRate - Sample rate in Hz.
 * @param {number} frameSize - Frame length in samples.
 * @param {number} hopSize - Step between frames in samples.
 * @returns {Promise<number[]>} MFCC_FEATURE_COUNT values; all zeros when the
 *   inputs are unusable, Meyda cannot be loaded, or fewer than 5 frames fit.
 */
async function extractMfccFeatures(samples, sampleRate, frameSize, hopSize) {
  const zeroFeatures = () => new Array(MFCC_FEATURE_COUNT).fill(0);

  const badRate = !Number.isFinite(sampleRate) || sampleRate <= 0;
  const badFraming = frameSize <= 0 || hopSize <= 0;
  if (badRate || samples.length === 0 || badFraming) {
    return zeroFeatures();
  }

  const Meyda = await getMeyda();
  if (!Meyda) {
    sdkWarn("[Entros SDK] Meyda unavailable; MFCC features will be zeros.");
    return zeroFeatures();
  }

  const numFrames = Math.floor((samples.length - frameSize) / hopSize) + 1;
  if (numFrames < 5) {
    return zeroFeatures();
  }

  // One time series per retained coefficient.
  const mfccTracks = Array.from({ length: NUM_USED_MFCC }, () => []);
  const frame = new Float32Array(frameSize);
  // Meyda is configured through properties on the module object.
  Meyda.bufferSize = frameSize;
  Meyda.sampleRate = sampleRate;
  const emphasized = applyPreEmphasis(samples);

  for (let frameIdx = 0; frameIdx < numFrames; frameIdx++) {
    const offset = frameIdx * hopSize;
    frame.set(emphasized.subarray(offset, offset + frameSize), 0);
    const coeffs = Meyda.extract("mfcc", frame);
    const wellFormed = Array.isArray(coeffs) && coeffs.length === NUM_MFCC_COEFFICIENTS;
    if (!wellFormed) continue;

    // Skip the whole frame if any coefficient is NaN/Infinity so that all
    // tracks stay the same length.
    let sawNonFinite = false;
    for (const value of coeffs) {
      if (!Number.isFinite(value)) {
        sawNonFinite = true;
        break;
      }
    }
    if (sawNonFinite) continue;

    mfccTracks.forEach((track, c) => {
      track.push(coeffs[c + MFCC_DROP_LEADING]);
    });
  }

  const features = [];
  for (const track of mfccTracks) {
    const summary = condense(track);
    features.push(summary.mean, summary.variance, summary.skewness, summary.kurtosis);
  }
  for (const track of mfccTracks) {
    const delta = computeDelta(track, DELTA_REGRESSION_HALF_WIDTH);
    const deltaMean = mean(delta);
    features.push(deltaMean, variance(delta, deltaMean));
  }
  return features;
}
|
|
622
815
|
|
|
816
|
+
// src/extraction/voice-quality.ts
|
|
817
|
+
var VOICE_QUALITY_FEATURE_COUNT = 9;
|
|
818
|
+
var LOW_BAND_HZ = 1e3;
|
|
819
|
+
var MID_BAND_HZ = 3e3;
|
|
820
|
+
var HIGH_BAND_HZ = 8e3;
|
|
821
|
+
/**
 * Quefrency search range used by cepstralPeakProminence.
 *
 * @param {number} sampleRate - Sample rate in Hz.
 * @returns {{qMin: number, qMax: number}} `qMin` is floor(sampleRate / 400)
 *   but never less than 2; `qMax` is floor(sampleRate / 60). These are the
 *   period lengths, in samples, of a 400 Hz and a 60 Hz tone.
 */
function cppQuefrencyRange(sampleRate) {
  const shortestPeriod = Math.floor(sampleRate / 400);
  const longestPeriod = Math.floor(sampleRate / 60);
  return {
    qMin: Math.max(2, shortestPeriod),
    qMax: longestPeriod
  };
}
|
|
827
|
+
// Cached result of the dynamic "meyda" import; null until the first successful load.
var meydaModule2 = null;
/**
 * Lazily load the "meyda" module and cache it for later calls.
 *
 * @returns {Promise<object|null>} The module's default export when it has one,
 *   otherwise the module namespace; null when the import fails (the failure is
 *   not cached, so a later call tries again).
 */
async function getMeyda2() {
  if (meydaModule2) {
    return meydaModule2.default ?? meydaModule2;
  }
  try {
    meydaModule2 = await import("meyda");
  } catch {
    return null;
  }
  return meydaModule2.default ?? meydaModule2;
}
|
|
838
|
+
/**
 * Cepstral peak prominence of one frame.
 *
 * Takes the natural log of the power spectrum (floored at 1e-12), evaluates an
 * unnormalised DCT-II of it at every quefrency in the range given by
 * cppQuefrencyRange, finds the largest value in that band, and returns how far
 * that peak sits above a least-squares line fitted through the band.
 *
 * @param {ArrayLike<number>} powerSpectrum - Power spectrum of the frame.
 * @param {number} sampleRate - Sample rate in Hz.
 * @returns {number} Peak height minus the regression baseline at the peak, or
 *   0 when the spectrum has fewer than 8 bins, the quefrency range does not
 *   fit inside it, a log value is not finite, or the fit is degenerate.
 */
function cepstralPeakProminence(powerSpectrum, sampleRate) {
  const binCount = powerSpectrum.length;
  if (binCount < 8) return 0;
  const { qMin, qMax } = cppQuefrencyRange(sampleRate);
  if (qMax >= binCount || qMax <= qMin) return 0;

  const POWER_FLOOR = 1e-12;
  const logSpectrum = new Array(binCount);
  for (let bin = 0; bin < binCount; bin++) {
    const logValue = Math.log(Math.max(powerSpectrum[bin], POWER_FLOOR));
    if (!Number.isFinite(logValue)) return 0;
    logSpectrum[bin] = logValue;
  }

  // Cepstrum restricted to the quefrency band [qMin, qMax].
  const bandLength = qMax - qMin + 1;
  const band = new Array(bandLength);
  const angleStep = Math.PI / binCount;
  for (let offset = 0; offset < bandLength; offset++) {
    const quefrency = qMin + offset;
    let acc = 0;
    for (let bin = 0; bin < binCount; bin++) {
      acc += logSpectrum[bin] * Math.cos(angleStep * (bin + 0.5) * quefrency);
    }
    band[offset] = acc;
  }

  // Single pass: locate the peak and accumulate the regression sums.
  let peakOffset = 0;
  let peakValue = band[0];
  let sumX = 0;
  let sumY = 0;
  let sumXX = 0;
  let sumXY = 0;
  for (let offset = 0; offset < bandLength; offset++) {
    const x = qMin + offset;
    const y = band[offset];
    if (offset > 0 && y > peakValue) {
      peakValue = y;
      peakOffset = offset;
    }
    sumX += x;
    sumY += y;
    sumXX += x * x;
    sumXY += x * y;
  }

  const determinant = bandLength * sumXX - sumX * sumX;
  if (Math.abs(determinant) < 1e-12) return 0;
  const slope = (bandLength * sumXY - sumX * sumY) / determinant;
  const intercept = (sumY - slope * sumX) / bandLength;
  const baseline = intercept + slope * (qMin + peakOffset);
  return peakValue - baseline;
}
|
|
891
|
+
/**
 * Spectral tilt: least-squares slope of ln(power) against ln(bin index).
 *
 * Bins below `max(1, floor(100 * 2 * (N - 1) / sampleRate))` are ignored, as
 * are bins whose power is under 1e-12.
 *
 * @param {ArrayLike<number>} powerSpectrum - Power spectrum of the frame.
 * @param {number} sampleRate - Sample rate in Hz.
 * @returns {number} Regression slope, or 0 when the spectrum has fewer than 8
 *   bins, fewer than 4 bins are usable, or the fit is degenerate.
 */
function spectralTilt(powerSpectrum, sampleRate) {
  const binCount = powerSpectrum.length;
  if (binCount < 8) return 0;

  const POWER_FLOOR = 1e-12;
  const firstBin = Math.max(1, Math.floor(100 * 2 * (binCount - 1) / sampleRate));

  let used = 0;
  let sumX = 0;
  let sumY = 0;
  let sumXX = 0;
  let sumXY = 0;
  for (let bin = firstBin; bin < binCount; bin++) {
    const power = powerSpectrum[bin];
    if (power < POWER_FLOOR) continue;
    const logBin = Math.log(bin);
    const logPower = Math.log(power);
    if (Number.isFinite(logBin) && Number.isFinite(logPower)) {
      sumX += logBin;
      sumY += logPower;
      sumXX += logBin * logBin;
      sumXY += logBin * logPower;
      used++;
    }
  }

  if (used < 4) return 0;
  const determinant = used * sumXX - sumX * sumX;
  if (Math.abs(determinant) < 1e-12) return 0;
  return (used * sumXY - sumX * sumY) / determinant;
}
|
|
918
|
+
/**
 * Level difference, in dB, between the first and second harmonic of `f0`.
 *
 * Each harmonic's power is the largest spectrum value within two bins of the
 * expected bin (bin 0 and out-of-range bins are never inspected).
 *
 * @param {ArrayLike<number>} powerSpectrum - Power spectrum of the frame.
 * @param {number} sampleRate - Sample rate in Hz.
 * @param {number} f0 - Fundamental frequency in Hz.
 * @returns {number} 10 * log10(H1 / H2), or 0 when `f0` is not a positive
 *   finite number, the spectrum has fewer than 8 bins, or either harmonic's
 *   power is not a positive finite number.
 */
function h1MinusH2(powerSpectrum, sampleRate, f0) {
  if (!Number.isFinite(f0) || f0 <= 0) return 0;
  const binCount = powerSpectrum.length;
  if (binCount < 8) return 0;

  const binsPerHz = 2 * (binCount - 1) / sampleRate;
  const SEARCH_RADIUS = 2;
  const strongestNear = (center) => {
    let strongest = -Infinity;
    for (let bin = center - SEARCH_RADIUS; bin <= center + SEARCH_RADIUS; bin++) {
      if (bin > 0 && bin < binCount) {
        const power = powerSpectrum[bin];
        if (power > strongest) strongest = power;
      }
    }
    return strongest;
  };

  const firstHarmonic = strongestNear(Math.round(f0 * binsPerHz));
  const secondHarmonic = strongestNear(Math.round(2 * f0 * binsPerHz));
  const usable =
    Number.isFinite(firstHarmonic) &&
    Number.isFinite(secondHarmonic) &&
    firstHarmonic > 0 &&
    secondHarmonic > 0;
  return usable ? 10 * Math.log10(firstHarmonic / secondHarmonic) : 0;
}
|
|
940
|
+
/**
 * Share of spectral power in three consecutive bands bounded by LOW_BAND_HZ,
 * MID_BAND_HZ and HIGH_BAND_HZ.
 *
 * Bin 0 is excluded; negative or non-finite powers are ignored. The total used
 * as denominator covers every counted bin, including bins above HIGH_BAND_HZ,
 * so the three ratios need not sum to 1.
 *
 * @param {ArrayLike<number>} powerSpectrum - Power spectrum of the frame.
 * @param {number} sampleRate - Sample rate in Hz.
 * @returns {[number, number, number]} [low, mid, high] ratios, or [0, 0, 0]
 *   when the spectrum has fewer than 4 bins or total power is below 1e-12.
 */
function subbandRatios(powerSpectrum, sampleRate) {
  const binCount = powerSpectrum.length;
  if (binCount < 4) return [0, 0, 0];

  const binsPerHz = 2 * (binCount - 1) / sampleRate;
  const lastBin = binCount - 1;
  const edgeBin = (hz) => Math.min(lastBin, Math.round(hz * binsPerHz));
  const lowEdge = edgeBin(LOW_BAND_HZ);
  const midEdge = edgeBin(MID_BAND_HZ);
  const highEdge = edgeBin(HIGH_BAND_HZ);

  const bandPower = [0, 0, 0];
  let totalPower = 0;
  for (let bin = 1; bin < binCount; bin++) {
    const power = powerSpectrum[bin];
    if (!Number.isFinite(power) || power < 0) continue;
    totalPower += power;
    if (bin <= lowEdge) {
      bandPower[0] += power;
    } else if (bin <= midEdge) {
      bandPower[1] += power;
    } else if (bin <= highEdge) {
      bandPower[2] += power;
    }
  }

  if (totalPower < 1e-12) return [0, 0, 0];
  return bandPower.map((power) => power / totalPower);
}
|
|
962
|
+
/**
 * Build the voice-quality block of the feature vector.
 *
 * For every frame Meyda's "powerSpectrum" is computed and fed to
 * cepstralPeakProminence, spectralTilt, h1MinusH2 (only for frames whose f0 is
 * positive) and subbandRatios.
 *
 * @param {Float32Array} samples - Input signal.
 * @param {number} sampleRate - Sample rate in Hz.
 * @param {number} frameSize - Frame length in samples.
 * @param {number} hopSize - Step between frames in samples.
 * @param {ArrayLike<number>} f0PerFrame - f0 per frame, indexed by frame
 *   number; missing entries count as 0.
 * @returns {Promise<number[]>} VOICE_QUALITY_FEATURE_COUNT values in the order
 *   CPP mean, CPP variance, tilt mean, tilt variance, H1-H2 mean, H1-H2
 *   variance, low-band mean, mid-band mean, high-band mean; all zeros when the
 *   inputs are unusable, Meyda cannot be loaded, or fewer than 5 frames fit.
 */
async function extractVoiceQualityFeatures(samples, sampleRate, frameSize, hopSize, f0PerFrame) {
  const zeroFeatures = () => new Array(VOICE_QUALITY_FEATURE_COUNT).fill(0);

  const badRate = !Number.isFinite(sampleRate) || sampleRate <= 0;
  const badFraming = frameSize <= 0 || hopSize <= 0;
  if (badRate || samples.length === 0 || badFraming) {
    return zeroFeatures();
  }

  const Meyda = await getMeyda2();
  if (!Meyda) {
    sdkWarn("[Entros SDK] Meyda unavailable; voice quality features will be zeros.");
    return zeroFeatures();
  }

  const numFrames = Math.floor((samples.length - frameSize) / hopSize) + 1;
  if (numFrames < 5) {
    return zeroFeatures();
  }

  const cppTrack = [];
  const tiltTrack = [];
  const h1h2Track = [];
  const lowTrack = [];
  const midTrack = [];
  const highTrack = [];

  const frame = new Float32Array(frameSize);
  // Meyda is configured through properties on the module object.
  Meyda.bufferSize = frameSize;
  Meyda.sampleRate = sampleRate;

  for (let frameIdx = 0; frameIdx < numFrames; frameIdx++) {
    const offset = frameIdx * hopSize;
    frame.set(samples.subarray(offset, offset + frameSize), 0);
    const power = Meyda.extract("powerSpectrum", frame);
    if (!power || power.length === 0) continue;

    const cpp = cepstralPeakProminence(power, sampleRate);
    if (Number.isFinite(cpp)) cppTrack.push(cpp);

    const tilt = spectralTilt(power, sampleRate);
    if (Number.isFinite(tilt)) tiltTrack.push(tilt);

    const frameF0 = f0PerFrame[frameIdx] ?? 0;
    if (frameF0 > 0) {
      const h1h2 = h1MinusH2(power, sampleRate, frameF0);
      if (Number.isFinite(h1h2)) h1h2Track.push(h1h2);
    }

    const [lowShare, midShare, highShare] = subbandRatios(power, sampleRate);
    lowTrack.push(lowShare);
    midTrack.push(midShare);
    highTrack.push(highShare);
  }

  const meanAndVariance = (track) => {
    const mu = mean(track);
    return [mu, variance(track, mu)];
  };
  return [
    ...meanAndVariance(cppTrack),
    ...meanAndVariance(tiltTrack),
    ...meanAndVariance(h1h2Track),
    mean(lowTrack),
    mean(midTrack),
    mean(highTrack)
  ];
}
|
|
1025
|
+
|
|
1026
|
+
// src/extraction/dct.ts
|
|
1027
|
+
/**
 * Unnormalised DCT-II: X[k] = sum_n input[n] * cos(pi / N * (n + 0.5) * k).
 *
 * @param {ArrayLike<number>} input - Signal of length N.
 * @param {number} numCoefficients - Number of coefficients requested; negative
 *   values are treated as 0.
 * @returns {number[]} Array of `numCoefficients` values. Only the first
 *   min(numCoefficients, N) are computed; the rest (and everything when the
 *   input is empty) stay 0.
 */
function dctII(input, numCoefficients) {
  const length = input.length;
  const requested = Math.max(0, numCoefficients);
  const coefficients = new Array(requested).fill(0);
  if (length === 0 || requested === 0) return coefficients;

  const computed = Math.min(requested, length);
  const angleStep = Math.PI / length;
  let k = 0;
  while (k < computed) {
    let acc = 0;
    for (let n = 0; n < length; n++) {
      acc += input[n] * Math.cos(angleStep * (n + 0.5) * k);
    }
    coefficients[k] = acc;
    k++;
  }
  return coefficients;
}
|
|
1043
|
+
/**
 * Describe the shape of a pitch contour with its leading DCT-II coefficients.
 *
 * Only finite, positive contour values are used. They are mean-centred,
 * transformed with dctII, and each coefficient is scaled by 1 / sqrt(count).
 *
 * @param {Iterable<number>} contour - f0 values.
 * @param {number} [numCoefficients=5] - Number of coefficients to return.
 * @returns {number[]} `numCoefficients` values; an empty array when
 *   `numCoefficients` is not positive, and all zeros when fewer than
 *   2 * numCoefficients usable contour values exist.
 */
function pitchContourShape(contour, numCoefficients = 5) {
  if (numCoefficients <= 0) return [];

  const voicedFrames = [];
  let total = 0;
  for (const f0 of contour) {
    if (Number.isFinite(f0) && f0 > 0) {
      voicedFrames.push(f0);
      total += f0;
    }
  }

  const count = voicedFrames.length;
  if (count < numCoefficients * 2) {
    return new Array(numCoefficients).fill(0);
  }

  const average = total / count;
  const scale = 1 / Math.sqrt(count);
  const centered = voicedFrames.map((f0) => f0 - average);
  return dctII(centered, numCoefficients).map((coefficient) => coefficient * scale);
}
|
|
1059
|
+
var PITCH_CONTOUR_SHAPE_FEATURE_COUNT = 5;
|
|
1060
|
+
|
|
623
1061
|
// src/yield.ts
|
|
624
1062
|
function yieldToMainThread() {
|
|
625
1063
|
return new Promise((resolve) => {
|
|
@@ -651,10 +1089,13 @@ function getFrameSize(sampleRate) {
|
|
|
651
1089
|
/**
 * Hop size in samples: `sampleRate * 0.01` rounded to the nearest integer,
 * never less than 1.
 *
 * @param {number} sampleRate - Sample rate in Hz.
 * @returns {number} Hop size in samples.
 */
function getHopSize(sampleRate) {
  const hop = Math.round(sampleRate * 0.01);
  return hop < 1 ? 1 : hop;
}
|
|
654
|
-
var
|
|
1092
|
+
var LEGACY_SPEAKER_FEATURE_COUNT = 44;
|
|
1093
|
+
var LPC_COEFFICIENT_STATS = 12 * 2;
|
|
1094
|
+
var FORMANT_TRAJECTORY_FEATURE_COUNT = 16;
|
|
1095
|
+
var SPEAKER_FEATURE_COUNT = LEGACY_SPEAKER_FEATURE_COUNT + MFCC_FEATURE_COUNT + LPC_COEFFICIENT_STATS + FORMANT_TRAJECTORY_FEATURE_COUNT + VOICE_QUALITY_FEATURE_COUNT + PITCH_CONTOUR_SHAPE_FEATURE_COUNT;
|
|
655
1096
|
var pitchDetector = null;
|
|
656
1097
|
var pitchDetectorRate = 0;
|
|
657
|
-
var
|
|
1098
|
+
var meydaModule3 = null;
|
|
658
1099
|
async function getPitchDetector(sampleRate) {
|
|
659
1100
|
if (!pitchDetector || pitchDetectorRate !== sampleRate) {
|
|
660
1101
|
const PitchFinder = await import("pitchfinder");
|
|
@@ -663,15 +1104,15 @@ async function getPitchDetector(sampleRate) {
|
|
|
663
1104
|
}
|
|
664
1105
|
return pitchDetector;
|
|
665
1106
|
}
|
|
666
|
-
async function
|
|
667
|
-
if (!
|
|
1107
|
+
/**
 * Lazily load the "meyda" module and cache it in `meydaModule3`.
 *
 * @returns {Promise<object|null>} The module's default export when it has one,
 *   otherwise the module namespace; null when the import fails (the failure is
 *   not cached, so a later call tries again).
 */
async function getMeyda3() {
  if (meydaModule3) {
    return meydaModule3.default ?? meydaModule3;
  }
  try {
    meydaModule3 = await import("meyda");
  } catch {
    return null;
  }
  return meydaModule3.default ?? meydaModule3;
}
|
|
676
1117
|
var F0_YIELD_EVERY_N_FRAMES = 16;
|
|
677
1118
|
async function detectF0Contour(samples, sampleRate) {
|
|
@@ -801,8 +1242,10 @@ function computeHNR(samples, sampleRate, f0Contour) {
|
|
|
801
1242
|
async function computeLTAS(samples, sampleRate) {
|
|
802
1243
|
const frameSize = getFrameSize(sampleRate);
|
|
803
1244
|
const hopSize = getHopSize(sampleRate);
|
|
804
|
-
const Meyda = await
|
|
1245
|
+
const Meyda = await getMeyda3();
|
|
805
1246
|
if (!Meyda) return new Array(8).fill(0);
|
|
1247
|
+
Meyda.bufferSize = frameSize;
|
|
1248
|
+
Meyda.sampleRate = sampleRate;
|
|
806
1249
|
const centroids = [];
|
|
807
1250
|
const rolloffs = [];
|
|
808
1251
|
const flatnesses = [];
|
|
@@ -814,8 +1257,7 @@ async function computeLTAS(samples, sampleRate) {
|
|
|
814
1257
|
paddedFrame.set(samples.subarray(start, start + frameSize), 0);
|
|
815
1258
|
const features = Meyda.extract(
|
|
816
1259
|
["spectralCentroid", "spectralRolloff", "spectralFlatness", "spectralSpread"],
|
|
817
|
-
paddedFrame
|
|
818
|
-
{ sampleRate, bufferSize: frameSize }
|
|
1260
|
+
paddedFrame
|
|
819
1261
|
);
|
|
820
1262
|
if (features) {
|
|
821
1263
|
if (Number.isFinite(features.spectralCentroid)) centroids.push(features.spectralCentroid);
|
|
@@ -881,9 +1323,9 @@ async function extractSpeakerFeaturesDetailed(audio) {
|
|
|
881
1323
|
for (let i = 0; i < numFrames; i++) {
|
|
882
1324
|
const start = i * hopSize;
|
|
883
1325
|
let sum = 0;
|
|
884
|
-
const end = Math.min(start + frameSize,
|
|
1326
|
+
const end = Math.min(start + frameSize, normalizedSamples.length);
|
|
885
1327
|
for (let j = start; j < end; j++) {
|
|
886
|
-
sum += (
|
|
1328
|
+
sum += (normalizedSamples[j] ?? 0) * (normalizedSamples[j] ?? 0);
|
|
887
1329
|
}
|
|
888
1330
|
amplitudes.push(Math.sqrt(sum / (end - start)));
|
|
889
1331
|
}
|
|
@@ -902,9 +1344,9 @@ async function extractSpeakerFeaturesDetailed(audio) {
|
|
|
902
1344
|
const hnrEntropy = entropy(hnrValues);
|
|
903
1345
|
const hnrFeatures = [hnrStats.mean, hnrStats.variance, hnrStats.skewness, hnrStats.kurtosis, hnrEntropy];
|
|
904
1346
|
await yieldToMainThread();
|
|
905
|
-
const
|
|
906
|
-
const f1f2Stats = condense(f1f2);
|
|
907
|
-
const f2f3Stats = condense(f2f3);
|
|
1347
|
+
const lpc = extractLpcAnalysis(normalizedSamples, sampleRate, frameSize, hopSize);
|
|
1348
|
+
const f1f2Stats = condense(lpc.f1f2);
|
|
1349
|
+
const f2f3Stats = condense(lpc.f2f3);
|
|
908
1350
|
const formantFeatures = [
|
|
909
1351
|
f1f2Stats.mean,
|
|
910
1352
|
f1f2Stats.variance,
|
|
@@ -921,25 +1363,86 @@ async function extractSpeakerFeaturesDetailed(audio) {
|
|
|
921
1363
|
const ampStats = condense(amplitudes);
|
|
922
1364
|
const ampEntropy = entropy(amplitudes);
|
|
923
1365
|
const ampFeatures = [ampStats.mean, ampStats.variance, ampStats.skewness, ampStats.kurtosis, ampEntropy];
|
|
1366
|
+
await yieldToMainThread();
|
|
1367
|
+
const mfccFeatures = await extractMfccFeatures(
|
|
1368
|
+
normalizedSamples,
|
|
1369
|
+
sampleRate,
|
|
1370
|
+
frameSize,
|
|
1371
|
+
hopSize
|
|
1372
|
+
);
|
|
1373
|
+
const lpcStats = [];
|
|
1374
|
+
for (let c = 0; c < 12; c++) {
|
|
1375
|
+
const track = lpc.lpcCoefficients[c] ?? [];
|
|
1376
|
+
const mu = mean(track);
|
|
1377
|
+
lpcStats.push(mu, variance(track, mu));
|
|
1378
|
+
}
|
|
1379
|
+
const f1Stats = { mean: mean(lpc.f1), var: variance(lpc.f1) };
|
|
1380
|
+
const f2Stats = { mean: mean(lpc.f2), var: variance(lpc.f2) };
|
|
1381
|
+
const f3Stats = { mean: mean(lpc.f3), var: variance(lpc.f3) };
|
|
1382
|
+
const f1Delta = derivative(lpc.f1);
|
|
1383
|
+
const f2Delta = derivative(lpc.f2);
|
|
1384
|
+
const f3Delta = derivative(lpc.f3);
|
|
1385
|
+
const f1DeltaMu = mean(f1Delta);
|
|
1386
|
+
const f2DeltaMu = mean(f2Delta);
|
|
1387
|
+
const f3DeltaMu = mean(f3Delta);
|
|
1388
|
+
const b1Mu = mean(lpc.b1);
|
|
1389
|
+
const b2Mu = mean(lpc.b2);
|
|
1390
|
+
const formantTrajectoryFeatures = [
|
|
1391
|
+
f1Stats.mean,
|
|
1392
|
+
f1Stats.var,
|
|
1393
|
+
f2Stats.mean,
|
|
1394
|
+
f2Stats.var,
|
|
1395
|
+
f3Stats.mean,
|
|
1396
|
+
f3Stats.var,
|
|
1397
|
+
f1DeltaMu,
|
|
1398
|
+
variance(f1Delta, f1DeltaMu),
|
|
1399
|
+
f2DeltaMu,
|
|
1400
|
+
variance(f2Delta, f2DeltaMu),
|
|
1401
|
+
f3DeltaMu,
|
|
1402
|
+
variance(f3Delta, f3DeltaMu),
|
|
1403
|
+
b1Mu,
|
|
1404
|
+
variance(lpc.b1, b1Mu),
|
|
1405
|
+
b2Mu,
|
|
1406
|
+
variance(lpc.b2, b2Mu)
|
|
1407
|
+
];
|
|
1408
|
+
await yieldToMainThread();
|
|
1409
|
+
const voiceQualityFeatures = await extractVoiceQualityFeatures(
|
|
1410
|
+
normalizedSamples,
|
|
1411
|
+
sampleRate,
|
|
1412
|
+
frameSize,
|
|
1413
|
+
hopSize,
|
|
1414
|
+
f0
|
|
1415
|
+
);
|
|
1416
|
+
const pitchShapeFeatures = pitchContourShape(f0, PITCH_CONTOUR_SHAPE_FEATURE_COUNT);
|
|
924
1417
|
const features = [
|
|
925
1418
|
...f0Features,
|
|
926
|
-
// 5
|
|
1419
|
+
// 5 [0..5] F0_STATS
|
|
927
1420
|
...f0DeltaFeatures,
|
|
928
|
-
// 4
|
|
1421
|
+
// 4 [5..9] F0_DELTA
|
|
929
1422
|
...jitterFeatures,
|
|
930
|
-
// 4
|
|
1423
|
+
// 4 [9..13] JITTER
|
|
931
1424
|
...shimmerFeatures,
|
|
932
|
-
// 4
|
|
1425
|
+
// 4 [13..17] SHIMMER
|
|
933
1426
|
...hnrFeatures,
|
|
934
|
-
// 5
|
|
1427
|
+
// 5 [17..22] HNR
|
|
935
1428
|
...formantFeatures,
|
|
936
|
-
// 8
|
|
1429
|
+
// 8 [22..30] FORMANT_RATIOS
|
|
937
1430
|
...ltasFeatures,
|
|
938
|
-
// 8
|
|
1431
|
+
// 8 [30..38] LTAS
|
|
939
1432
|
...voicingFeatures,
|
|
940
|
-
// 1
|
|
941
|
-
...ampFeatures
|
|
942
|
-
// 5
|
|
1433
|
+
// 1 [38] VOICING_RATIO
|
|
1434
|
+
...ampFeatures,
|
|
1435
|
+
// 5 [39..44] AMPLITUDE
|
|
1436
|
+
...mfccFeatures,
|
|
1437
|
+
// 72 [44..116] MFCC + delta-MFCC (MFCC[0] dropped)
|
|
1438
|
+
...lpcStats,
|
|
1439
|
+
// 24 [116..140] LPC coefficient stats
|
|
1440
|
+
...formantTrajectoryFeatures,
|
|
1441
|
+
// 16 [140..156] Formant absolutes + dynamics + bandwidths
|
|
1442
|
+
...voiceQualityFeatures,
|
|
1443
|
+
// 9 [156..165] Voice quality
|
|
1444
|
+
...pitchShapeFeatures
|
|
1445
|
+
// 5 [165..170] Pitch contour shape DCT
|
|
943
1446
|
];
|
|
944
1447
|
return { features, f0Contour: f0 };
|
|
945
1448
|
}
|
|
@@ -948,7 +1451,102 @@ async function extractSpeakerFeatures(audio) {
|
|
|
948
1451
|
return features;
|
|
949
1452
|
}
|
|
950
1453
|
|
|
1454
|
+
// src/extraction/fft.ts
|
|
1455
|
+
/**
 * Smallest power of two that is >= n, with a minimum of 2.
 *
 * Doubling uses multiplication rather than `<<`: the shift operator works on
 * 32-bit integers, so for n above 2^30 the running value would wrap to a
 * negative number and then to 0, and the loop would never finish.
 *
 * @param {number} n - Lower bound.
 * @returns {number} A power of two >= n; 2 when n <= 2 or n is NaN.
 */
function nextPow2(n) {
  if (n <= 2) return 2;
  let p = 2;
  while (p < n) p *= 2;
  return p;
}
|
|
1461
|
+
/**
 * Iterative radix-2 FFT of a real-valued signal.
 *
 * The input is zero-padded (or truncated) to `size` samples; missing entries
 * count as 0.
 *
 * @param {ArrayLike<number>} input - Real samples.
 * @param {number} size - Transform length; must be a positive integer power of two.
 * @returns {{real: number[], imag: number[]}} Real and imaginary parts of all
 *   `size` output bins.
 * @throws {Error} When `size` is not a positive integer power of two.
 */
function realFFT(input, size) {
  // The bitwise test truncates its operands, so fractional or NaN sizes must be
  // rejected first; otherwise they pass the test and fail later on allocation.
  if (!Number.isInteger(size) || size <= 0 || (size & size - 1) !== 0) {
    throw new Error(`FFT size must be a positive power of two, got ${size}`);
  }
  const real = new Array(size);
  const imag = new Array(size).fill(0);
  for (let i = 0; i < size; i++) {
    real[i] = i < input.length ? input[i] ?? 0 : 0;
  }
  // Bit-reversal permutation. Only `real` is swapped: `imag` is all zeros here.
  for (let i = 1, j = 0; i < size; i++) {
    let bit = size >> 1;
    for (; j & bit; bit >>= 1) j ^= bit;
    j ^= bit;
    if (i < j) {
      const tr = real[i];
      real[i] = real[j];
      real[j] = tr;
    }
  }
  // Butterfly stages, doubling the sub-transform length each pass.
  for (let halfSize = 1; halfSize < size; halfSize <<= 1) {
    const fullSize = halfSize << 1;
    const phaseStep = -Math.PI / halfSize;
    for (let chunkStart = 0; chunkStart < size; chunkStart += fullSize) {
      for (let k = 0; k < halfSize; k++) {
        const phase = phaseStep * k;
        const wr = Math.cos(phase);
        const wi = Math.sin(phase);
        const ar = real[chunkStart + k];
        const ai = imag[chunkStart + k];
        const br = real[chunkStart + k + halfSize];
        const bi = imag[chunkStart + k + halfSize];
        // Twiddle factor times the lower-half element.
        const tr = wr * br - wi * bi;
        const ti = wr * bi + wi * br;
        real[chunkStart + k] = ar + tr;
        imag[chunkStart + k] = ai + ti;
        real[chunkStart + k + halfSize] = ar - tr;
        imag[chunkStart + k + halfSize] = ai - ti;
      }
    }
  }
  return { real, imag };
}
|
|
1503
|
+
/**
 * Energy of an FFT result inside the frequency band [fLow, fHigh).
 *
 * Sums re^2 + im^2 over the bins from ceil(fLow / binHz) up to
 * floor((fHigh - 1e-9) / binHz), never going past bin N / 2, and divides the
 * sum by N^2, where N is `real.length` and binHz is sampleRate / N.
 *
 * @param {ArrayLike<number>} real - Real parts of the FFT bins.
 * @param {ArrayLike<number>} imag - Imaginary parts of the FFT bins.
 * @param {number} sampleRate - Sample rate in Hz.
 * @param {number} fLow - Lower band edge in Hz.
 * @param {number} fHigh - Upper band edge in Hz.
 * @returns {number} Normalised band energy, or 0 when the FFT is empty, the
 *   sample rate is not a positive finite number, or the band is invalid
 *   (fLow >= fHigh or fLow < 0).
 */
function bandEnergy(real, imag, sampleRate, fLow, fHigh) {
  const fftSize = real.length;
  const invalid =
    fftSize === 0 || !Number.isFinite(sampleRate) || sampleRate <= 0 || fLow >= fHigh || fLow < 0;
  if (invalid) return 0;

  const hzPerBin = sampleRate / fftSize;
  const firstBin = Math.max(0, Math.ceil(fLow / hzPerBin));
  // The small epsilon keeps a bin sitting exactly on fHigh out of the band.
  const lastBin = Math.min(Math.floor(fftSize / 2), Math.floor((fHigh - 1e-9) / hzPerBin));

  let total = 0;
  let bin = firstBin;
  while (bin <= lastBin) {
    const re = real[bin] ?? 0;
    const im = imag[bin] ?? 0;
    total += re * re + im * im;
    bin++;
  }
  return total / (fftSize * fftSize);
}
|
|
1519
|
+
/**
 * Finds the strongest FFT bin whose frequency lies in [fLow, fHigh).
 *
 * Strength is measured as power (re^2 + im^2); the first bin wins on ties.
 * Only bins 0..floor(N / 2) are searched, N being `real.length`.
 *
 * @param real       Real parts of the spectrum.
 * @param imag       Imaginary parts of the spectrum (missing entries count as 0).
 * @param sampleRate Sampling rate used to map bin indices to frequencies.
 * @param fLow       Inclusive lower band edge (must be >= 0 and < fHigh).
 * @param fHigh      Exclusive upper band edge.
 * @returns `{ freq, amplitude }` where `freq` is the winning bin's frequency
 *          and `amplitude` is its power divided by N^2. Both are 0 when the
 *          inputs are unusable or the band contains no bin.
 */
function peakInBand(real, imag, sampleRate, fLow, fHigh) {
  const N = real.length;
  const rateUsable = Number.isFinite(sampleRate) && sampleRate > 0;
  if (N === 0 || !rateUsable || fLow >= fHigh || fLow < 0) {
    return { freq: 0, amplitude: 0 };
  }

  const binHz = sampleRate / N;
  const firstBin = Math.max(0, Math.ceil(fLow / binHz));
  const lastBin = Math.min(Math.floor(N / 2), Math.floor((fHigh - 1e-9) / binHz));

  let peakBin = -1;
  let peakPower = -Infinity;
  for (let bin = firstBin; bin <= lastBin; bin += 1) {
    const re = real[bin] ?? 0;
    const im = imag[bin] ?? 0;
    const power = re * re + im * im;
    // Strict comparison keeps the lowest-frequency bin when powers tie.
    if (power > peakPower) {
      peakPower = power;
      peakBin = bin;
    }
  }

  return peakBin < 0
    ? { freq: 0, amplitude: 0 }
    : { freq: peakBin * binHz, amplitude: peakPower / (N * N) };
}
|
|
1541
|
+
|
|
951
1542
|
// src/extraction/kinematic.ts
|
|
1543
|
+
// Sizes of the per-modality feature blocks. Each *_FEATURE_COUNT is the legacy
// count plus the number of v2 additions, and is used to size the zero-filled
// fallback vectors returned by the extractors when too few samples are given.
// Mouse-dynamics vectors are sized to match the motion block exactly.
var MOTION_LEGACY_COUNT = 54;
|
|
1544
|
+
var MOTION_V2_ADDITIONS = 27;
|
|
1545
|
+
var MOTION_FEATURE_COUNT = MOTION_LEGACY_COUNT + MOTION_V2_ADDITIONS;
|
|
1546
|
+
var TOUCH_LEGACY_COUNT = 36;
|
|
1547
|
+
var TOUCH_V2_ADDITIONS = 21;
|
|
1548
|
+
var TOUCH_FEATURE_COUNT = TOUCH_LEGACY_COUNT + TOUCH_V2_ADDITIONS;
|
|
1549
|
+
var MOUSE_DYNAMICS_FEATURE_COUNT = MOTION_FEATURE_COUNT;
|
|
952
1550
|
function extractAccelerationMagnitude(samples, targetFrameCount) {
|
|
953
1551
|
if (samples.length < 2 || targetFrameCount < 2) return [];
|
|
954
1552
|
const magnitudes = samples.map((s) => Math.sqrt(s.ax * s.ax + s.ay * s.ay + s.az * s.az));
|
|
@@ -966,7 +1564,7 @@ function extractAccelerationMagnitude(samples, targetFrameCount) {
|
|
|
966
1564
|
return out;
|
|
967
1565
|
}
|
|
968
1566
|
function extractMotionFeatures(samples) {
|
|
969
|
-
if (samples.length < 5) return new Array(
|
|
1567
|
+
if (samples.length < 5) return new Array(MOTION_FEATURE_COUNT).fill(0);
|
|
970
1568
|
const axes = {
|
|
971
1569
|
ax: samples.map((s) => s.ax),
|
|
972
1570
|
ay: samples.map((s) => s.ay),
|
|
@@ -1001,10 +1599,68 @@ function extractMotionFeatures(samples) {
|
|
|
1001
1599
|
}
|
|
1002
1600
|
features.push(windowVariances.length >= 2 ? variance(windowVariances) : 0);
|
|
1003
1601
|
}
|
|
1602
|
+
features.push(...computeMotionV2(axes, samples));
|
|
1004
1603
|
return features;
|
|
1005
1604
|
}
|
|
1605
|
+
/**
 * Computes the 27 "v2" motion features that extractMotionFeatures appends
 * after the legacy motion block.
 *
 * Output order (fixed; 6 + 12 + 2 + 2 + 1 + 4 = 27 values):
 *   - 6 covariances: (ax,gy) (ay,gx) (az,gz) (ax,az) (ay,az) (gx,gy)
 *   - 12 band energies: for each of ax, ay, az (mean-centred), the bands
 *     [0,2) [2,6) [6,12) [12,30)
 *   - 2 tremor values: frequency and scaled power of the strongest bin of the
 *     acceleration-magnitude spectrum in the [4,12) band
 *   - 2 reversal statistics: mean and variance of the per-axis rate of sign
 *     changes in the first difference of ax, ay, az, per second of capture
 *   - 1 mean gyroscope magnitude
 *   - 4 autocorrelations of the acceleration magnitude at lags 1, 5, 10, 25
 *
 * Non-finite results (NaN / +-Infinity, e.g. from a sample missing a gyro
 * reading) are replaced with 0, matching what computeMouseV2 does, so a single
 * bad value cannot poison the downstream feature vector.
 *
 * @param axes    Per-axis series; reads `ax`, `ay`, `az`, `gx`, `gy`, `gz`.
 * @param samples Motion samples; reads `ax`, `ay`, `az`, `gx`, `gy`, `gz` and
 *                `timestamp` (milliseconds, as consumed by
 *                sampleRateFromTimestamps / captureDurationSec).
 * @returns Array of 27 finite numbers.
 */
function computeMotionV2(axes, samples) {
  const out = [];

  const covPairs = [
    [axes.ax, axes.gy],
    [axes.ay, axes.gx],
    [axes.az, axes.gz],
    [axes.ax, axes.az],
    [axes.ay, axes.az],
    [axes.gx, axes.gy]
  ];
  for (const [a, b] of covPairs) out.push(covariance(a, b));

  const sampleRate = sampleRateFromTimestamps(samples.map((s) => s.timestamp));
  // Zero-pad to a power of two, with a floor of 64 points.
  const fftSize = nextPow2(Math.max(64, axes.ax.length));
  const bands = [
    [0, 2],
    [2, 6],
    [6, 12],
    [12, 30]
  ];
  const accelSpectra = [axes.ax, axes.ay, axes.az].map(
    (axis) => realFFT(meanCenter(axis), fftSize)
  );
  for (const spectrum of accelSpectra) {
    for (const [lo, hi] of bands) {
      out.push(bandEnergy(spectrum.real, spectrum.imag, sampleRate, lo, hi));
    }
  }

  const magnitude = samples.map(
    (s) => Math.sqrt(s.ax * s.ax + s.ay * s.ay + s.az * s.az)
  );
  const magSpectrum = realFFT(meanCenter(magnitude), fftSize);
  const tremor = peakInBand(
    magSpectrum.real,
    magSpectrum.imag,
    sampleRate,
    4,
    12
  );
  out.push(tremor.freq, tremor.amplitude);

  const duration = captureDurationSec(samples);
  const reversalRates = [axes.ax, axes.ay, axes.az].map(
    (axis) => duration > 0 ? signChangeCount(derivative2(axis)) / duration : 0
  );
  out.push(mean(reversalRates), variance(reversalRates));

  let gyroSum = 0;
  for (const { gx, gy, gz } of samples) {
    gyroSum += Math.sqrt(gx * gx + gy * gy + gz * gz);
  }
  out.push(samples.length > 0 ? gyroSum / samples.length : 0);

  for (const lag of [1, 5, 10, 25]) {
    out.push(autocorrelation(magnitude, lag));
  }

  // Same guard as computeMouseV2: never emit NaN or Infinity as a feature.
  return out.map((v) => Number.isFinite(v) ? v : 0);
}
|
|
1006
1662
|
function extractTouchFeatures(samples) {
|
|
1007
|
-
if (samples.length < 5) return new Array(
|
|
1663
|
+
if (samples.length < 5) return new Array(TOUCH_FEATURE_COUNT).fill(0);
|
|
1008
1664
|
const x = samples.map((s) => s.x);
|
|
1009
1665
|
const y = samples.map((s) => s.y);
|
|
1010
1666
|
const pressure = samples.map((s) => s.pressure);
|
|
@@ -1032,8 +1688,78 @@ function extractTouchFeatures(samples) {
|
|
|
1032
1688
|
}
|
|
1033
1689
|
features.push(windowVariances.length >= 2 ? variance(windowVariances) : 0);
|
|
1034
1690
|
}
|
|
1691
|
+
features.push(...computeTouchV2(samples, vx, vy));
|
|
1035
1692
|
return features;
|
|
1036
1693
|
}
|
|
1694
|
+
/**
 * Computes the "v2" touch features that extractTouchFeatures appends after the
 * legacy touch block.
 *
 * Output order:
 *   - every value of condense() over the first difference of pressure
 *   - mean and variance of the contact aspect ratio (width / height, 0 when
 *     height is not positive)
 *   - mean and variance of the first difference of contact area
 *   - mean, variance and skewness of the absolute turning angle between
 *     consecutive velocity vectors (near-stationary steps are skipped)
 *   - autocorrelation of speed at lags 1, 3, 5
 *   - every value of condense() over the gaps between consecutive timestamps
 *   - straightness: end-to-end distance divided by total path length
 *   - mean and variance of per-stroke path lengths
 *
 * NOTE(review): the total is expected to equal TOUCH_V2_ADDITIONS (21), which
 * holds only if condense() returns an object with exactly four values —
 * confirm against condense(), which is defined outside this view.
 *
 * Non-finite results (NaN / +-Infinity, e.g. from a sample without contact
 * geometry) are replaced with 0, matching what computeMouseV2 does.
 *
 * @param samples Touch samples; reads `pressure`, `width`, `height`, `x`, `y`
 *                and `timestamp`.
 * @param vx      Per-step x velocity series supplied by the caller; summed
 *                here as if each entry were a per-step displacement.
 * @param vy      Per-step y velocity series, parallel to `vx`.
 * @returns Array of finite numbers in the order listed above.
 */
function computeTouchV2(samples, vx, vy) {
  const out = [];

  const pressure = samples.map((s) => s.pressure);
  const dPressure = derivative2(pressure);
  out.push(...Object.values(condense(dPressure)));

  const aspect = samples.map((s) => {
    const h = s.height;
    return h > 0 ? s.width / h : 0;
  });
  out.push(mean(aspect), variance(aspect));

  const area = samples.map((s) => s.width * s.height);
  const dArea = derivative2(area);
  out.push(mean(dArea), variance(dArea));

  // Direction is meaningless while the finger is effectively at rest, so steps
  // with either velocity below this magnitude are left out of the curvature set.
  const CURVATURE_REST_EPS = 1e-3;
  const curvatures = [];
  for (let i = 1; i < vx.length; i++) {
    const v1x = vx[i - 1] ?? 0;
    const v1y = vy[i - 1] ?? 0;
    const v2x = vx[i] ?? 0;
    const v2y = vy[i] ?? 0;
    if (Math.hypot(v1x, v1y) < CURVATURE_REST_EPS || Math.hypot(v2x, v2y) < CURVATURE_REST_EPS) {
      continue;
    }
    const a1 = Math.atan2(v1y, v1x);
    const a2 = Math.atan2(v2y, v2x);
    // Wrap the heading change into [-PI, PI] before taking its magnitude.
    let d = a2 - a1;
    while (d > Math.PI) d -= 2 * Math.PI;
    while (d < -Math.PI) d += 2 * Math.PI;
    curvatures.push(Math.abs(d));
  }
  const curvStats = condense(curvatures);
  out.push(curvStats.mean, curvStats.variance, curvStats.skewness);

  const speed = vx.map((dx2, i) => {
    const dy2 = vy[i] ?? 0;
    return Math.sqrt(dx2 * dx2 + dy2 * dy2);
  });
  for (const lag of [1, 3, 5]) out.push(autocorrelation(speed, lag));

  const gaps = [];
  for (let i = 1; i < samples.length; i++) {
    gaps.push((samples[i]?.timestamp ?? 0) - (samples[i - 1]?.timestamp ?? 0));
  }
  out.push(...Object.values(condense(gaps)));

  const totalPath = speed.reduce((a, b) => a + b, 0);
  const dx = (samples[samples.length - 1]?.x ?? 0) - (samples[0]?.x ?? 0);
  const dy = (samples[samples.length - 1]?.y ?? 0) - (samples[0]?.y ?? 0);
  const straight = Math.sqrt(dx * dx + dy * dy);
  out.push(totalPath > 0 ? straight / totalPath : 0);

  const strokeLengths = perStrokePathLengths(speed);
  out.push(mean(strokeLengths), variance(strokeLengths));

  // Same guard as computeMouseV2: never emit NaN or Infinity as a feature.
  return out.map((v) => Number.isFinite(v) ? v : 0);
}
|
|
1745
|
+
/**
 * Splits a speed series into strokes and returns the summed speed of each.
 *
 * A stroke is a maximal run of entries >= 0.5; any entry below that threshold
 * (or one that fails the comparison, such as NaN) ends the current stroke.
 *
 * @param speed Iterable of per-step speed values.
 * @returns One accumulated total per stroke, in order of occurrence.
 */
function perStrokePathLengths(speed) {
  const PAUSE_THRESHOLD = 0.5;
  const strokes = [];
  let running = 0;
  let active = false;

  for (const step of speed) {
    if (step >= PAUSE_THRESHOLD) {
      running += step;
      active = true;
      continue;
    }
    if (!active) continue;
    // A pause closes the stroke that was in progress.
    strokes.push(running);
    running = 0;
    active = false;
  }

  // Flush a stroke that was still open when the series ended.
  if (active && running > 0) strokes.push(running);
  return strokes;
}
|
|
1037
1763
|
function derivative2(values) {
|
|
1038
1764
|
const d = [];
|
|
1039
1765
|
for (let i = 1; i < values.length; i++) {
|
|
@@ -1041,8 +1767,53 @@ function derivative2(values) {
|
|
|
1041
1767
|
}
|
|
1042
1768
|
return d;
|
|
1043
1769
|
}
|
|
1770
|
+
/**
 * Returns a copy of `values` with its arithmetic mean subtracted from every
 * entry. An empty input yields a new empty array.
 *
 * @param values Numeric series.
 * @returns Series of the same length with the mean removed.
 */
function meanCenter(values) {
  const count = values.length;
  if (count === 0) return [];

  let total = 0;
  for (let i = 0; i < count; i++) total += values[i];
  const average = total / count;

  return values.map((entry) => entry - average);
}
|
|
1777
|
+
/**
 * Sample covariance (divisor n - 1) of two series over their common prefix.
 *
 * Only the first min(a.length, b.length) entries are used; nullish entries
 * count as 0. Fewer than two usable pairs yields 0.
 *
 * @param a First numeric series.
 * @param b Second numeric series.
 * @returns The sample covariance, or 0 when n < 2.
 */
function covariance(a, b) {
  const n = Math.min(a.length, b.length);
  if (n < 2) return 0;

  const valueAt = (series, index) => series[index] ?? 0;

  let totalA = 0;
  let totalB = 0;
  let idx = 0;
  while (idx < n) {
    totalA += valueAt(a, idx);
    totalB += valueAt(b, idx);
    idx += 1;
  }
  const centreA = totalA / n;
  const centreB = totalB / n;

  let crossSum = 0;
  for (idx = 0; idx < n; idx += 1) {
    crossSum += (valueAt(a, idx) - centreA) * (valueAt(b, idx) - centreB);
  }
  return crossSum / (n - 1);
}
|
|
1794
|
+
/**
 * Counts how many times a series switches between strictly positive and
 * strictly negative values. Zeros are ignored: the comparison is always made
 * against the most recent non-zero entry.
 *
 * @param values Iterable of numbers.
 * @returns Number of sign flips.
 */
function signChangeCount(values) {
  let flips = 0;
  let previous = 0;

  for (const current of values) {
    const roseThroughZero = current > 0 && previous < 0;
    const fellThroughZero = current < 0 && previous > 0;
    if (roseThroughZero || fellThroughZero) flips += 1;
    // Zeros do not reset the remembered sign.
    if (current !== 0) previous = current;
  }
  return flips;
}
|
|
1804
|
+
/**
 * Estimates the average sampling rate, in samples per second, from a list of
 * millisecond timestamps: (count - 1) intervals divided by the time between
 * the first and last entry.
 *
 * @param timestampsMs Timestamps in milliseconds; only the first and last
 *                     entries are read (nullish entries count as 0).
 * @returns The rate in Hz, or 0 when there are fewer than two timestamps or
 *          the span is not a positive finite number.
 */
function sampleRateFromTimestamps(timestampsMs) {
  const count = timestampsMs.length;
  if (count < 2) return 0;

  const lastMs = timestampsMs[count - 1] ?? 0;
  const firstMs = timestampsMs[0] ?? 0;
  const spanMs = lastMs - firstMs;

  return Number.isFinite(spanMs) && spanMs > 0 ? (count - 1) * 1e3 / spanMs : 0;
}
|
|
1810
|
+
/**
 * Length of a capture in seconds, measured from the first sample's
 * `timestamp` to the last sample's `timestamp` (both in milliseconds).
 *
 * @param samples Samples carrying a `timestamp` field; a missing sample or
 *                timestamp counts as 0.
 * @returns Duration in seconds, or 0 when there are fewer than two samples or
 *          the span is not a positive finite number.
 */
function captureDurationSec(samples) {
  const count = samples.length;
  if (count < 2) return 0;

  const endMs = samples[count - 1]?.timestamp ?? 0;
  const startMs = samples[0]?.timestamp ?? 0;
  const spanMs = endMs - startMs;

  if (!Number.isFinite(spanMs) || spanMs <= 0) return 0;
  return spanMs / 1e3;
}
|
|
1044
1815
|
function extractMouseDynamics(samples) {
|
|
1045
|
-
if (samples.length < 10) return new Array(
|
|
1816
|
+
if (samples.length < 10) return new Array(MOUSE_DYNAMICS_FEATURE_COUNT).fill(0);
|
|
1046
1817
|
const x = samples.map((s) => s.x);
|
|
1047
1818
|
const y = samples.map((s) => s.y);
|
|
1048
1819
|
const pressure = samples.map((s) => s.pressure);
|
|
@@ -1141,7 +1912,7 @@ function extractMouseDynamics(samples) {
|
|
|
1141
1912
|
const pressureStats = condense(pressure);
|
|
1142
1913
|
const moveDurStats = condense(movementDurations);
|
|
1143
1914
|
const segLenStats = condense(segmentLengths);
|
|
1144
|
-
|
|
1915
|
+
const legacyMouseDynamics = [
|
|
1145
1916
|
curvatureStats.mean,
|
|
1146
1917
|
curvatureStats.variance,
|
|
1147
1918
|
curvatureStats.skewness,
|
|
@@ -1197,6 +1968,61 @@ function extractMouseDynamics(samples) {
|
|
|
1197
1968
|
angleAutoCorr[2] ?? 0,
|
|
1198
1969
|
normalizedPathLength
|
|
1199
1970
|
];
|
|
1971
|
+
const v2 = computeMouseV2(samples, vx, vy, accX, accY, speed, acc, jerk, directions);
|
|
1972
|
+
return [...legacyMouseDynamics, ...v2];
|
|
1973
|
+
}
|
|
1974
|
+
/**
 * Computes the "v2" mouse-dynamics features that extractMouseDynamics appends
 * after its legacy block.
 *
 * Output order (6 + 12 + 2 + 2 + 1 + 4 = 27 values):
 *   - 6 covariances: (vx,vy) (vx,accX) (vx,accY) (vy,accX) (vy,accY) (accX,accY)
 *   - 12 band energies: for each of speed, acc, jerk (mean-centred), the bands
 *     [0,2) [2,6) [6,12) [12,30)
 *   - 2 tremor values: frequency and scaled power of the strongest bin of the
 *     speed spectrum in the [4,12) band
 *   - 2 reversal statistics: mean and variance of the rate of sign changes in
 *     the first difference of vx, vy and speed, per second of capture
 *   - 1 mean absolute change of direction between consecutive entries
 *   - 4 autocorrelations of speed at lags 1, 5, 10, 25
 *
 * Any non-finite result is replaced with 0 before returning.
 *
 * @param samples    Pointer samples; only `timestamp` is read here.
 * @param vx         Velocity x series.
 * @param vy         Velocity y series.
 * @param accX       Acceleration x series.
 * @param accY       Acceleration y series.
 * @param speed      Speed series.
 * @param acc        Acceleration magnitude series.
 * @param jerk       Jerk series.
 * @param directions Heading angles in radians.
 * @returns Array of 27 finite numbers.
 */
function computeMouseV2(samples, vx, vy, accX, accY, speed, acc, jerk, directions) {
  const features = [];

  const pairs = [
    [vx, vy],
    [vx, accX],
    [vx, accY],
    [vy, accX],
    [vy, accY],
    [accX, accY]
  ];
  pairs.forEach(([first, second]) => {
    features.push(covariance(first, second));
  });

  const timestamps = samples.map((s) => s.timestamp);
  const sampleRate = sampleRateFromTimestamps(timestamps);
  // Zero-pad to a power of two, with a floor of 64 points.
  const fftSize = nextPow2(Math.max(64, speed.length));
  const bands = [
    [0, 2],
    [2, 6],
    [6, 12],
    [12, 30]
  ];
  const speedSpectrum = realFFT(meanCenter(speed), fftSize);
  const accSpectrum = realFFT(meanCenter(acc), fftSize);
  const jerkSpectrum = realFFT(meanCenter(jerk), fftSize);
  [speedSpectrum, accSpectrum, jerkSpectrum].forEach(({ real, imag }) => {
    bands.forEach(([lo, hi]) => {
      features.push(bandEnergy(real, imag, sampleRate, lo, hi));
    });
  });

  const { freq, amplitude } = peakInBand(
    speedSpectrum.real,
    speedSpectrum.imag,
    sampleRate,
    4,
    12
  );
  features.push(freq, amplitude);

  const duration = captureDurationSec(samples);
  const reversalRates = [];
  for (const channel of [vx, vy, speed]) {
    reversalRates.push(
      duration > 0 ? signChangeCount(derivative2(channel)) / duration : 0
    );
  }
  features.push(mean(reversalRates), variance(reversalRates));

  let turnTotal = 0;
  for (let i = 1; i < directions.length; i++) {
    // Wrap each heading change into [-PI, PI] before accumulating its size.
    let delta = directions[i] - directions[i - 1];
    while (delta > Math.PI) delta -= 2 * Math.PI;
    while (delta < -Math.PI) delta += 2 * Math.PI;
    turnTotal += Math.abs(delta);
  }
  const turnCount = directions.length - 1;
  features.push(turnCount > 0 ? turnTotal / turnCount : 0);

  [1, 5, 10, 25].forEach((lag) => {
    features.push(autocorrelation(speed, lag));
  });

  return features.map((value) => (Number.isFinite(value) ? value : 0));
}
|
|
1201
2027
|
|
|
1202
2028
|
// src/hashing/simhash.ts
|
|
@@ -1236,7 +2062,7 @@ function getHyperplanes(dimension) {
|
|
|
1236
2062
|
cachedDimension = dimension;
|
|
1237
2063
|
return planes;
|
|
1238
2064
|
}
|
|
1239
|
-
var EXPECTED_FEATURE_DIMENSION =
|
|
2065
|
+
var EXPECTED_FEATURE_DIMENSION = SPEAKER_FEATURE_COUNT + MOTION_FEATURE_COUNT + TOUCH_FEATURE_COUNT;
|
|
1240
2066
|
function simhash(features) {
|
|
1241
2067
|
if (features.length === 0) {
|
|
1242
2068
|
return new Array(FINGERPRINT_BITS).fill(0);
|
|
@@ -4377,9 +5203,12 @@ async function extractFingerprintAndValidate(sensorData, config, walletAddress,
|
|
|
4377
5203
|
f0Contour,
|
|
4378
5204
|
accelMagnitude
|
|
4379
5205
|
} = await extractFeatures(sensorData);
|
|
5206
|
+
const AUDIO_END = SPEAKER_FEATURE_COUNT;
|
|
5207
|
+
const MOTION_END = AUDIO_END + MOTION_FEATURE_COUNT;
|
|
5208
|
+
const TOUCH_END = MOTION_END + TOUCH_FEATURE_COUNT;
|
|
4380
5209
|
const nonZero = features.filter((v) => v !== 0).length;
|
|
4381
5210
|
sdkLog(
|
|
4382
|
-
`[Entros SDK] Feature vector: ${features.length} dimensions, ${nonZero} non-zero. Audio[0
|
|
5211
|
+
`[Entros SDK] Feature vector: ${features.length} dimensions, ${nonZero} non-zero. Audio[0..${AUDIO_END - 1}]: ${features.slice(0, AUDIO_END).filter((v) => v !== 0).length} non-zero. Motion/Mouse[${AUDIO_END}..${MOTION_END - 1}]: ${features.slice(AUDIO_END, MOTION_END).filter((v) => v !== 0).length} non-zero. Touch[${MOTION_END}..${TOUCH_END - 1}]: ${features.slice(MOTION_END, TOUCH_END).filter((v) => v !== 0).length} non-zero.`
|
|
4383
5212
|
);
|
|
4384
5213
|
const fingerprint = simhash(normalizedFeatures);
|
|
4385
5214
|
const tbh = await generateTBH(fingerprint);
|
|
@@ -4583,9 +5412,12 @@ async function processSensorData(sensorData, config, wallet, connection, onProgr
|
|
|
4583
5412
|
);
|
|
4584
5413
|
solanaProof = serializeProof(proof, publicSignals);
|
|
4585
5414
|
} catch (proofErr) {
|
|
4586
|
-
const
|
|
4587
|
-
const
|
|
4588
|
-
const
|
|
5415
|
+
const motionStart = SPEAKER_FEATURE_COUNT;
|
|
5416
|
+
const touchStart = motionStart + MOTION_FEATURE_COUNT;
|
|
5417
|
+
const touchEnd = touchStart + TOUCH_FEATURE_COUNT;
|
|
5418
|
+
const audioNZ = features.slice(0, motionStart).filter((v) => v !== 0).length;
|
|
5419
|
+
const motionNZ = features.slice(motionStart, touchStart).filter((v) => v !== 0).length;
|
|
5420
|
+
const touchNZ = features.slice(touchStart, touchEnd).filter((v) => v !== 0).length;
|
|
4589
5421
|
const rawAudio = sensorData.audio?.samples.length ?? 0;
|
|
4590
5422
|
const rawMotion = sensorData.motion.length;
|
|
4591
5423
|
const rawTouch = sensorData.touch.length;
|