@mcptoolshop/voice-engine-dsp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/LICENSE +21 -0
  2. package/README.md +78 -0
  3. package/dist/src/adapters/AudioWorkletProcessor.d.ts +31 -0
  4. package/dist/src/adapters/AudioWorkletProcessor.d.ts.map +1 -0
  5. package/dist/src/adapters/AudioWorkletProcessor.js +77 -0
  6. package/dist/src/adapters/NodeStreamAutotune.d.ts +28 -0
  7. package/dist/src/adapters/NodeStreamAutotune.d.ts.map +1 -0
  8. package/dist/src/adapters/NodeStreamAutotune.js +103 -0
  9. package/dist/src/analysis/PitchTrackerRefV1.d.ts +13 -0
  10. package/dist/src/analysis/PitchTrackerRefV1.d.ts.map +1 -0
  11. package/dist/src/analysis/PitchTrackerRefV1.js +136 -0
  12. package/dist/src/analysis/VoicingDetectorRefV1.d.ts +13 -0
  13. package/dist/src/analysis/VoicingDetectorRefV1.d.ts.map +1 -0
  14. package/dist/src/analysis/VoicingDetectorRefV1.js +77 -0
  15. package/dist/src/index.d.ts +8 -0
  16. package/dist/src/index.d.ts.map +1 -0
  17. package/dist/src/index.js +22 -0
  18. package/dist/src/prosody/AccentRenderer.d.ts +15 -0
  19. package/dist/src/prosody/AccentRenderer.d.ts.map +1 -0
  20. package/dist/src/prosody/AccentRenderer.js +66 -0
  21. package/dist/src/prosody/Presets.d.ts +3 -0
  22. package/dist/src/prosody/Presets.d.ts.map +1 -0
  23. package/dist/src/prosody/Presets.js +49 -0
  24. package/dist/src/prosody/SafetyRails.d.ts +21 -0
  25. package/dist/src/prosody/SafetyRails.d.ts.map +1 -0
  26. package/dist/src/prosody/SafetyRails.js +65 -0
  27. package/dist/src/transformation/FormantStrategyV1.d.ts +5 -0
  28. package/dist/src/transformation/FormantStrategyV1.d.ts.map +1 -0
  29. package/dist/src/transformation/FormantStrategyV1.js +39 -0
  30. package/dist/src/transformation/PitchShifterRefV1.d.ts +9 -0
  31. package/dist/src/transformation/PitchShifterRefV1.d.ts.map +1 -0
  32. package/dist/src/transformation/PitchShifterRefV1.js +120 -0
  33. package/dist/src/tuning/AutotuneExecutor.d.ts +16 -0
  34. package/dist/src/tuning/AutotuneExecutor.d.ts.map +1 -0
  35. package/dist/src/tuning/AutotuneExecutor.js +217 -0
  36. package/dist/src/tuning/CorrectionController.d.ts +5 -0
  37. package/dist/src/tuning/CorrectionController.d.ts.map +1 -0
  38. package/dist/src/tuning/CorrectionController.js +91 -0
  39. package/dist/src/tuning/CorrectionControllerRefV1.d.ts +6 -0
  40. package/dist/src/tuning/CorrectionControllerRefV1.d.ts.map +1 -0
  41. package/dist/src/tuning/CorrectionControllerRefV1.js +63 -0
  42. package/dist/src/tuning/ScaleQuantizer.d.ts +7 -0
  43. package/dist/src/tuning/ScaleQuantizer.d.ts.map +1 -0
  44. package/dist/src/tuning/ScaleQuantizer.js +43 -0
  45. package/dist/src/tuning/StreamingAutotuneEngine.d.ts +43 -0
  46. package/dist/src/tuning/StreamingAutotuneEngine.d.ts.map +1 -0
  47. package/dist/src/tuning/StreamingAutotuneEngine.js +389 -0
  48. package/dist/src/tuning/StreamingAutotuneEngine_Fixed.d.ts +36 -0
  49. package/dist/src/tuning/StreamingAutotuneEngine_Fixed.d.ts.map +1 -0
  50. package/dist/src/tuning/StreamingAutotuneEngine_Fixed.js +344 -0
  51. package/dist/src/tuning/TargetCurveGenerator.d.ts +5 -0
  52. package/dist/src/tuning/TargetCurveGenerator.d.ts.map +1 -0
  53. package/dist/src/tuning/TargetCurveGenerator.js +69 -0
  54. package/dist/src/tuning/TargetCurveRefV1.d.ts +6 -0
  55. package/dist/src/tuning/TargetCurveRefV1.d.ts.map +1 -0
  56. package/dist/src/tuning/TargetCurveRefV1.js +69 -0
  57. package/dist/src/utils/AudioBufferUtils.d.ts +3 -0
  58. package/dist/src/utils/AudioBufferUtils.d.ts.map +1 -0
  59. package/dist/src/utils/AudioBufferUtils.js +19 -0
  60. package/dist/src/version.d.ts +2 -0
  61. package/dist/src/version.d.ts.map +1 -0
  62. package/dist/src/version.js +4 -0
  63. package/package.json +38 -0
package/dist/src/tuning/CorrectionController.js
@@ -0,0 +1,91 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.CorrectionController = void 0;
+ class CorrectionController {
+ generate(f0, voicing, target, plan) {
+ const len = f0.f0MhzQ.length;
+ const strengthQ = new Int16Array(len);
+ const baseSnap = plan.parameters.snapStrengthQ / 10000.0; // 0..1
+ const protection = plan.parameters.consonantProtectionQ / 10000.0; // 0..1
+ const speedVal = plan.parameters.retuneSpeedQ; // 0..10000
+ // Phase 6 Controls
+ const globalMix = (plan.parameters.globalStrengthQ ?? 10000) / 10000.0; // 0..1
+ const attackMs = plan.parameters.attackMsQ ?? 0;
+ const releaseMs = plan.parameters.releaseMsQ ?? 0;
+ // Time constants
+ // const frameDur = f0.hopSamples / f0.sampleRateHz; // e.g. 0.01s
+ const frameDur = 0.01; // Force 10ms for debugging if needed
+ let alphaAtt = attackMs > 0 ? (1.0 - Math.exp(-frameDur / (attackMs / 1000.0))) : 1.0;
+ let alphaRel = releaseMs > 0 ? (1.0 - Math.exp(-frameDur / (releaseMs / 1000.0))) : 1.0;
+ let currentStrength = 0;
+ // Retune Speed Model:
+ // low speed => low alpha (lazy correction). high speed => high alpha.
+ // If speed=10000 (100%), alpha=1.0 (Instant).
+ // If speed=0 (0%), alpha=0.01 (Very slow).
+ // Exponential map typical for speed knobs? Linear is fine for V1.
+ const alpha = Math.max(0.01, speedVal / 10000.0);
+ let smoothDiff = 0;
+ for (let i = 0; i < len; i++) {
+ const isVoiced = voicing.voicedQ[i] > 0;
+ if (!isVoiced) {
+ strengthQ[i] = 0;
+ smoothDiff = 0; // Reset state on unvoiced
+ continue;
+ }
+ // 1. Calculate Raw Difference (Target - Input)
+ // Need Input Cents. Re-calc or assume we have it?
+ // Re-calc is safest (pure).
+ let f0Hz = f0.f0MhzQ[i] / 1000.0;
+ if (f0Hz < 20)
+ f0Hz = 20; // Guard
+ const inputCents = 6900 + 1200 * Math.log2(f0Hz / 440);
+ const targetVal = target.targetCentsQ[i] / 1000.0; // Milli-Cents to Cents
+ const rawDiff = targetVal - inputCents;
+ // 2. Apply Retune Speed (Smoothing the Correction)
+ smoothDiff += alpha * (rawDiff - smoothDiff);
+ // 3. Calculate Ratio Strength
+ // Wanted Correction = smoothDiff.
+ // Available Correction = rawDiff.
+ // Strength = smoothDiff / rawDiff.
+ let ratio = 0;
+ if (Math.abs(rawDiff) > 0.001) {
+ ratio = smoothDiff / rawDiff;
+ }
+ else {
+ ratio = 1.0; // Already there
+ }
+ // Clamp ratio 0..1?
+ // If overshoot (ringing), ratio > 1.
+ // Usually we clamp strength 0..1 for artifacts.
+ ratio = Math.max(0, Math.min(1, ratio));
+ // 4. Protection
+ // If confidence is low, reduce max strength.
+ // confQ: 0..10000.
+ const conf = f0.confQ[i] / 10000.0;
+ // protectedMax = 1.0 - protection * (1.0 - conf)
+ // If protection=1, and conf=0 => max=0.
+ // If protection=0 => max=1.
+ const confCheck = Math.max(0, 1.0 - protection * (1.0 - conf));
+ // 5. Final Strength
+ // Base Snap * Computed Ratio * Protection * Global Mix (Phase 6)
+ let targetStrength = baseSnap * ratio * confCheck * globalMix;
+ // 6. Attack / Release Smoothing
+ if (targetStrength > currentStrength) {
+ // console.log(`Debug Att: Target=${targetStrength}, Curr=${currentStrength}, Alpha=${alphaAtt}`);
+ currentStrength += (targetStrength - currentStrength) * alphaAtt;
+ }
+ else {
+ currentStrength += (targetStrength - currentStrength) * alphaRel;
+ }
+ strengthQ[i] = Math.floor(currentStrength * 10000);
+ }
+ return {
+ sampleRateHz: f0.sampleRateHz,
+ frameHz: f0.frameHz,
+ hopSamples: f0.hopSamples,
+ t0Samples: f0.t0Samples,
+ strengthQ
+ };
+ }
+ }
+ exports.CorrectionController = CorrectionController;
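
The retune-speed model in generate() reduces to a one-pole smoother on the raw pitch error, with the per-frame strength reported as the ratio of smoothed to raw correction. The self-contained sketch below, using made-up parameter values (not defaults from this package), walks a few frames of that recursion for a note sung 100 cents below its quantized target:

    // Minimal sketch of the retune-speed model used in generate() above.
    // All values here are hypothetical; they are not taken from the package.
    const retuneSpeedQ = 2500;                              // 25% retune speed
    const alpha = Math.max(0.01, retuneSpeedQ / 10000);     // one-pole coefficient
    const inputCents = 6900 + 1200 * Math.log2(440 / 440);  // sung pitch: A4 -> 6900 cents
    const targetCents = 7000;                               // quantized target: 100 cents higher
    const rawDiff = targetCents - inputCents;
    let smoothDiff = 0;
    for (let frame = 0; frame < 4; frame++) {
      smoothDiff += alpha * (rawDiff - smoothDiff);         // lazy approach to the full correction
      const strength = Math.max(0, Math.min(1, smoothDiff / rawDiff));
      console.log(frame, strength.toFixed(3));              // 0.250, 0.438, 0.578, 0.684
    }

The full implementation above additionally scales this ratio by confidence-based protection, the global mix, and the attack/release envelope before quantizing into strengthQ.
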
package/dist/src/tuning/CorrectionControllerRefV1.d.ts
@@ -0,0 +1,6 @@
+ import { F0TrackV1, VoicingMaskV1, TuneScoreV1, CorrectionEnvelopeV1 } from "@mcptoolshop/voice-engine-core";
+ export declare class CorrectionControllerRefV1 {
+ constructor();
+ generate(f0Track: F0TrackV1, voicing: VoicingMaskV1, score: TuneScoreV1): CorrectionEnvelopeV1;
+ }
+ //# sourceMappingURL=CorrectionControllerRefV1.d.ts.map
package/dist/src/tuning/CorrectionControllerRefV1.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"CorrectionControllerRefV1.d.ts","sourceRoot":"","sources":["../../../src/tuning/CorrectionControllerRefV1.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,SAAS,EAAE,aAAa,EAAE,WAAW,EAAE,oBAAoB,EAE9D,MAAM,gCAAgC,CAAC;AAExC,qBAAa,yBAAyB;;IAG3B,QAAQ,CACV,OAAO,EAAE,SAAS,EAClB,OAAO,EAAE,aAAa,EACtB,KAAK,EAAE,WAAW,GACpB,oBAAoB;CA+D1B"}
package/dist/src/tuning/CorrectionControllerRefV1.js
@@ -0,0 +1,63 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.CorrectionControllerRefV1 = void 0;
+ class CorrectionControllerRefV1 {
+ constructor() { }
+ generate(f0Track, voicing, score) {
+ const numFrames = f0Track.f0MhzQ.length;
+ const strengthQ = new Int16Array(numFrames);
+ if (score.mode !== "scale") {
+ throw new Error("Only scale mode supported in V1");
+ }
+ const config = score;
+ const baseStrength = config.snapStrengthQ;
+ // Pass 1: Raw Strength Calculation
+ for (let i = 0; i < numFrames; i++) {
+ const isVoiced = voicing.voicedQ[i] > 0;
+ if (!isVoiced) {
+ strengthQ[i] = 0;
+ }
+ else {
+ const conf = f0Track.confQ[i];
+ // strength = snapStrengthQ * (confQ / 10000)
+ // Linear scaling by confidence
+ strengthQ[i] = Math.round(baseStrength * (conf / 10000));
+ }
+ }
+ // Pass 2: Boundary Softening (Distance Transform)
+ // If dist(unvoiced) < 3, strength *= dist/3
+ const dist = new Int32Array(numFrames).fill(numFrames + 1);
+ // Init distance 0 at unvoiced frames
+ for (let i = 0; i < numFrames; i++) {
+ if (voicing.voicedQ[i] === 0) {
+ dist[i] = 0;
+ }
+ }
+ // Forward scan
+ for (let i = 1; i < numFrames; i++) {
+ dist[i] = Math.min(dist[i], dist[i - 1] + 1);
+ }
+ // Backward scan
+ for (let i = numFrames - 2; i >= 0; i--) {
+ dist[i] = Math.min(dist[i], dist[i + 1] + 1);
+ }
+ // Apply erosion
+ for (let i = 0; i < numFrames; i++) {
+ const d = dist[i];
+ if (d < 3) {
+ // d=0 -> strength=0 (already set)
+ // d=1 -> strength*=1/3
+ // d=2 -> strength*=2/3
+ strengthQ[i] = Math.floor((strengthQ[i] * d) / 3);
+ }
+ }
+ return {
+ sampleRateHz: f0Track.sampleRateHz,
+ frameHz: f0Track.frameHz,
+ hopSamples: f0Track.hopSamples,
+ t0Samples: f0Track.t0Samples,
+ strengthQ
+ };
+ }
+ }
+ exports.CorrectionControllerRefV1 = CorrectionControllerRefV1;
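
The boundary softening in Pass 2 is a standard two-pass distance transform over the voicing mask. The standalone sketch below runs the same two scans on an invented mask to show the resulting ramp at segment edges:

    // Two-pass distance-to-unvoiced transform, mirroring Pass 2 above.
    // The voicing mask is made up for illustration (0 = unvoiced, 1 = voiced).
    const voiced = [0, 0, 1, 1, 1, 1, 1, 0];
    const n = voiced.length;
    const dist = new Int32Array(n).fill(n + 1);
    for (let i = 0; i < n; i++) if (voiced[i] === 0) dist[i] = 0;                   // seed at unvoiced frames
    for (let i = 1; i < n; i++) dist[i] = Math.min(dist[i], dist[i - 1] + 1);       // forward scan
    for (let i = n - 2; i >= 0; i--) dist[i] = Math.min(dist[i], dist[i + 1] + 1);  // backward scan
    console.log(Array.from(dist));  // [0, 0, 1, 2, 3, 2, 1, 0]
    // Frames with dist < 3 are scaled by dist/3, so strength ramps 1/3 -> 2/3 -> full
    // over the first voiced frames of a segment and back down before it ends.
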
package/dist/src/tuning/ScaleQuantizer.d.ts
@@ -0,0 +1,7 @@
+ export declare class ScaleQuantizer {
+ /**
+ * Quantizes an input pitch (in cents relative to A4=440Hz=6900) to the nearest allowed note.
+ */
+ static quantize(inputCents: number, allowedPitchClasses: number[]): number;
+ }
+ //# sourceMappingURL=ScaleQuantizer.d.ts.map
package/dist/src/tuning/ScaleQuantizer.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"ScaleQuantizer.d.ts","sourceRoot":"","sources":["../../../src/tuning/ScaleQuantizer.ts"],"names":[],"mappings":"AAAA,qBAAa,cAAc;IACvB;;OAEG;IACH,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,mBAAmB,EAAE,MAAM,EAAE,GAAG,MAAM;CAuC7E"}
package/dist/src/tuning/ScaleQuantizer.js
@@ -0,0 +1,43 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.ScaleQuantizer = void 0;
+ class ScaleQuantizer {
+ /**
+ * Quantizes an input pitch (in cents relative to A4=440Hz=6900) to the nearest allowed note.
+ */
+ static quantize(inputCents, allowedPitchClasses) {
+ // 1. Determine Octave and Semitone
+ // MIDI 0 = C-1 = 0 cents? No. A4=6900.
+ // C4 = 6000 cents.
+ // 1200 cents per octave.
+ // pitchClass = floor(cents / 100) % 12 ? No, cents are absolute.
+ // Let's align to MIDI numbers.
+ // midiVal = inputCents / 100.
+ const midiVal = inputCents / 100.0;
+ const noteIndex = Math.round(midiVal);
+ // Optimize: check if noteIndex is allowed
+ const pc = ((noteIndex % 12) + 12) % 12;
+ if (allowedPitchClasses.includes(pc)) {
+ return noteIndex * 100;
+ }
+ // Search nearest allowed
+ // Brute force is fast enough (12 classes max).
+ // Check neighbors up/down.
+ let bestDist = Infinity;
+ let bestCandidate = noteIndex;
+ // Check +/- 12 semitones is sufficient
+ for (let i = -6; i <= 6; i++) {
+ const candidate = noteIndex + i;
+ const cpc = ((candidate % 12) + 12) % 12;
+ if (allowedPitchClasses.includes(cpc)) {
+ const dist = Math.abs(candidate - midiVal);
+ if (dist < bestDist) {
+ bestDist = dist;
+ bestCandidate = candidate;
+ }
+ }
+ }
+ return bestCandidate * 100;
+ }
+ }
+ exports.ScaleQuantizer = ScaleQuantizer;
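
A usage sketch for the quantizer follows. The bare-package import is an assumption (dist/src/index.js is not reproduced in this diff, so whether it re-exports ScaleQuantizer is not shown). Pitch classes are cents/100 taken modulo 12, so 0 = C and A4 (6900 cents) falls on pitch class 9.

    // Hypothetical usage; the import assumes the package entry point re-exports ScaleQuantizer.
    import { ScaleQuantizer } from "@mcptoolshop/voice-engine-dsp";

    const cMajor = [0, 2, 4, 5, 7, 9, 11];
    // 6450 cents sits midway between E4 (6400) and F4 (6500); Math.round picks 65 (F, in the scale).
    console.log(ScaleQuantizer.quantize(6450, cMajor));     // 6500
    // With only a C major triad allowed, 6150 cents rounds to D (not allowed), and the
    // +/-6 semitone search then lands on the nearest allowed note, C4.
    console.log(ScaleQuantizer.quantize(6150, [0, 4, 7]));  // 6000
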
package/dist/src/tuning/StreamingAutotuneEngine.d.ts
@@ -0,0 +1,43 @@
+ import { ProsodyRuntimeStateV1 } from '../../../voice-engine-core/src/prosody/StreamingProsodyTypes';
+ export declare class StreamingAutotuneEngine {
+ private state;
+ private config;
+ private preset;
+ private frameCount;
+ private eventScheduler;
+ private _reusableEventList;
+ private allowedSet;
+ private _lastOutputCents;
+ private _mockPitchHz;
+ constructor(config: any, preset: any);
+ private hzToCents;
+ private quantize;
+ getLastOutputCents(): number;
+ setMockPitch(hz: number): void;
+ enqueueEvents(events: any[]): void;
+ process(chunk: Float32Array): {
+ audio: Float32Array;
+ targets: Float32Array;
+ };
+ processFrame(analysis: {
+ energyDb: number;
+ confidenceQ: number;
+ pitchHz: number;
+ }, frameIndex: number): void;
+ processFramePipeline(analysis: {
+ energyDb: number;
+ confidenceQ: number;
+ pitchHz: number;
+ }, frameIndex: number): void;
+ private getCurrentRampValue;
+ private handleSegmentStart;
+ private handleSegmentEnd;
+ snapshot(): {
+ version: string;
+ state: ProsodyRuntimeStateV1;
+ };
+ restore(snapshot: any): void;
+ reset(): void;
+ private createInitialState;
+ }
+ //# sourceMappingURL=StreamingAutotuneEngine.d.ts.map
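
The snapshot()/restore() pair declared above is the engine's persistence surface: snapshot() returns a { version, state } object and restore() rejects snapshots whose major version differs (see the implementation further down). A minimal round-trip sketch, again assuming an entry-point re-export and an invented config:

    // Hypothetical round trip; the config/preset shapes are assumptions (both are `any` above).
    import { StreamingAutotuneEngine } from "@mcptoolshop/voice-engine-dsp";

    const engine = new StreamingAutotuneEngine({ allowedPitchClasses: [0, 2, 4, 5, 7, 9, 11] }, {});
    const saved = engine.snapshot();   // { version, state } with a deep-copied state
    // ...process more audio, then roll the engine back to the saved point:
    engine.restore(saved);             // throws on a major-version mismatch or missing state
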
package/dist/src/tuning/StreamingAutotuneEngine.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"StreamingAutotuneEngine.d.ts","sourceRoot":"","sources":["../../../src/tuning/StreamingAutotuneEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAY,MAAM,8DAA8D,CAAC;AA4D/G,qBAAa,uBAAuB;IAChC,OAAO,CAAC,KAAK,CAAwB;IACrC,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,cAAc,CAAsB;IAC5C,OAAO,CAAC,kBAAkB,CAAa;IACvC,OAAO,CAAC,UAAU,CAAc;IAChC,OAAO,CAAC,gBAAgB,CAAa;IACrC,OAAO,CAAC,YAAY,CAAa;gBAErB,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG;IAUpC,OAAO,CAAC,SAAS;IAIjB,OAAO,CAAC,QAAQ;IAuBT,kBAAkB,IAAI,MAAM;IAI5B,YAAY,CAAC,EAAE,EAAE,MAAM;IAIvB,aAAa,CAAC,MAAM,EAAE,GAAG,EAAE;IAIlC,OAAO,CAAC,KAAK,EAAE,YAAY,GAAG;QAAE,KAAK,EAAE,YAAY,CAAC;QAAC,OAAO,EAAE,YAAY,CAAA;KAAE;IAsC5E,YAAY,CAAC,QAAQ,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IAI5G,oBAAoB,CAAC,QAAQ,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IAqKpH,OAAO,CAAC,mBAAmB;IAQ3B,OAAO,CAAC,kBAAkB;IAe1B,OAAO,CAAC,gBAAgB;IAMrB,QAAQ,IAAI;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,qBAAqB,CAAA;KAAE;IASzD,OAAO,CAAC,QAAQ,EAAE,GAAG,GAAG,IAAI;IAqCnC,KAAK,IAAI,IAAI;IAIb,OAAO,CAAC,kBAAkB;CAqC7B"}
package/dist/src/tuning/StreamingAutotuneEngine.js
@@ -0,0 +1,389 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.StreamingAutotuneEngine = void 0;
+ const OnlineStats_1 = require("../../../voice-engine-core/src/prosody/OnlineStats");
+ const version_1 = require("../version");
+ class RingBufferScheduler {
+ buffer;
+ capacity;
+ head = 0;
+ count = 0;
+ constructor(capacity = 1024) {
+ this.capacity = capacity;
+ this.buffer = new Array(capacity);
+ }
+ enqueue(events) {
+ // Sort incoming batch to ensure local order
+ events.sort((a, b) => a.time - b.time);
+ for (const e of events) {
+ if (this.count === this.capacity) {
+ // Buffer full. Drop oldest (head) to make room.
+ this.head = (this.head + 1) % this.capacity;
+ }
+ else {
+ this.count++;
+ }
+ const tail = (this.head + this.count - 1) % this.capacity;
+ this.buffer[tail] = e;
+ }
+ }
+ pruneOldEvents(currentTime) {
+ while (this.count > 0) {
+ const e = this.buffer[this.head];
+ const end = e.endTime !== undefined ? e.endTime : (e.time + (e.duration || 10));
+ if (end < currentTime) {
+ this.buffer[this.head] = undefined;
+ this.head = (this.head + 1) % this.capacity;
+ this.count--;
+ }
+ else {
+ break;
+ }
+ }
+ }
+ getActiveEvents(currentTime, outList) {
+ outList.length = 0;
+ for (let i = 0; i < this.count; i++) {
+ const idx = (this.head + i) % this.capacity;
+ const e = this.buffer[idx];
+ if (!e)
+ continue;
+ const end = e.endTime !== undefined ? e.endTime : (e.time + (e.duration || 10));
+ if (e.time <= currentTime && end >= currentTime) {
+ outList.push(e);
+ }
+ }
+ }
+ }
+ class StreamingAutotuneEngine {
+ state;
+ config;
+ preset;
+ frameCount = 0;
+ eventScheduler;
+ _reusableEventList = [];
+ allowedSet;
+ _lastOutputCents = 0;
+ _mockPitchHz = 0;
+ constructor(config, preset) {
+ this.config = config;
+ this.preset = preset;
+ this.state = this.createInitialState();
+ this.eventScheduler = new RingBufferScheduler();
+ const allowed = config.allowedPitchClasses || [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
+ this.allowedSet = new Set(allowed);
+ }
+ hzToCents(h) {
+ return (h > 1.0) ? (6900 + 1200 * Math.log2(h / 440)) : 0;
+ }
+ quantize(c) {
+ const rootOffset = this.config.rootOffsetCents || 0;
+ const local = c - rootOffset;
+ const midi = Math.round(local / 100);
+ const pc = ((midi % 12) + 12) % 12;
+ if (this.allowedSet.has(pc)) {
+ return { id: midi, center: midi * 100 + rootOffset };
+ }
+ for (let i = 1; i <= 6; i++) {
+ let m = midi + i;
+ let p = ((m % 12) + 12) % 12;
+ if (this.allowedSet.has(p))
+ return { id: m, center: m * 100 + rootOffset };
+ m = midi - i;
+ p = ((m % 12) + 12) % 12;
+ if (this.allowedSet.has(p))
+ return { id: m, center: m * 100 + rootOffset };
+ }
+ return { id: midi, center: midi * 100 + rootOffset };
+ }
+ getLastOutputCents() {
+ return this._lastOutputCents;
+ }
+ setMockPitch(hz) {
+ this._mockPitchHz = hz;
+ }
+ enqueueEvents(events) {
+ this.eventScheduler.enqueue(events);
+ }
+ process(chunk) {
+ const hopSize = 128;
+ const numFrames = Math.floor(chunk.length / hopSize);
+ const targets = new Float32Array(numFrames);
+ for (let i = 0; i < numFrames; i++) {
+ const startSample = i * hopSize;
+ // Mock Analysis (to be replaced by StreamingPitchTracker)
+ let sumSq = 0;
+ for (let j = 0; j < hopSize; j++) {
+ // Check bounds
+ if (startSample + j < chunk.length) {
+ const s = chunk[startSample + j];
+ sumSq += s * s;
+ }
+ }
+ const rms = Math.sqrt(sumSq / hopSize);
+ const energyDb = rms > 1e-9 ? 20 * Math.log10(rms) : -100;
+ // Mock Pitch/Confidence (0 confidence = unvoiced)
+ // In real impl, this comes from F0Decomposer
+ const isVoiced = energyDb > -50 && this._mockPitchHz > 0;
+ const frameAnalysis = {
+ energyDb,
+ confidenceQ: isVoiced ? 10000 : 0,
+ pitchHz: this._mockPitchHz
+ };
+ // Using the new pipeline
+ this.processFramePipeline(frameAnalysis, this.frameCount++);
+ targets[i] = this._lastOutputCents;
+ }
+ return { audio: chunk.slice(), targets: targets };
+ }
+ processFrame(analysis, frameIndex) {
+ this.processFramePipeline(analysis, frameIndex);
+ }
+ processFramePipeline(analysis, frameIndex) {
+ const { energyDb, confidenceQ, pitchHz } = analysis;
+ const config = this.config;
+ const segmenter = this.state.segmenter;
+ // Defaults
+ const silenceDb = config.silenceThresholdDb ?? -60;
+ const voicingLimit = config.voicingThresholdQ ?? 2000;
+ const enterLimit = config.voicedEnterFrames ?? 2;
+ const exitLimit = config.voicedExitFrames ?? 5;
+ // 1. Input Conditions
+ const isSpeechCandidate = energyDb > silenceDb;
+ const isVoicedCandidate = isSpeechCandidate && (confidenceQ > voicingLimit);
+ // 2. Hysteresis Logic
+ if (segmenter.isVoiced) {
+ if (!isVoicedCandidate) {
+ segmenter.exitCount++;
+ if (segmenter.exitCount > exitLimit) {
+ this.handleSegmentEnd(frameIndex);
+ }
+ }
+ else {
+ segmenter.exitCount = 0;
+ }
+ }
+ else {
+ if (isVoicedCandidate) {
+ segmenter.enterCount++;
+ if (segmenter.enterCount >= enterLimit) {
+ this.handleSegmentStart(frameIndex);
+ }
+ }
+ else {
+ segmenter.enterCount = 0;
+ }
+ }
+ // 3. Process Voiced Frame Pipeline
+ if (segmenter.isVoiced) {
+ segmenter.accumulatedConf += confidenceQ;
+ segmenter.accumulatedEnergy += energyDb;
+ // --- Pipeline Step 1: Decomposition ---
+ const rawCents = this.hzToCents(pitchHz);
+ // For now, assuming raw input IS the decomposition
+ const centerCents = rawCents;
+ const residualCents = 0;
+ // --- Pipeline Step 2: Baseline/Intent ---
+ // Update Online Baseline (using Cents for linear regression on pitch)
+ OnlineStats_1.OnlineStats.update(this.state.baseline, frameIndex, rawCents);
+ const { slope, intercept } = OnlineStats_1.OnlineStats.getRegression(this.state.baseline);
+ const intentCents = centerCents;
+ // --- Pipeline Step 3: Stability ---
+ const stabilizer = this.state.stabilizer;
+ const hysteresis = config.hysteresisCents ?? 15;
+ const minHoldFrames = config.minHoldFrames ?? 6;
+ const rampFrames = config.rampFrames ?? 3;
+ // Calc delta/slope for transition detection
+ const delta = 0; // TODO: Track previous raw cents
+ const isTransition = delta > (config.slopeThreshFrame ?? 5);
+ const cand = this.quantize(intentCents);
+ // Initialize if first voiced frame logic covered by handleSegmentStart -> default state
+ if (stabilizer.currentNoteId === 0) {
+ stabilizer.currentNoteId = cand.id;
+ stabilizer.lastTargetCents = cand.center;
+ stabilizer.holdFrames = 0;
+ stabilizer.rampActive = false;
+ stabilizer.rampEndCents = cand.center;
+ stabilizer.rampStartCents = cand.center;
+ }
+ else {
+ stabilizer.holdFrames++;
+ let shouldSwitch = false;
+ if (!isTransition && stabilizer.holdFrames >= minHoldFrames) {
+ const currentErr = Math.abs(intentCents - stabilizer.lastTargetCents);
+ const candErr = Math.abs(intentCents - cand.center);
+ if (currentErr - candErr > hysteresis) {
+ shouldSwitch = true;
+ }
+ }
+ if (shouldSwitch) {
+ stabilizer.rampStartCents = stabilizer.rampActive ?
+ this.getCurrentRampValue(stabilizer) : stabilizer.lastTargetCents;
+ stabilizer.rampEndCents = cand.center;
+ stabilizer.rampActive = true;
+ stabilizer.rampProgress = 0;
+ stabilizer.currentNoteId = cand.id;
+ stabilizer.lastTargetCents = cand.center;
+ stabilizer.holdFrames = 0;
+ }
+ }
+ // Ramping
+ let macroCents = stabilizer.lastTargetCents;
+ if (stabilizer.rampActive) {
+ stabilizer.rampProgress++;
+ macroCents = this.getCurrentRampValue(stabilizer);
+ if (stabilizer.rampProgress >= rampFrames) {
+ stabilizer.rampActive = false;
+ }
+ }
+ // --- Pipeline Step 4: Events/PFC ---
+ this.eventScheduler.pruneOldEvents(frameIndex);
+ this.eventScheduler.getActiveEvents(frameIndex, this._reusableEventList);
+ let accentOffset = 0;
+ for (const event of this._reusableEventList) {
+ const duration = event.duration || 10;
+ const strength = event.strength || 0;
+ const shape = event.shape || 'rise';
+ const radius = duration / 2;
+ const d = frameIndex - event.time;
+ if (Math.abs(d) <= radius) {
+ const sign = (shape === 'fall' || shape === 'fall-rise') ? -1.0 : 1.0;
+ const w = 0.5 * (1 + Math.cos((Math.PI * d) / radius));
+ accentOffset += w * strength * sign; // Simple additive
+ }
+ }
+ // Update PFC State
+ const pfc = this.state.pfc;
+ // Track max accent strength in recent window (simple approach)
+ const currentAbsAccent = Math.abs(accentOffset);
+ if (currentAbsAccent > 0.01) {
+ if (currentAbsAccent > pfc.focusStrength) {
+ pfc.focusStrength = currentAbsAccent;
+ pfc.focusTime = frameIndex;
+ }
+ pfc.activeFade = 1.0;
+ }
+ else {
+ pfc.activeFade *= 0.95; // Decay
+ pfc.focusStrength *= 0.95;
+ }
+ // --- Pipeline Step 5: Reconstruct ---
+ let finalCents = macroCents + accentOffset + residualCents;
+ // Apply PFC
+ if (this.state.pfc.activeFade > 0) {
+ const { slope, intercept } = OnlineStats_1.OnlineStats.getRegression(this.state.baseline);
+ const baselineAtT = intercept + slope * frameIndex;
+ const compressionStrength = this.config.pfcStrength ?? 0.5;
+ const factor = this.state.pfc.activeFade * compressionStrength;
+ if (!isNaN(intercept)) {
+ finalCents = baselineAtT + (finalCents - baselineAtT) * (1.0 - factor);
+ }
+ }
+ this._lastOutputCents = finalCents;
+ }
+ }
+ getCurrentRampValue(stabilizer) {
+ // Simple linear interpolation
+ const total = this.config.rampFrames ?? 3;
+ if (stabilizer.rampProgress >= total)
+ return stabilizer.rampEndCents;
+ const t = stabilizer.rampProgress / total;
+ return stabilizer.rampStartCents + (stabilizer.rampEndCents - stabilizer.rampStartCents) * t;
+ }
+ handleSegmentStart(index) {
+ const segmenter = this.state.segmenter;
+ segmenter.isVoiced = true;
+ segmenter.enterCount = 0;
+ segmenter.exitCount = 0;
+ segmenter.currentSegmentStart = index;
+ segmenter.accumulatedConf = 0;
+ segmenter.accumulatedEnergy = 0;
+ // Reset Stabilizer
+ this.state.stabilizer.currentNoteId = 0;
+ this.state.stabilizer.holdFrames = 0;
+ this.state.stabilizer.rampActive = false;
+ }
+ handleSegmentEnd(index) {
+ const segmenter = this.state.segmenter;
+ segmenter.isVoiced = false;
+ segmenter.exitCount = 0;
+ segmenter.enterCount = 0;
+ }
+ snapshot() {
+ // Deep copy state to prevent mutation of the snapshot
+ const stateCopy = JSON.parse(JSON.stringify(this.state));
+ return {
+ version: version_1.PROSODY_API_VERSION,
+ state: stateCopy
+ };
+ }
+ restore(snapshot) {
+ if (!snapshot || typeof snapshot !== 'object') {
+ throw new Error('Invalid snapshot format');
+ }
+ const { version, state } = snapshot;
+ if (!version || typeof version !== 'string') {
+ throw new Error('Snapshot missing version');
+ }
+ const currentMajor = version_1.PROSODY_API_VERSION.split('.')[0];
+ const snapshotMajor = version.split('.')[0];
+ if (currentMajor !== snapshotMajor) {
+ throw new Error(`Incompatible snapshot version. Current: ${version_1.PROSODY_API_VERSION}, Snapshot: ${version}`);
+ }
+ if (!state) {
+ throw new Error('Snapshot missing state');
+ }
+ this.state = state;
+ // Rehydrate Float32Arrays if they were serialized to objects
+ if (this.state.decomposer && this.state.decomposer.buffer && !(this.state.decomposer.buffer instanceof Float32Array)) {
+ const buf = this.state.decomposer.buffer;
+ const len = Object.keys(buf).length;
+ const newBuf = new Float32Array(len);
+ for (let i = 0; i < len; i++) {
+ newBuf[i] = buf[i];
+ }
+ this.state.decomposer.buffer = newBuf;
+ }
+ }
+ reset() {
+ this.state = this.createInitialState();
+ }
+ createInitialState() {
+ return {
+ segmenter: {
+ isVoiced: false,
+ enterCount: 0,
+ exitCount: 0,
+ currentSegmentStart: 0,
+ accumulatedConf: 0,
+ accumulatedEnergy: 0
+ },
+ decomposer: {
+ buffer: new Float32Array(0),
+ microState: undefined
+ },
+ baseline: {
+ sumX: 0,
+ sumY: 0,
+ sumXY: 0,
+ sumXX: 0,
+ count: 0
+ },
+ stabilizer: {
+ currentNoteId: 0,
+ lastTargetCents: 0,
+ holdFrames: 0,
+ rampActive: false,
+ rampStartCents: 0,
+ rampEndCents: 0,
+ rampProgress: 0
+ },
+ pfc: {
+ focusTime: null,
+ focusStrength: 0,
+ activeFade: 0
+ }
+ };
+ }
+ }
+ exports.StreamingAutotuneEngine = StreamingAutotuneEngine;
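
To exercise the mock-analysis path end to end: process() derives voicing from chunk RMS plus the pitch set via setMockPitch(), runs processFramePipeline() once per 128-sample hop, and returns the per-frame target curve in cents. The sketch below uses an invented config and accent event; the bare-package import again assumes an entry-point re-export, and the preset argument is stored but not otherwise read in this file.

    // Hypothetical end-to-end run of the streaming engine's mock-analysis path.
    import { StreamingAutotuneEngine } from "@mcptoolshop/voice-engine-dsp";

    const engine = new StreamingAutotuneEngine(
      { allowedPitchClasses: [0, 2, 4, 5, 7, 9, 11], hysteresisCents: 15, rampFrames: 3 },
      {}  // preset: not read by the code shown in this diff
    );
    // Accent events use the { time, duration, strength, shape } fields read in the event loop;
    // times are frame indices.
    engine.enqueueEvents([{ time: 2, duration: 6, strength: 20, shape: "rise" }]);
    engine.setMockPitch(446);                  // tell the mock analysis which pitch to assume
    const chunk = new Float32Array(512);       // 4 frames at the fixed 128-sample hop
    for (let i = 0; i < chunk.length; i++) {
      chunk[i] = 0.5 * Math.sin((2 * Math.PI * 446 * i) / 48000);  // non-silent audio so frames pass the energy gate
    }
    const { targets } = engine.process(chunk); // per-frame target pitch, in cents
    console.log(Array.from(targets), engine.getLastOutputCents());
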