@mcptoolshop/voice-engine-dsp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +78 -0
- package/dist/src/adapters/AudioWorkletProcessor.d.ts +31 -0
- package/dist/src/adapters/AudioWorkletProcessor.d.ts.map +1 -0
- package/dist/src/adapters/AudioWorkletProcessor.js +77 -0
- package/dist/src/adapters/NodeStreamAutotune.d.ts +28 -0
- package/dist/src/adapters/NodeStreamAutotune.d.ts.map +1 -0
- package/dist/src/adapters/NodeStreamAutotune.js +103 -0
- package/dist/src/analysis/PitchTrackerRefV1.d.ts +13 -0
- package/dist/src/analysis/PitchTrackerRefV1.d.ts.map +1 -0
- package/dist/src/analysis/PitchTrackerRefV1.js +136 -0
- package/dist/src/analysis/VoicingDetectorRefV1.d.ts +13 -0
- package/dist/src/analysis/VoicingDetectorRefV1.d.ts.map +1 -0
- package/dist/src/analysis/VoicingDetectorRefV1.js +77 -0
- package/dist/src/index.d.ts +8 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +22 -0
- package/dist/src/prosody/AccentRenderer.d.ts +15 -0
- package/dist/src/prosody/AccentRenderer.d.ts.map +1 -0
- package/dist/src/prosody/AccentRenderer.js +66 -0
- package/dist/src/prosody/Presets.d.ts +3 -0
- package/dist/src/prosody/Presets.d.ts.map +1 -0
- package/dist/src/prosody/Presets.js +49 -0
- package/dist/src/prosody/SafetyRails.d.ts +21 -0
- package/dist/src/prosody/SafetyRails.d.ts.map +1 -0
- package/dist/src/prosody/SafetyRails.js +65 -0
- package/dist/src/transformation/FormantStrategyV1.d.ts +5 -0
- package/dist/src/transformation/FormantStrategyV1.d.ts.map +1 -0
- package/dist/src/transformation/FormantStrategyV1.js +39 -0
- package/dist/src/transformation/PitchShifterRefV1.d.ts +9 -0
- package/dist/src/transformation/PitchShifterRefV1.d.ts.map +1 -0
- package/dist/src/transformation/PitchShifterRefV1.js +120 -0
- package/dist/src/tuning/AutotuneExecutor.d.ts +16 -0
- package/dist/src/tuning/AutotuneExecutor.d.ts.map +1 -0
- package/dist/src/tuning/AutotuneExecutor.js +217 -0
- package/dist/src/tuning/CorrectionController.d.ts +5 -0
- package/dist/src/tuning/CorrectionController.d.ts.map +1 -0
- package/dist/src/tuning/CorrectionController.js +91 -0
- package/dist/src/tuning/CorrectionControllerRefV1.d.ts +6 -0
- package/dist/src/tuning/CorrectionControllerRefV1.d.ts.map +1 -0
- package/dist/src/tuning/CorrectionControllerRefV1.js +63 -0
- package/dist/src/tuning/ScaleQuantizer.d.ts +7 -0
- package/dist/src/tuning/ScaleQuantizer.d.ts.map +1 -0
- package/dist/src/tuning/ScaleQuantizer.js +43 -0
- package/dist/src/tuning/StreamingAutotuneEngine.d.ts +43 -0
- package/dist/src/tuning/StreamingAutotuneEngine.d.ts.map +1 -0
- package/dist/src/tuning/StreamingAutotuneEngine.js +389 -0
- package/dist/src/tuning/StreamingAutotuneEngine_Fixed.d.ts +36 -0
- package/dist/src/tuning/StreamingAutotuneEngine_Fixed.d.ts.map +1 -0
- package/dist/src/tuning/StreamingAutotuneEngine_Fixed.js +344 -0
- package/dist/src/tuning/TargetCurveGenerator.d.ts +5 -0
- package/dist/src/tuning/TargetCurveGenerator.d.ts.map +1 -0
- package/dist/src/tuning/TargetCurveGenerator.js +69 -0
- package/dist/src/tuning/TargetCurveRefV1.d.ts +6 -0
- package/dist/src/tuning/TargetCurveRefV1.d.ts.map +1 -0
- package/dist/src/tuning/TargetCurveRefV1.js +69 -0
- package/dist/src/utils/AudioBufferUtils.d.ts +3 -0
- package/dist/src/utils/AudioBufferUtils.d.ts.map +1 -0
- package/dist/src/utils/AudioBufferUtils.js +19 -0
- package/dist/src/version.d.ts +2 -0
- package/dist/src/version.d.ts.map +1 -0
- package/dist/src/version.js +4 -0
- package/package.json +38 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
 * Type declaration for the streaming autotune engine.
 *
 * Per the compiled implementation shipped alongside this declaration, the
 * engine consumes per-frame analysis (energy, voicing confidence, pitch),
 * tracks voiced segments with enter/exit hysteresis, snaps pitch to the
 * allowed pitch classes, ramps between notes, adds scheduled accent events and
 * exposes the resulting target pitch in cents.
 */
export declare class StreamingAutotuneEngine {
|
|
2
|
+
/** Working state (segmenter, decomposer, baseline, stabilizer, pfc); rebuilt by reset(). */
private state;
|
|
3
|
+
/** Configuration object passed to the constructor; thresholds are read from it with defaults. */
private config;
|
|
4
|
+
/** Preset passed to the constructor; stored as given. */
private preset;
|
|
5
|
+
/** Frame counter; process() uses and increments it once per analysed hop. */
private frameCount;
|
|
6
|
+
/** Queue of accent events fed by enqueueEvents(). */
private eventScheduler;
|
|
7
|
+
/** Set built from config.allowedPitchClasses (all twelve classes when omitted). */
private allowedSet;
|
|
8
|
+
/** Most recent target in cents; returned by getLastOutputCents(). */
private _lastOutputCents;
|
|
9
|
+
/** Placeholder pitch in Hz set through setMockPitch() and used by process(). */
private _mockPitchHz;
|
|
10
|
+
/**
 * @param config Engine configuration (untyped); stored and read for thresholds and allowed pitch classes.
 * @param preset Preset object (untyped); stored as given.
 */
constructor(config: any, preset: any);
|
|
11
|
+
/** Converts Hz to cents as 6900 + 1200 * log2(hz / 440); yields 0 for inputs at or below 1 Hz. */
private hzToCents;
|
|
12
|
+
/** Snaps a cents value to the nearest allowed semitone, searching up to six semitones each way. */
private quantize;
|
|
13
|
+
/** @returns The target in cents written by the last voiced frame (0 before any voiced frame). */
getLastOutputCents(): number;
|
|
14
|
+
/** Sets the placeholder pitch, in Hz, that process() reports for every frame. */
setMockPitch(hz: number): void;
|
|
15
|
+
/** Hands accent events to the internal scheduler. */
enqueueEvents(events: any[]): void;
|
|
16
|
+
/**
 * Analyses `chunk` in 128-sample hops and runs the frame pipeline on each complete hop.
 *
 * @returns `audio`: a copy of `chunk`; `targets`: one target-cents value per complete hop.
 */
process(chunk: Float32Array): {
|
|
17
|
+
audio: Float32Array;
|
|
18
|
+
targets: Float32Array;
|
|
19
|
+
};
|
|
20
|
+
/** Runs one analysed frame through the pipeline; delegates to processFramePipeline(). */
processFrame(analysis: {
|
|
21
|
+
energyDb: number;
|
|
22
|
+
confidenceQ: number;
|
|
23
|
+
pitchHz: number;
|
|
24
|
+
}, frameIndex: number): void;
|
|
25
|
+
/**
 * Full per-frame pipeline: voiced/unvoiced hysteresis, then (while voiced)
 * quantization, note stabilisation and ramping, accent events and baseline
 * compression. The result is readable through getLastOutputCents().
 */
processFramePipeline(analysis: {
|
|
26
|
+
energyDb: number;
|
|
27
|
+
confidenceQ: number;
|
|
28
|
+
pitchHz: number;
|
|
29
|
+
}, frameIndex: number): void;
|
|
30
|
+
/** Linear interpolation between the ramp start and end cents. */
private getCurrentRampValue;
|
|
31
|
+
/** Marks the start of a voiced segment and resets the note stabilizer. */
private handleSegmentStart;
|
|
32
|
+
/** Marks the end of a voiced segment and clears the hysteresis counters. */
private handleSegmentEnd;
|
|
33
|
+
/** Replaces the working state with a fresh initial state. */
reset(): void;
|
|
34
|
+
/** Builds the zeroed initial state object. */
private createInitialState;
|
|
35
|
+
}
|
|
36
|
+
//# sourceMappingURL=StreamingAutotuneEngine_Fixed.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"StreamingAutotuneEngine_Fixed.d.ts","sourceRoot":"","sources":["../../../src/tuning/StreamingAutotuneEngine_Fixed.ts"],"names":[],"mappings":"AAiDA,qBAAa,uBAAuB;IAChC,OAAO,CAAC,KAAK,CAAwB;IACrC,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,UAAU,CAAc;IAChC,OAAO,CAAC,gBAAgB,CAAa;IACrC,OAAO,CAAC,YAAY,CAAa;gBAErB,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG;IAUpC,OAAO,CAAC,SAAS;IAIjB,OAAO,CAAC,QAAQ;IAuBT,kBAAkB,IAAI,MAAM;IAI5B,YAAY,CAAC,EAAE,EAAE,MAAM;IAIvB,aAAa,CAAC,MAAM,EAAE,GAAG,EAAE;IAIlC,OAAO,CAAC,KAAK,EAAE,YAAY,GAAG;QAAE,KAAK,EAAE,YAAY,CAAC;QAAC,OAAO,EAAE,YAAY,CAAA;KAAE;IAsC5E,YAAY,CAAC,QAAQ,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IAI5G,oBAAoB,CAAC,QAAQ,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IAqKpH,OAAO,CAAC,mBAAmB;IAQ3B,OAAO,CAAC,kBAAkB;IAe1B,OAAO,CAAC,gBAAgB;IAOxB,KAAK,IAAI,IAAI;IAIb,OAAO,CAAC,kBAAkB;CAqC7B"}
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.StreamingAutotuneEngine = void 0;
|
|
4
|
+
const OnlineStats_1 = require("../../../voice-engine-core/src/prosody/OnlineStats");
|
|
5
|
+
/**
 * Time-ordered queue of prosody events.
 *
 * An event is either a range (`time` .. `endTime`) or a window centred on
 * `time` that reaches `duration` frames to each side (10 when `duration` is
 * missing or 0). Events are kept sorted by `time`.
 */
class EventScheduler {
    /** Queued events, sorted ascending by `time`. */
    queue = [];
    /** Index of the first event that has not expired yet. */
    startIndex = 0;
    /**
     * Largest window reach among the windowed events queued so far. The queue
     * is ordered by `time` only, so a later event with a wide window can
     * already be active while an earlier, narrower one is not; the scan may
     * stop only once an event starts further ahead than this reach.
     */
    maxLookahead = 0;
    /**
     * Adds events and re-sorts the queue by `time`.
     *
     * @param events Objects with `time` and optionally `endTime` / `duration`.
     */
    enqueue(events) {
        this.queue.push(...events);
        this.queue.sort((a, b) => a.time - b.time);
        // Newly added events may sort before the old cursor, so rescan from the start.
        this.startIndex = 0;
        for (const event of events) {
            if (event.endTime === undefined) {
                this.maxLookahead = Math.max(this.maxLookahead, event.duration || 10);
            }
        }
    }
    /**
     * Returns the events whose range or window contains `currentTime`.
     *
     * @param currentTime Current position, in the same unit as `event.time`.
     * @returns Active events in queue order.
     */
    getActiveEvents(currentTime) {
        const active = [];
        // Advance the cursor past leading events that have fully ended.
        while (this.startIndex < this.queue.length) {
            const e = this.queue[this.startIndex];
            const end = e.endTime !== undefined ? e.endTime : (e.time + (e.duration || 10));
            if (end < currentTime) {
                this.startIndex++;
            }
            else {
                break;
            }
        }
        for (let i = this.startIndex; i < this.queue.length; i++) {
            const e = this.queue[i];
            // Beyond this point neither a range (needs time <= currentTime) nor
            // any window (reach <= maxLookahead) can contain currentTime.
            if (e.time - this.maxLookahead > currentTime) {
                break;
            }
            if (e.endTime !== undefined) {
                if (e.time <= currentTime && e.endTime >= currentTime) {
                    active.push(e);
                }
            }
            else if (Math.abs(e.time - currentTime) <= (e.duration || 10)) {
                active.push(e);
            }
        }
        return active;
    }
    /**
     * Drops expired events from the front of the queue once more than 50 have
     * accumulated behind the cursor.
     *
     * @param currentTime Unused; expiry is decided by getActiveEvents().
     */
    pruneOldEvents(currentTime) {
        if (this.startIndex > 50) {
            this.queue.splice(0, this.startIndex);
            this.startIndex = 0;
        }
    }
}
|
|
50
|
+
class StreamingAutotuneEngine {
|
|
51
|
+
state;
|
|
52
|
+
config;
|
|
53
|
+
preset;
|
|
54
|
+
frameCount = 0;
|
|
55
|
+
eventScheduler;
|
|
56
|
+
allowedSet;
|
|
57
|
+
_lastOutputCents = 0;
|
|
58
|
+
_mockPitchHz = 0;
|
|
59
|
+
constructor(config, preset) {
|
|
60
|
+
this.config = config;
|
|
61
|
+
this.preset = preset;
|
|
62
|
+
this.state = this.createInitialState();
|
|
63
|
+
this.eventScheduler = new EventScheduler();
|
|
64
|
+
const allowed = config.allowedPitchClasses || [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
|
|
65
|
+
this.allowedSet = new Set(allowed);
|
|
66
|
+
}
|
|
67
|
+
hzToCents(h) {
|
|
68
|
+
return (h > 1.0) ? (6900 + 1200 * Math.log2(h / 440)) : 0;
|
|
69
|
+
}
|
|
70
|
+
quantize(c) {
|
|
71
|
+
const rootOffset = this.config.rootOffsetCents || 0;
|
|
72
|
+
const local = c - rootOffset;
|
|
73
|
+
const midi = Math.round(local / 100);
|
|
74
|
+
const pc = ((midi % 12) + 12) % 12;
|
|
75
|
+
if (this.allowedSet.has(pc)) {
|
|
76
|
+
return { id: midi, center: midi * 100 + rootOffset };
|
|
77
|
+
}
|
|
78
|
+
for (let i = 1; i <= 6; i++) {
|
|
79
|
+
let m = midi + i;
|
|
80
|
+
let p = ((m % 12) + 12) % 12;
|
|
81
|
+
if (this.allowedSet.has(p))
|
|
82
|
+
return { id: m, center: m * 100 + rootOffset };
|
|
83
|
+
m = midi - i;
|
|
84
|
+
p = ((m % 12) + 12) % 12;
|
|
85
|
+
if (this.allowedSet.has(p))
|
|
86
|
+
return { id: m, center: m * 100 + rootOffset };
|
|
87
|
+
}
|
|
88
|
+
return { id: midi, center: midi * 100 + rootOffset };
|
|
89
|
+
}
|
|
90
|
+
getLastOutputCents() {
|
|
91
|
+
return this._lastOutputCents;
|
|
92
|
+
}
|
|
93
|
+
setMockPitch(hz) {
|
|
94
|
+
this._mockPitchHz = hz;
|
|
95
|
+
}
|
|
96
|
+
enqueueEvents(events) {
|
|
97
|
+
this.eventScheduler.enqueue(events);
|
|
98
|
+
}
|
|
99
|
+
process(chunk) {
|
|
100
|
+
const hopSize = 128;
|
|
101
|
+
const numFrames = Math.floor(chunk.length / hopSize);
|
|
102
|
+
const targets = new Float32Array(numFrames);
|
|
103
|
+
for (let i = 0; i < numFrames; i++) {
|
|
104
|
+
const startSample = i * hopSize;
|
|
105
|
+
// Mock Analysis (to be replaced by StreamingPitchTracker)
|
|
106
|
+
let sumSq = 0;
|
|
107
|
+
for (let j = 0; j < hopSize; j++) {
|
|
108
|
+
// Check bounds
|
|
109
|
+
if (startSample + j < chunk.length) {
|
|
110
|
+
const s = chunk[startSample + j];
|
|
111
|
+
sumSq += s * s;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
const rms = Math.sqrt(sumSq / hopSize);
|
|
115
|
+
const energyDb = rms > 1e-9 ? 20 * Math.log10(rms) : -100;
|
|
116
|
+
// Mock Pitch/Confidence (0 confidence = unvoiced)
|
|
117
|
+
// In real impl, this comes from F0Decomposer
|
|
118
|
+
const isVoiced = energyDb > -50 && this._mockPitchHz > 0;
|
|
119
|
+
const frameAnalysis = {
|
|
120
|
+
energyDb,
|
|
121
|
+
confidenceQ: isVoiced ? 10000 : 0,
|
|
122
|
+
pitchHz: this._mockPitchHz
|
|
123
|
+
};
|
|
124
|
+
// Using the new pipeline
|
|
125
|
+
this.processFramePipeline(frameAnalysis, this.frameCount++);
|
|
126
|
+
targets[i] = this._lastOutputCents;
|
|
127
|
+
}
|
|
128
|
+
return { audio: chunk.slice(), targets: targets };
|
|
129
|
+
}
|
|
130
|
+
processFrame(analysis, frameIndex) {
|
|
131
|
+
this.processFramePipeline(analysis, frameIndex);
|
|
132
|
+
}
|
|
133
|
+
processFramePipeline(analysis, frameIndex) {
|
|
134
|
+
const { energyDb, confidenceQ, pitchHz } = analysis;
|
|
135
|
+
const config = this.config;
|
|
136
|
+
const segmenter = this.state.segmenter;
|
|
137
|
+
// Defaults
|
|
138
|
+
const silenceDb = config.silenceThresholdDb ?? -60;
|
|
139
|
+
const voicingLimit = config.voicingThresholdQ ?? 2000;
|
|
140
|
+
const enterLimit = config.voicedEnterFrames ?? 2;
|
|
141
|
+
const exitLimit = config.voicedExitFrames ?? 5;
|
|
142
|
+
// 1. Input Conditions
|
|
143
|
+
const isSpeechCandidate = energyDb > silenceDb;
|
|
144
|
+
const isVoicedCandidate = isSpeechCandidate && (confidenceQ > voicingLimit);
|
|
145
|
+
// 2. Hysteresis Logic
|
|
146
|
+
if (segmenter.isVoiced) {
|
|
147
|
+
if (!isVoicedCandidate) {
|
|
148
|
+
segmenter.exitCount++;
|
|
149
|
+
if (segmenter.exitCount > exitLimit) {
|
|
150
|
+
this.handleSegmentEnd(frameIndex);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
else {
|
|
154
|
+
segmenter.exitCount = 0;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
else {
|
|
158
|
+
if (isVoicedCandidate) {
|
|
159
|
+
segmenter.enterCount++;
|
|
160
|
+
if (segmenter.enterCount >= enterLimit) {
|
|
161
|
+
this.handleSegmentStart(frameIndex);
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
else {
|
|
165
|
+
segmenter.enterCount = 0;
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
// 3. Process Voiced Frame Pipeline
|
|
169
|
+
if (segmenter.isVoiced) {
|
|
170
|
+
segmenter.accumulatedConf += confidenceQ;
|
|
171
|
+
segmenter.accumulatedEnergy += energyDb;
|
|
172
|
+
// --- Pipeline Step 1: Decomposition ---
|
|
173
|
+
const rawCents = this.hzToCents(pitchHz);
|
|
174
|
+
// For now, assuming raw input IS the decomposition
|
|
175
|
+
const centerCents = rawCents;
|
|
176
|
+
const residualCents = 0;
|
|
177
|
+
// --- Pipeline Step 2: Baseline/Intent ---
|
|
178
|
+
// Update Online Baseline (using Cents for linear regression on pitch)
|
|
179
|
+
OnlineStats_1.OnlineStats.update(this.state.baseline, frameIndex, rawCents);
|
|
180
|
+
const { slope, intercept } = OnlineStats_1.OnlineStats.getRegression(this.state.baseline);
|
|
181
|
+
const intentCents = centerCents;
|
|
182
|
+
// --- Pipeline Step 3: Stability ---
|
|
183
|
+
const stabilizer = this.state.stabilizer;
|
|
184
|
+
const hysteresis = config.hysteresisCents ?? 15;
|
|
185
|
+
const minHoldFrames = config.minHoldFrames ?? 6;
|
|
186
|
+
const rampFrames = config.rampFrames ?? 3;
|
|
187
|
+
// Calc delta/slope for transition detection
|
|
188
|
+
const delta = 0; // TODO: Track previous raw cents
|
|
189
|
+
const isTransition = delta > (config.slopeThreshFrame ?? 5);
|
|
190
|
+
const cand = this.quantize(intentCents);
|
|
191
|
+
// Initialize if first voiced frame logic covered by handleSegmentStart -> default state
|
|
192
|
+
if (stabilizer.currentNoteId === 0) {
|
|
193
|
+
stabilizer.currentNoteId = cand.id;
|
|
194
|
+
stabilizer.lastTargetCents = cand.center;
|
|
195
|
+
stabilizer.holdFrames = 0;
|
|
196
|
+
stabilizer.rampActive = false;
|
|
197
|
+
stabilizer.rampEndCents = cand.center;
|
|
198
|
+
stabilizer.rampStartCents = cand.center;
|
|
199
|
+
}
|
|
200
|
+
else {
|
|
201
|
+
stabilizer.holdFrames++;
|
|
202
|
+
let shouldSwitch = false;
|
|
203
|
+
if (!isTransition && stabilizer.holdFrames >= minHoldFrames) {
|
|
204
|
+
const currentErr = Math.abs(intentCents - stabilizer.lastTargetCents);
|
|
205
|
+
const candErr = Math.abs(intentCents - cand.center);
|
|
206
|
+
if (currentErr - candErr > hysteresis) {
|
|
207
|
+
shouldSwitch = true;
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
if (shouldSwitch) {
|
|
211
|
+
stabilizer.rampStartCents = stabilizer.rampActive ?
|
|
212
|
+
this.getCurrentRampValue(stabilizer) : stabilizer.lastTargetCents;
|
|
213
|
+
stabilizer.rampEndCents = cand.center;
|
|
214
|
+
stabilizer.rampActive = true;
|
|
215
|
+
stabilizer.rampProgress = 0;
|
|
216
|
+
stabilizer.currentNoteId = cand.id;
|
|
217
|
+
stabilizer.lastTargetCents = cand.center;
|
|
218
|
+
stabilizer.holdFrames = 0;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
// Ramping
|
|
222
|
+
let macroCents = stabilizer.lastTargetCents;
|
|
223
|
+
if (stabilizer.rampActive) {
|
|
224
|
+
stabilizer.rampProgress++;
|
|
225
|
+
macroCents = this.getCurrentRampValue(stabilizer);
|
|
226
|
+
if (stabilizer.rampProgress >= rampFrames) {
|
|
227
|
+
stabilizer.rampActive = false;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
// --- Pipeline Step 4: Events/PFC ---
|
|
231
|
+
this.eventScheduler.pruneOldEvents(frameIndex);
|
|
232
|
+
const activeEvents = this.eventScheduler.getActiveEvents(frameIndex);
|
|
233
|
+
let accentOffset = 0;
|
|
234
|
+
for (const event of activeEvents) {
|
|
235
|
+
const duration = event.duration || 10;
|
|
236
|
+
const strength = event.strength || 0;
|
|
237
|
+
const shape = event.shape || 'rise';
|
|
238
|
+
const radius = duration / 2;
|
|
239
|
+
const d = frameIndex - event.time;
|
|
240
|
+
if (Math.abs(d) <= radius) {
|
|
241
|
+
const sign = (shape === 'fall' || shape === 'fall-rise') ? -1.0 : 1.0;
|
|
242
|
+
const w = 0.5 * (1 + Math.cos((Math.PI * d) / radius));
|
|
243
|
+
accentOffset += w * strength * sign; // Simple additive
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
// Update PFC State
|
|
247
|
+
const pfc = this.state.pfc;
|
|
248
|
+
// Track max accent strength in recent window (simple approach)
|
|
249
|
+
const currentAbsAccent = Math.abs(accentOffset);
|
|
250
|
+
if (currentAbsAccent > 0.01) {
|
|
251
|
+
if (currentAbsAccent > pfc.focusStrength) {
|
|
252
|
+
pfc.focusStrength = currentAbsAccent;
|
|
253
|
+
pfc.focusTime = frameIndex;
|
|
254
|
+
}
|
|
255
|
+
pfc.activeFade = 1.0;
|
|
256
|
+
}
|
|
257
|
+
else {
|
|
258
|
+
pfc.activeFade *= 0.95; // Decay
|
|
259
|
+
pfc.focusStrength *= 0.95;
|
|
260
|
+
}
|
|
261
|
+
// --- Pipeline Step 5: Reconstruct ---
|
|
262
|
+
let finalCents = macroCents + accentOffset + residualCents;
|
|
263
|
+
// Apply PFC
|
|
264
|
+
if (this.state.pfc.activeFade > 0) {
|
|
265
|
+
const { slope, intercept } = OnlineStats_1.OnlineStats.getRegression(this.state.baseline);
|
|
266
|
+
const baselineAtT = intercept + slope * frameIndex;
|
|
267
|
+
const compressionStrength = this.config.pfcStrength ?? 0.5;
|
|
268
|
+
const factor = this.state.pfc.activeFade * compressionStrength;
|
|
269
|
+
if (!isNaN(intercept)) {
|
|
270
|
+
finalCents = baselineAtT + (finalCents - baselineAtT) * (1.0 - factor);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
this._lastOutputCents = finalCents;
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
getCurrentRampValue(stabilizer) {
|
|
277
|
+
// Simple linear interpolation
|
|
278
|
+
const total = this.config.rampFrames ?? 3;
|
|
279
|
+
if (stabilizer.rampProgress >= total)
|
|
280
|
+
return stabilizer.rampEndCents;
|
|
281
|
+
const t = stabilizer.rampProgress / total;
|
|
282
|
+
return stabilizer.rampStartCents + (stabilizer.rampEndCents - stabilizer.rampStartCents) * t;
|
|
283
|
+
}
|
|
284
|
+
handleSegmentStart(index) {
|
|
285
|
+
const segmenter = this.state.segmenter;
|
|
286
|
+
segmenter.isVoiced = true;
|
|
287
|
+
segmenter.enterCount = 0;
|
|
288
|
+
segmenter.exitCount = 0;
|
|
289
|
+
segmenter.currentSegmentStart = index;
|
|
290
|
+
segmenter.accumulatedConf = 0;
|
|
291
|
+
segmenter.accumulatedEnergy = 0;
|
|
292
|
+
// Reset Stabilizer
|
|
293
|
+
this.state.stabilizer.currentNoteId = 0;
|
|
294
|
+
this.state.stabilizer.holdFrames = 0;
|
|
295
|
+
this.state.stabilizer.rampActive = false;
|
|
296
|
+
}
|
|
297
|
+
handleSegmentEnd(index) {
|
|
298
|
+
const segmenter = this.state.segmenter;
|
|
299
|
+
segmenter.isVoiced = false;
|
|
300
|
+
segmenter.exitCount = 0;
|
|
301
|
+
segmenter.enterCount = 0;
|
|
302
|
+
}
|
|
303
|
+
reset() {
|
|
304
|
+
this.state = this.createInitialState();
|
|
305
|
+
}
|
|
306
|
+
createInitialState() {
|
|
307
|
+
return {
|
|
308
|
+
segmenter: {
|
|
309
|
+
isVoiced: false,
|
|
310
|
+
enterCount: 0,
|
|
311
|
+
exitCount: 0,
|
|
312
|
+
currentSegmentStart: 0,
|
|
313
|
+
accumulatedConf: 0,
|
|
314
|
+
accumulatedEnergy: 0
|
|
315
|
+
},
|
|
316
|
+
decomposer: {
|
|
317
|
+
buffer: new Float32Array(0),
|
|
318
|
+
microState: undefined
|
|
319
|
+
},
|
|
320
|
+
baseline: {
|
|
321
|
+
sumX: 0,
|
|
322
|
+
sumY: 0,
|
|
323
|
+
sumXY: 0,
|
|
324
|
+
sumXX: 0,
|
|
325
|
+
count: 0
|
|
326
|
+
},
|
|
327
|
+
stabilizer: {
|
|
328
|
+
currentNoteId: 0,
|
|
329
|
+
lastTargetCents: 0,
|
|
330
|
+
holdFrames: 0,
|
|
331
|
+
rampActive: false,
|
|
332
|
+
rampStartCents: 0,
|
|
333
|
+
rampEndCents: 0,
|
|
334
|
+
rampProgress: 0
|
|
335
|
+
},
|
|
336
|
+
pfc: {
|
|
337
|
+
focusTime: null,
|
|
338
|
+
focusStrength: 0,
|
|
339
|
+
activeFade: 0
|
|
340
|
+
}
|
|
341
|
+
};
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
exports.StreamingAutotuneEngine = StreamingAutotuneEngine;
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { F0TrackV1, VoicingMaskV1, TunePlanV1, TargetCurveV1 } from "@mcptoolshop/voice-engine-core";
|
|
2
|
+
/**
 * Declaration for the target-curve generator. Per the compiled implementation
 * shipped alongside it, the generator converts each voiced frame's f0 to
 * cents, quantizes it through ScaleQuantizer and smooths the result with a
 * one-pole glide.
 */
export declare class TargetCurveGenerator {
|
|
3
|
+
/**
 * Builds the per-frame target curve.
 *
 * @param f0 Pitch track; `f0MhzQ` is divided by 1000 to obtain Hz.
 * @param voicing Voicing mask; a frame counts as voiced when `voicedQ[i] > 0`.
 * @param plan Supplies `scaleConfig.allowedPitchClasses` and `parameters.glideMsQ`.
 * @returns Target curve carrying the framing fields of `f0` plus `targetCentsQ`.
 */
generate(f0: F0TrackV1, voicing: VoicingMaskV1, plan: TunePlanV1): TargetCurveV1;
|
|
4
|
+
}
|
|
5
|
+
//# sourceMappingURL=TargetCurveGenerator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TargetCurveGenerator.d.ts","sourceRoot":"","sources":["../../../src/tuning/TargetCurveGenerator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAGrG,qBAAa,oBAAoB;IAC7B,QAAQ,CACJ,EAAE,EAAE,SAAS,EACb,OAAO,EAAE,aAAa,EACtB,IAAI,EAAE,UAAU,GACjB,aAAa;CAsEnB"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TargetCurveGenerator = void 0;
|
|
4
|
+
const ScaleQuantizer_js_1 = require("./ScaleQuantizer.js");
|
|
5
|
+
class TargetCurveGenerator {
|
|
6
|
+
generate(f0, voicing, plan) {
|
|
7
|
+
const len = f0.f0MhzQ.length;
|
|
8
|
+
const targetCentsQ = new Int32Array(len);
|
|
9
|
+
const allowed = plan.scaleConfig.allowedPitchClasses;
|
|
10
|
+
const glideMs = plan.parameters.glideMsQ;
|
|
11
|
+
const dtMs = (f0.hopSamples / f0.sampleRateHz) * 1000;
|
|
12
|
+
// Glide Factor (1-pole exponential approach)
|
|
13
|
+
// alpha = 1 - exp(-dt / tau)
|
|
14
|
+
// usage: y = y + alpha * (target - y)
|
|
15
|
+
// If glideMs (tau) is 0, alpha = 1.
|
|
16
|
+
let alpha = 1.0;
|
|
17
|
+
if (glideMs > 0) {
|
|
18
|
+
alpha = 1.0 - Math.exp(-dtMs / Math.max(1, glideMs));
|
|
19
|
+
}
|
|
20
|
+
let currentCents = 0;
|
|
21
|
+
let p_initialized = false;
|
|
22
|
+
for (let i = 0; i < len; i++) {
|
|
23
|
+
// 1. Get Input Pitch
|
|
24
|
+
const isVoiced = voicing.voicedQ[i] > 0;
|
|
25
|
+
if (!isVoiced) {
|
|
26
|
+
// If not initialized, default to something sane (e.g. 6900)
|
|
27
|
+
// But generally hold last value.
|
|
28
|
+
if (!p_initialized) {
|
|
29
|
+
currentCents = 6900;
|
|
30
|
+
p_initialized = true;
|
|
31
|
+
}
|
|
32
|
+
// Hold last
|
|
33
|
+
targetCentsQ[i] = Math.round(currentCents * 1000);
|
|
34
|
+
continue;
|
|
35
|
+
}
|
|
36
|
+
let f0Hz = f0.f0MhzQ[i] / 1000.0;
|
|
37
|
+
// Sanity clamp
|
|
38
|
+
if (f0Hz < 20)
|
|
39
|
+
f0Hz = 20;
|
|
40
|
+
// MIDI 69 = A4 440Hz = 6900 cents
|
|
41
|
+
// Cents = 6900 + 1200 * log2(Hz/440)
|
|
42
|
+
const inputCents = 6900 + 1200 * Math.log2(f0Hz / 440);
|
|
43
|
+
// 2. Quantize
|
|
44
|
+
const targetNote = ScaleQuantizer_js_1.ScaleQuantizer.quantize(inputCents, allowed);
|
|
45
|
+
// 3. Glide
|
|
46
|
+
if (!p_initialized) {
|
|
47
|
+
currentCents = targetNote;
|
|
48
|
+
p_initialized = true;
|
|
49
|
+
}
|
|
50
|
+
else {
|
|
51
|
+
if (alpha >= 0.999) {
|
|
52
|
+
currentCents = targetNote;
|
|
53
|
+
}
|
|
54
|
+
else {
|
|
55
|
+
currentCents += alpha * (targetNote - currentCents);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
targetCentsQ[i] = Math.round(currentCents * 1000);
|
|
59
|
+
}
|
|
60
|
+
return {
|
|
61
|
+
sampleRateHz: f0.sampleRateHz,
|
|
62
|
+
frameHz: f0.frameHz,
|
|
63
|
+
hopSamples: f0.hopSamples,
|
|
64
|
+
t0Samples: f0.t0Samples,
|
|
65
|
+
targetCentsQ
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
exports.TargetCurveGenerator = TargetCurveGenerator;
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { F0TrackV1, VoicingMaskV1, TuneScoreV1, TargetCurveV1 } from "@mcptoolshop/voice-engine-core";
|
|
2
|
+
/**
 * Declaration for the reference target-curve generator. Per the compiled
 * implementation shipped alongside it, only scores with `mode === "scale"` are
 * accepted; other modes make generate() throw.
 */
export declare class TargetCurveRefV1 {
|
|
3
|
+
/** Takes no arguments; the compiled constructor body is empty. */
constructor();
|
|
4
|
+
/**
 * Builds the per-frame target curve for a scale-mode score.
 *
 * @param f0Track Pitch track; `f0MhzQ` is divided by 1000 to obtain Hz.
 * @param voicing Voicing mask; a frame counts as voiced when `voicedQ[i] > 0`.
 * @param score Tuning score; `key`, `scale` and `glideMsQ` are read from it.
 * @returns Target curve carrying the framing fields of `f0Track` plus `targetCentsQ`.
 * @throws Error when `score.mode` is not "scale".
 */
generate(f0Track: F0TrackV1, voicing: VoicingMaskV1, score: TuneScoreV1): TargetCurveV1;
|
|
5
|
+
}
|
|
6
|
+
//# sourceMappingURL=TargetCurveRefV1.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TargetCurveRefV1.d.ts","sourceRoot":"","sources":["../../../src/tuning/TargetCurveRefV1.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,SAAS,EAAE,aAAa,EAAE,WAAW,EAAE,aAAa,EAGvD,MAAM,gCAAgC,CAAC;AAExC,qBAAa,gBAAgB;;IAGlB,QAAQ,CACX,OAAO,EAAE,SAAS,EAClB,OAAO,EAAE,aAAa,EACtB,KAAK,EAAE,WAAW,GACnB,aAAa;CA4EnB"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TargetCurveRefV1 = void 0;
|
|
4
|
+
const voice_engine_core_1 = require("@mcptoolshop/voice-engine-core");
|
|
5
|
+
class TargetCurveRefV1 {
|
|
6
|
+
constructor() { }
|
|
7
|
+
generate(f0Track, voicing, score) {
|
|
8
|
+
// Validate inputs match in length/framing
|
|
9
|
+
const numFrames = f0Track.f0MhzQ.length;
|
|
10
|
+
const targetCentsQ = new Int32Array(numFrames);
|
|
11
|
+
// Parse Score
|
|
12
|
+
if (score.mode !== "scale") {
|
|
13
|
+
throw new Error("Only scale mode supported in V1");
|
|
14
|
+
}
|
|
15
|
+
const config = score;
|
|
16
|
+
const { tonicMidi } = (0, voice_engine_core_1.parseKey)(config.key);
|
|
17
|
+
const pitchClasses = (0, voice_engine_core_1.scaleToPitchClasses)(config.scale, tonicMidi);
|
|
18
|
+
// Glide Logic
|
|
19
|
+
const msPerFrame = 1000 / f0Track.frameHz;
|
|
20
|
+
const glideFrames = config.glideMsQ > 0 ? config.glideMsQ / msPerFrame : 0;
|
|
21
|
+
// Constant-Time Exponential Glide (1-pole IIR)
|
|
22
|
+
// k = 1 / (glideFrames + 1)
|
|
23
|
+
const k = glideFrames > 0 ? (1 / (glideFrames + 1)) : 1;
|
|
24
|
+
let currentCentsQ = 0; // State
|
|
25
|
+
let initialized = false;
|
|
26
|
+
for (let i = 0; i < numFrames; i++) {
|
|
27
|
+
const isVoiced = voicing.voicedQ[i] > 0;
|
|
28
|
+
if (!isVoiced) {
|
|
29
|
+
// If unvoiced, hold previous value.
|
|
30
|
+
// If never initialized, default to tonic.
|
|
31
|
+
if (!initialized) {
|
|
32
|
+
currentCentsQ = tonicMidi * 1000;
|
|
33
|
+
initialized = true;
|
|
34
|
+
}
|
|
35
|
+
targetCentsQ[i] = Math.round(currentCentsQ);
|
|
36
|
+
continue;
|
|
37
|
+
}
|
|
38
|
+
// 1. Measure F0 -> Cents
|
|
39
|
+
const f0Mhz = f0Track.f0MhzQ[i];
|
|
40
|
+
const f0Hz = f0Mhz / 1000;
|
|
41
|
+
// Safety check for invalid f0
|
|
42
|
+
if (f0Hz <= 1) { // Basic safety floor
|
|
43
|
+
targetCentsQ[i] = Math.round(currentCentsQ);
|
|
44
|
+
continue;
|
|
45
|
+
}
|
|
46
|
+
// Standard MIDI Cents: 69000 + 12000 * log2(f / 440)
|
|
47
|
+
const centsQ = 69000 + 12000 * Math.log2(f0Hz / 440);
|
|
48
|
+
// 2. Quantize
|
|
49
|
+
const goalCentsQ = (0, voice_engine_core_1.nearestAllowedPitch)(Math.round(centsQ), pitchClasses);
|
|
50
|
+
// 3. Initialize if needed
|
|
51
|
+
if (!initialized) {
|
|
52
|
+
currentCentsQ = goalCentsQ;
|
|
53
|
+
initialized = true;
|
|
54
|
+
}
|
|
55
|
+
// 4. Glide
|
|
56
|
+
currentCentsQ += (goalCentsQ - currentCentsQ) * k;
|
|
57
|
+
// 5. Store
|
|
58
|
+
targetCentsQ[i] = Math.round(currentCentsQ);
|
|
59
|
+
}
|
|
60
|
+
return {
|
|
61
|
+
sampleRateHz: f0Track.sampleRateHz,
|
|
62
|
+
frameHz: f0Track.frameHz,
|
|
63
|
+
hopSamples: f0Track.hopSamples,
|
|
64
|
+
t0Samples: f0Track.t0Samples,
|
|
65
|
+
targetCentsQ
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
exports.TargetCurveRefV1 = TargetCurveRefV1;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AudioBufferUtils.d.ts","sourceRoot":"","sources":["../../../src/utils/AudioBufferUtils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAE/D,wBAAgB,WAAW,CAAC,MAAM,EAAE,aAAa,GAAG,YAAY,CAkB/D"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.monoDownmix = monoDownmix;
|
|
4
|
+
/**
 * Averages all channels of a buffer into a single channel.
 *
 * @param buffer Object with `channels` (channel count) and `data` (one
 *   Float32Array per channel).
 * @returns For a one-channel buffer, the channel array itself (not a copy, so
 *   writes to the result are writes to the input). Otherwise a new
 *   Float32Array, as long as the first channel, holding the per-sample mean.
 *   An empty Float32Array when there is no channel data or `channels` is
 *   below 1.
 */
function monoDownmix(buffer) {
    const first = buffer.data[0];
    const channelCount = buffer.channels;
    // Nothing to mix: return an empty signal instead of failing on data[0].
    if (first === undefined || channelCount < 1) {
        return new Float32Array(0);
    }
    if (channelCount === 1) {
        return first;
    }
    const length = first.length;
    const output = new Float32Array(length);
    for (let i = 0; i < length; i++) {
        let sum = 0;
        for (let ch = 0; ch < channelCount; ch++) {
            // A missing channel or a shorter channel contributes silence
            // rather than turning the sample into NaN.
            sum += buffer.data[ch]?.[i] ?? 0;
        }
        output[i] = sum / channelCount;
    }
    return output;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"version.d.ts","sourceRoot":"","sources":["../../src/version.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,mBAAmB,UAAU,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mcptoolshop/voice-engine-dsp",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"main": "dist/src/index.js",
|
|
5
|
+
"types": "dist/src/index.d.ts",
|
|
6
|
+
"files": [
|
|
7
|
+
"dist/src",
|
|
8
|
+
"README.md",
|
|
9
|
+
"LICENSE"
|
|
10
|
+
],
|
|
11
|
+
"publishConfig": {
|
|
12
|
+
"access": "public"
|
|
13
|
+
},
|
|
14
|
+
"repository": {
|
|
15
|
+
"type": "git",
|
|
16
|
+
"url": "git+https://github.com/mcp-tool-shop-org/mcp-voice-engine.git"
|
|
17
|
+
},
|
|
18
|
+
"keywords": [
|
|
19
|
+
"voice",
|
|
20
|
+
"prosody",
|
|
21
|
+
"autotune",
|
|
22
|
+
"streaming",
|
|
23
|
+
"dsp",
|
|
24
|
+
"mcp"
|
|
25
|
+
],
|
|
26
|
+
"author": "MCP Voice Engine Maintainers",
|
|
27
|
+
"license": "MIT",
|
|
28
|
+
"scripts": {
|
|
29
|
+
"test": "npx vitest run",
|
|
30
|
+
"test:meaning": "npx vitest run test/streaming_golden.test.ts",
|
|
31
|
+
"test:determinism": "npx vitest run test/determinism_matrix.test.ts",
|
|
32
|
+
"bench:rtf": "npx vitest run test/rtf_benchmark.test.ts",
|
|
33
|
+
"smoke": "npx tsx tools/smoke-cli.ts"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"@mcptoolshop/voice-engine-core": "0.0.1"
|
|
37
|
+
}
|
|
38
|
+
}
|