@mcptoolshop/voice-engine-dsp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +78 -0
- package/dist/src/adapters/AudioWorkletProcessor.d.ts +31 -0
- package/dist/src/adapters/AudioWorkletProcessor.d.ts.map +1 -0
- package/dist/src/adapters/AudioWorkletProcessor.js +77 -0
- package/dist/src/adapters/NodeStreamAutotune.d.ts +28 -0
- package/dist/src/adapters/NodeStreamAutotune.d.ts.map +1 -0
- package/dist/src/adapters/NodeStreamAutotune.js +103 -0
- package/dist/src/analysis/PitchTrackerRefV1.d.ts +13 -0
- package/dist/src/analysis/PitchTrackerRefV1.d.ts.map +1 -0
- package/dist/src/analysis/PitchTrackerRefV1.js +136 -0
- package/dist/src/analysis/VoicingDetectorRefV1.d.ts +13 -0
- package/dist/src/analysis/VoicingDetectorRefV1.d.ts.map +1 -0
- package/dist/src/analysis/VoicingDetectorRefV1.js +77 -0
- package/dist/src/index.d.ts +8 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +22 -0
- package/dist/src/prosody/AccentRenderer.d.ts +15 -0
- package/dist/src/prosody/AccentRenderer.d.ts.map +1 -0
- package/dist/src/prosody/AccentRenderer.js +66 -0
- package/dist/src/prosody/Presets.d.ts +3 -0
- package/dist/src/prosody/Presets.d.ts.map +1 -0
- package/dist/src/prosody/Presets.js +49 -0
- package/dist/src/prosody/SafetyRails.d.ts +21 -0
- package/dist/src/prosody/SafetyRails.d.ts.map +1 -0
- package/dist/src/prosody/SafetyRails.js +65 -0
- package/dist/src/transformation/FormantStrategyV1.d.ts +5 -0
- package/dist/src/transformation/FormantStrategyV1.d.ts.map +1 -0
- package/dist/src/transformation/FormantStrategyV1.js +39 -0
- package/dist/src/transformation/PitchShifterRefV1.d.ts +9 -0
- package/dist/src/transformation/PitchShifterRefV1.d.ts.map +1 -0
- package/dist/src/transformation/PitchShifterRefV1.js +120 -0
- package/dist/src/tuning/AutotuneExecutor.d.ts +16 -0
- package/dist/src/tuning/AutotuneExecutor.d.ts.map +1 -0
- package/dist/src/tuning/AutotuneExecutor.js +217 -0
- package/dist/src/tuning/CorrectionController.d.ts +5 -0
- package/dist/src/tuning/CorrectionController.d.ts.map +1 -0
- package/dist/src/tuning/CorrectionController.js +91 -0
- package/dist/src/tuning/CorrectionControllerRefV1.d.ts +6 -0
- package/dist/src/tuning/CorrectionControllerRefV1.d.ts.map +1 -0
- package/dist/src/tuning/CorrectionControllerRefV1.js +63 -0
- package/dist/src/tuning/ScaleQuantizer.d.ts +7 -0
- package/dist/src/tuning/ScaleQuantizer.d.ts.map +1 -0
- package/dist/src/tuning/ScaleQuantizer.js +43 -0
- package/dist/src/tuning/StreamingAutotuneEngine.d.ts +43 -0
- package/dist/src/tuning/StreamingAutotuneEngine.d.ts.map +1 -0
- package/dist/src/tuning/StreamingAutotuneEngine.js +389 -0
- package/dist/src/tuning/StreamingAutotuneEngine_Fixed.d.ts +36 -0
- package/dist/src/tuning/StreamingAutotuneEngine_Fixed.d.ts.map +1 -0
- package/dist/src/tuning/StreamingAutotuneEngine_Fixed.js +344 -0
- package/dist/src/tuning/TargetCurveGenerator.d.ts +5 -0
- package/dist/src/tuning/TargetCurveGenerator.d.ts.map +1 -0
- package/dist/src/tuning/TargetCurveGenerator.js +69 -0
- package/dist/src/tuning/TargetCurveRefV1.d.ts +6 -0
- package/dist/src/tuning/TargetCurveRefV1.d.ts.map +1 -0
- package/dist/src/tuning/TargetCurveRefV1.js +69 -0
- package/dist/src/utils/AudioBufferUtils.d.ts +3 -0
- package/dist/src/utils/AudioBufferUtils.d.ts.map +1 -0
- package/dist/src/utils/AudioBufferUtils.js +19 -0
- package/dist/src/version.d.ts +2 -0
- package/dist/src/version.d.ts.map +1 -0
- package/dist/src/version.js +4 -0
- package/package.json +38 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.AccentRenderer = void 0;
|
|
4
|
+
class AccentRenderer {
    /**
     * Renders prosodic accent events into a per-frame additive offset curve.
     *
     * Every event whose `type` is 'accent' contributes a raised-cosine bump
     * centred on `event.time` (a frame index) and spanning `event.spanFrames`
     * frames, or `style.accentSpanSeconds * frameRateHz` frames when the event
     * carries no positive span. Bumps from overlapping events are summed.
     * Shapes 'fall' and 'fall-rise' invert the bump; every other shape
     * (default 'rise') is rendered as a positive bump.
     *
     * Events whose peak value is not a finite number (for example a missing
     * `strength`) are skipped, so one malformed event cannot turn the frames
     * under its window into NaN.
     *
     * @param events List of prosody events to render
     * @param totalFrames Total number of frames in the output buffer
     * @param style Prosody style configuration (accentMaxCents, accentSpanSeconds, eventStrengthScale)
     * @param frameRateHz Frame rate for time conversions (default 100)
     * @returns Float32Array of rendered values (additive relative cents)
     */
    static render(events, totalFrames, style, frameRateHz = 100) {
        const output = new Float32Array(totalFrames);
        const { accentMaxCents, accentSpanSeconds, eventStrengthScale } = style;
        // Fallback span, converted from seconds to frames.
        const defaultDurationFrames = Math.round(accentSpanSeconds * frameRateHz);
        for (const event of events) {
            if (event.type !== 'accent') {
                continue;
            }
            const { time, strength } = event;
            const shape = event.shape || 'rise';
            // Prefer the event's own span; fall back to the style default.
            let duration = event.spanFrames;
            if (!duration || duration <= 0) {
                duration = defaultDurationFrames;
            }
            // Peak cents: normalized strength * global scale * max cents.
            const peakCents = strength * eventStrengthScale * accentMaxCents;
            if (!Number.isFinite(peakCents)) {
                // Malformed strength/style value: adding it would write NaN
                // into every frame touched by this accent.
                continue;
            }
            const radius = duration / 2;
            if (!(radius > 0)) {
                // Zero-width (or NaN) window contributes nothing.
                continue;
            }
            // Frames covered by the window, clamped to the buffer.
            const firstFrame = Math.max(0, Math.ceil(time - radius));
            const lastFrame = Math.min(totalFrames - 1, Math.floor(time + radius));
            // Only a sign flip distinguishes shapes for now; compound shapes
            // such as 'fall-rise' are approximated by a single inverted bump.
            const sign = (shape === 'fall' || shape === 'fall-rise') ? -1.0 : 1.0;
            for (let i = firstFrame; i <= lastFrame; i++) {
                // Kernel 0.5 * (1 + cos(pi * d / radius)): 1 at the centre,
                // 0 at +/- radius. Every i in range already has |d| <= radius.
                const ratio = (i - time) / radius;
                const w = 0.5 * (1 + Math.cos(Math.PI * ratio));
                output[i] += w * peakCents * sign;
            }
        }
        return output;
    }
}
|
|
66
|
+
exports.AccentRenderer = AccentRenderer;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Presets.d.ts","sourceRoot":"","sources":["../../../src/prosody/Presets.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAqD,MAAM,qDAAqD,CAAC;AAsBzI,eAAO,MAAM,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAqDnD,CAAC"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.PRESETS = void 0;
|
|
4
|
+
const ProsodyPresets_js_1 = require("../../../voice-engine-core/src/config/ProsodyPresets.js");
|
|
5
|
+
/**
 * Builds a preset record by layering caller overrides on top of the default
 * analysis, stabilizer and tuning configurations.
 *
 * @param id Preset identifier
 * @param name Display name
 * @param desc Description, stored under `description`
 * @param overrides Optional partial preset; nested sections are merged
 *                  key-by-key over the defaults, scalar fields fall back to
 *                  1.0 / 20 / 100 when nullish
 * @returns The assembled preset object
 */
function createPreset(id, name, desc, overrides = {}) {
    const defaults = ProsodyPresets_js_1;
    const analysis = Object.assign({}, defaults.DEFAULT_PROSODY_CONFIG_V1, overrides.analysis || {});
    const stabilizer = Object.assign({}, defaults.DEFAULT_STABILIZER_CONFIG_V1, overrides.stabilizer || {});
    const tuning = Object.assign({}, defaults.DEFAULT_TUNING_CONFIG_V1, overrides.tuning || {});
    const preset = { id, name, description: desc, analysis, stabilizer, tuning };
    // `??` keeps explicit zeros supplied by the caller.
    preset.correctionStrength = overrides.correctionStrength ?? 1.0;
    preset.attackMs = overrides.attackMs ?? 20;
    preset.releaseMs = overrides.releaseMs ?? 100;
    return preset;
}
|
|
18
|
+
exports.PRESETS = {
|
|
19
|
+
DEFAULT_CLEAN: createPreset("default_clean", "Default Clean", "Balanced correction for clean speech", {
|
|
20
|
+
stabilizer: {
|
|
21
|
+
hysteresisCents: 15,
|
|
22
|
+
switchRampMs: 30
|
|
23
|
+
}
|
|
24
|
+
} // Uses defaults mostly
|
|
25
|
+
),
|
|
26
|
+
HARD_TUNE: createPreset("hard_tune", "Hard Tune", "Zero hysteresis, fast ramp for maximum robotic effect", {
|
|
27
|
+
stabilizer: {
|
|
28
|
+
hysteresisCents: 0, // Note: SafetyRails may clamp this to 5
|
|
29
|
+
switchRampMs: 5, // Fast ramp
|
|
30
|
+
minHoldMs: 10 // Short hold
|
|
31
|
+
},
|
|
32
|
+
correctionStrength: 1.0,
|
|
33
|
+
attackMs: 5,
|
|
34
|
+
releaseMs: 5
|
|
35
|
+
}),
|
|
36
|
+
NO_WARBLE: createPreset("no_warble", "No Warble", "High hysteresis and slow ramp to prevent artifacts", {
|
|
37
|
+
stabilizer: {
|
|
38
|
+
hysteresisCents: 25,
|
|
39
|
+
switchRampMs: 100, // "Slow ramp" - interpreted as 100ms (approx 10 frames @ 100Hz)
|
|
40
|
+
minHoldMs: 100
|
|
41
|
+
},
|
|
42
|
+
analysis: {
|
|
43
|
+
voicingThresholdQ: 3000 // Conservative voicing (higher confidence required)
|
|
44
|
+
}
|
|
45
|
+
}),
|
|
46
|
+
SUBTLE: createPreset("subtle", "Subtle", "Low correction strength for natural enhancement", {
|
|
47
|
+
correctionStrength: 0.3
|
|
48
|
+
})
|
|
49
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { ProsodyPresetV1 } from "../../../voice-engine-core/src/prosody/ProsodyV1.js";
|
|
2
|
+
/**
|
|
3
|
+
* Validates and clamps a Prosody configuration to safe operating ranges.
|
|
4
|
+
* This prevents configurations that might cause severe audio artifacts
|
|
5
|
+
* (like rapid switching/warble due to 0 hysteresis or extreme sensitivity).
|
|
6
|
+
*
|
|
7
|
+
* @param config The configuration to validate
|
|
8
|
+
* @returns A new configuration object with clamped values
|
|
9
|
+
*/
|
|
10
|
+
export declare function validateAndClampConfig(config: ProsodyPresetV1): ProsodyPresetV1;
|
|
11
|
+
/**
|
|
12
|
+
* Applies an expressiveness scale factor to the configuration.
|
|
13
|
+
*
|
|
14
|
+
* @param config The base configuration
|
|
15
|
+
* @param amount Amount of expressiveness (0.0 to 1.0).
|
|
16
|
+
* 0.0 = Robotic / Strict (Full Correction)
|
|
17
|
+
* 1.0 = Expressive / Natural (Zero Correction or reduced strength)
|
|
18
|
+
* @returns Modified configuration
|
|
19
|
+
*/
|
|
20
|
+
export declare function applyExpressiveness(config: ProsodyPresetV1, amount: number): ProsodyPresetV1;
|
|
21
|
+
//# sourceMappingURL=SafetyRails.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SafetyRails.d.ts","sourceRoot":"","sources":["../../../src/prosody/SafetyRails.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,qDAAqD,CAAC;AAEtF;;;;;;;GAOG;AACH,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,eAAe,GAAG,eAAe,CAoC/E;AAED;;;;;;;;GAQG;AACH,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,eAAe,EAAE,MAAM,EAAE,MAAM,GAAG,eAAe,CAe5F"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.validateAndClampConfig = validateAndClampConfig;
|
|
4
|
+
exports.applyExpressiveness = applyExpressiveness;
|
|
5
|
+
/**
 * Validates and clamps a Prosody configuration to safe operating ranges.
 * This prevents configurations that might cause severe audio artifacts
 * (like rapid switching/warble due to 0 hysteresis or extreme sensitivity).
 *
 * Rules applied:
 *  - `stabilizer.hysteresisCents`, when provided, is raised to at least 5.
 *    A provided value that is not a number (NaN) is also set to 5, since it
 *    cannot be shown to be safe. A missing value is left missing.
 *  - `analysis.voicingThresholdQ` is clamped to [100, 9000]; a missing, null
 *    or NaN value is replaced by the default 2000 before clamping.
 *
 * The input is never mutated: the top level and the `stabilizer` / `analysis`
 * sections are copied before being modified. Other nested sections are shared
 * with the input.
 *
 * @param config The configuration to validate
 * @returns A new configuration object with clamped values
 */
function validateAndClampConfig(config) {
    const MIN_HYSTERESIS_CENTS = 5;
    const MIN_VOICING_THRESHOLD_Q = 100;
    const MAX_VOICING_THRESHOLD_Q = 9000;
    const DEFAULT_VOICING_THRESHOLD_Q = 2000;

    const clamped = { ...config };

    // Hysteresis < 5 cents can cause rapid oscillation between pitch classes (warble).
    if (config.stabilizer) {
        const stabilizer = { ...config.stabilizer };
        const hysteresis = stabilizer.hysteresisCents;
        // `!(x >= min)` rather than `x < min` so that NaN is clamped too.
        if (hysteresis != null && !(hysteresis >= MIN_HYSTERESIS_CENTS)) {
            stabilizer.hysteresisCents = MIN_HYSTERESIS_CENTS;
        }
        clamped.stabilizer = stabilizer;
    }

    // voicingThresholdQ range is 0 to 10000.
    // < 100 implies almost everything is voiced (noise artifacts).
    // > 9000 implies almost nothing is voiced.
    if (config.analysis) {
        const analysis = { ...config.analysis };
        const raw = analysis.voicingThresholdQ;
        // null would otherwise coerce to 0 and NaN would pass through
        // Math.min/Math.max untouched; both are treated as "not set".
        const numeric = raw == null ? NaN : Number(raw);
        const thresh = Number.isNaN(numeric) ? DEFAULT_VOICING_THRESHOLD_Q : numeric;
        analysis.voicingThresholdQ = Math.max(
            MIN_VOICING_THRESHOLD_Q,
            Math.min(MAX_VOICING_THRESHOLD_Q, thresh)
        );
        clamped.analysis = analysis;
    }

    return clamped;
}
|
|
45
|
+
/**
 * Applies an expressiveness scale factor to the configuration.
 *
 * Expressiveness reduces the correction strength: the base strength
 * (`config.correctionStrength`, default 1.0) is multiplied by `1 - amount`.
 * The input configuration is not mutated; a shallow copy is returned.
 *
 * @param config The base configuration
 * @param amount Amount of expressiveness (0.0 to 1.0); values outside the
 *               range are clamped. A missing or NaN amount is treated as 0
 *               so that correctionStrength never becomes NaN.
 *               0.0 = Robotic / Strict (Full Correction)
 *               1.0 = Expressive / Natural (Zero Correction)
 * @returns Modified configuration
 */
function applyExpressiveness(config, amount) {
    const modified = { ...config };
    // Math.min/Math.max propagate NaN, so filter it out before clamping.
    const numericAmount = Number(amount);
    const safeAmount = Number.isNaN(numericAmount)
        ? 0
        : Math.max(0, Math.min(1, numericAmount));
    const baseStrength = config.correctionStrength ?? 1.0;
    modified.correctionStrength = baseStrength * (1.0 - safeAmount);
    return modified;
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { IFormantStrategy, AudioBufferV1 } from "@mcptoolshop/voice-engine-core";
|
|
2
|
+
export declare class FormantStrategyV1 implements IFormantStrategy {
|
|
3
|
+
apply(tuned: AudioBufferV1, original: AudioBufferV1): Promise<AudioBufferV1>;
|
|
4
|
+
}
|
|
5
|
+
//# sourceMappingURL=FormantStrategyV1.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"FormantStrategyV1.d.ts","sourceRoot":"","sources":["../../../src/transformation/FormantStrategyV1.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAEjF,qBAAa,iBAAkB,YAAW,gBAAgB;IAChD,KAAK,CAAC,KAAK,EAAE,aAAa,EAAE,QAAQ,EAAE,aAAa,GAAG,OAAO,CAAC,aAAa,CAAC;CAsCrF"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.FormantStrategyV1 = void 0;
|
|
4
|
+
class FormantStrategyV1 {
    /**
     * Artifact guard applied after pitch shifting: a soft-knee limiter that
     * keeps every output sample within [-1, 1].
     *
     * Samples whose magnitude is at or below the knee (0.9) pass through
     * unchanged. Above the knee the excess is compressed with tanh so that the
     * transfer curve is continuous, monotonic and approaches +/-1.
     * (Applying tanh only to samples beyond +/-1 would map 1.0 -> 1.0 but
     * 1.0001 -> ~0.76, creating a step at every peak that crosses full scale.)
     *
     * Only channel 0 of `tuned` is processed and returned (mono). `original`
     * is accepted for interface compatibility and is currently unused; no RMS
     * or level matching against it is performed.
     *
     * @param tuned Pitch-shifted buffer; `tuned.data[0]` is copied, not mutated
     * @param original Unprocessed buffer (unused)
     * @returns A copy of `tuned` whose `data` holds the single limited channel
     */
    async apply(tuned, original) {
        const SOFT_CLIP_KNEE = 0.9;
        const headroom = 1.0 - SOFT_CLIP_KNEE;
        const outData = new Float32Array(tuned.data[0]);
        for (let i = 0; i < outData.length; i++) {
            const s = outData[i];
            const magnitude = Math.abs(s);
            // NaN fails this comparison and is left as-is.
            if (magnitude > SOFT_CLIP_KNEE) {
                // Slope is 1 at the knee and the result saturates at 1.0.
                const limited = SOFT_CLIP_KNEE + headroom * Math.tanh((magnitude - SOFT_CLIP_KNEE) / headroom);
                outData[i] = Math.sign(s) * limited;
            }
        }
        return {
            ...tuned,
            data: [outData]
        };
    }
}
|
|
39
|
+
exports.FormantStrategyV1 = FormantStrategyV1;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { IPitchShifter, AudioBufferV1, F0TrackV1, VoicingMaskV1, TargetCurveV1, CorrectionEnvelopeV1 } from "@mcptoolshop/voice-engine-core";
|
|
2
|
+
export declare class PitchShifterRefV1 implements IPitchShifter {
|
|
3
|
+
readonly id = "voice-engine-dsp.pitch-shifter.v1";
|
|
4
|
+
readonly version = "1.0.0";
|
|
5
|
+
private formantStrategy;
|
|
6
|
+
capabilities(): string[];
|
|
7
|
+
shift(audio: AudioBufferV1, f0Track: F0TrackV1, voicing: VoicingMaskV1, target: TargetCurveV1, envelope: CorrectionEnvelopeV1, request?: any): Promise<AudioBufferV1>;
|
|
8
|
+
}
|
|
9
|
+
//# sourceMappingURL=PitchShifterRefV1.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PitchShifterRefV1.d.ts","sourceRoot":"","sources":["../../../src/transformation/PitchShifterRefV1.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,aAAa,EACb,aAAa,EAAE,SAAS,EAAE,aAAa,EAAE,aAAa,EAAE,oBAAoB,EAC/E,MAAM,gCAAgC,CAAC;AAIxC,qBAAa,iBAAkB,YAAW,aAAa;IACnD,QAAQ,CAAC,EAAE,uCAAuC;IAClD,QAAQ,CAAC,OAAO,WAAW;IAE3B,OAAO,CAAC,eAAe,CAA2B;IAElD,YAAY,IAAI,MAAM,EAAE;IAIlB,KAAK,CACP,KAAK,EAAE,aAAa,EACpB,OAAO,EAAE,SAAS,EAClB,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,oBAAoB,EAC9B,OAAO,CAAC,EAAE,GAAG,GACd,OAAO,CAAC,aAAa,CAAC;CA4H5B"}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.PitchShifterRefV1 = void 0;
|
|
4
|
+
const FormantStrategyV1_1 = require("./FormantStrategyV1");
|
|
5
|
+
/**
 * Reference pitch shifter ("PSOLA-lite"): re-synthesises voiced audio by
 * overlap-adding Hann-windowed grains of the input at the desired output
 * pitch rate, then passes the result through the formant/artifact guard.
 */
class PitchShifterRefV1 {
    // Component identifier and version strings.
    id = "voice-engine-dsp.pitch-shifter.v1";
    version = "1.0.0";
    // Post-processing stage applied to the shifted buffer at the end of shift().
    formantStrategy = new FormantStrategyV1_1.FormantStrategyV1();
    /**
     * @returns The capability tags advertised by this shifter.
     */
    capabilities() {
        return ["pitch-shift", "psola-lite", "formant-guard"];
    }
    /**
     * Shifts channel 0 of `audio` towards the per-frame target pitch.
     *
     * Per output sample i the frame index is floor(i / hopSamples), clamped to
     * the frame range. Unvoiced frames copy the input sample through. Voiced
     * frames advance a phase accumulator by outputF0 / sampleRate; each time it
     * wraps, one grain is read around a nearby input peak and overlap-added
     * into the output around i.
     *
     * @param audio Input buffer; only `data[0]` is read (mono assumption)
     * @param f0Track Pitch track; reads `sampleRateHz`, `hopSamples`, `f0MhzQ`
     *                (divided by 1000 to obtain Hz - presumably milli-Hz)
     * @param voicing Voicing mask; frame is voiced when `voicedQ[frame] > 0`
     * @param target Target curve; `targetCentsQ` is divided by 1000 (assumed milli-cents)
     * @param envelope Correction envelope; `strengthQ` is divided by 10000
     *                 (presumably mapping 0..10000 to 0..1 - TODO confirm)
     * @param request Optional full request; not read by this implementation
     * @returns A copy of `audio` with `data` replaced by the guarded, shifted channel
     * @throws Error when `audio.sampleRate` differs from `f0Track.sampleRateHz`
     */
    async shift(audio, f0Track, voicing, target, envelope, request // Currently unused
    ) {
        // Validation: audio and pitch track must share a sample rate.
        const sr = audio.sampleRate;
        if (sr !== f0Track.sampleRateHz) {
            throw new Error("Sample rate mismatch");
        }
        // Output starts as silence; voiced regions are built purely from grains.
        const outData = new Float32Array(audio.data[0].length);
        const inData = audio.data[0]; // Mono assumption V1
        // Grain-trigger phase accumulator, carried across samples and frames.
        let phase = 0;
        const frames = f0Track.f0MhzQ.length;
        const hop = f0Track.hopSamples;
        // Grain-length mode. Fixed to formant-preserving (grain length follows
        // the INPUT period); the alternative follows the output period.
        const useChipmunk = false; // TODO: pipe from request
        for (let i = 0; i < outData.length; i++) {
            // 1. Frame index for this sample, clamped to [0, frames - 1].
            let frameIdx = Math.floor(i / hop);
            if (frameIdx >= frames)
                frameIdx = frames - 1;
            if (frameIdx < 0)
                frameIdx = 0;
            // 2. Unvoiced bypass: copy the input sample through.
            // NOTE(review): this is an assignment, so any grain tail that an
            // earlier voiced sample already overlap-added at index i is replaced.
            const isVoiced = voicing.voicedQ[frameIdx] > 0;
            if (!isVoiced) {
                outData[i] = inData[i]; // TODO: Crossfade
                continue;
            }
            // 3. Target pitch for this frame.
            const targetValCents = target.targetCentsQ[frameIdx] / 1000.0; // Assume Milli-Cents in Q
            let inputF0Mhz = f0Track.f0MhzQ[frameIdx];
            // Non-positive F0 on a voiced frame falls back to 100000 (100 Hz after /1000).
            if (inputF0Mhz <= 0)
                inputF0Mhz = 100000;
            const strength = envelope.strengthQ[frameIdx] / 10000;
            const inputHz = inputF0Mhz / 1000;
            // Cents scale anchored so that 440 Hz = 6900.
            const inputCents = 6900 + 1200 * Math.log2(inputHz / 440);
            // Move from the input pitch towards the target by `strength`.
            const desiredCents = inputCents + (targetValCents - inputCents) * strength;
            const shiftCents = (desiredCents - inputCents);
            const ratio = Math.pow(2, shiftCents / 1200);
            // 4. Grain trigger: the accumulator wraps once per output pitch period.
            const outputF0 = inputHz * ratio;
            phase += outputF0 / sr;
            if (phase >= 1) {
                phase -= 1;
                // Grain length is two periods of the base pitch:
                // input period (preserve formants) or output period (shift formants).
                const pInput = sr / inputHz;
                const pBase = useChipmunk ? (sr / outputF0) : pInput;
                const grainLen = Math.floor(2 * pBase);
                const overlapGain = inputHz / outputF0; // Simple density comp
                // Pitch-mark refinement: find the largest |sample| within
                // +/- half an input period (capped at 512 samples) of i and
                // centre the READ window there.
                let center = i;
                const searchWin = Math.min(Math.floor(pInput / 2), 512);
                let maxVal = -1;
                let bestOffset = 0;
                for (let o = -searchWin; o <= searchWin; o++) {
                    const idx = i + o;
                    if (idx >= 0 && idx < inData.length) {
                        const val = Math.abs(inData[idx]);
                        if (val > maxVal) {
                            maxVal = val;
                            bestOffset = o;
                        }
                    }
                }
                center = i + bestOffset;
                for (let k = 0; k < grainLen; k++) {
                    // Write position is centred on i; read position is centred
                    // on the detected peak. Both use the same offset k.
                    const pos = i - Math.floor(grainLen / 2) + k;
                    const readPos = center - Math.floor(grainLen / 2) + k;
                    if (pos >= 0 && pos < outData.length && readPos >= 0 && readPos < inData.length) {
                        // Hann window over the grain.
                        const w = 0.5 - 0.5 * Math.cos(2 * Math.PI * k / grainLen);
                        outData[pos] += inData[readPos] * w * overlapGain;
                    }
                }
            }
        }
        // 5. Formant / artifact guard on the assembled buffer.
        const result = { ...audio, data: [outData] };
        const guarded = await this.formantStrategy.apply(result, audio);
        return guarded;
    }
}
|
|
119
|
+
exports.PitchShifterRefV1 = PitchShifterRefV1;
|
|
120
|
+
/**
 * Module-local shorthand for Math.floor.
 *
 * @param x Number to round down
 * @returns The largest integer less than or equal to x
 */
function floor(x) {
    const rounded = Math.floor(x);
    return rounded;
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { TuneRequestV1, AudioBufferV1 } from "@mcptoolshop/voice-engine-core";
|
|
2
|
+
export declare class AutotuneExecutor {
|
|
3
|
+
private resolver;
|
|
4
|
+
private tracker;
|
|
5
|
+
private curveGen;
|
|
6
|
+
private envGen;
|
|
7
|
+
private shifter;
|
|
8
|
+
private decomposer;
|
|
9
|
+
private segmenter;
|
|
10
|
+
private baselineModel;
|
|
11
|
+
private stabilizer;
|
|
12
|
+
private resolveProsodyPreset;
|
|
13
|
+
execute(req: TuneRequestV1, audio: AudioBufferV1): Promise<AudioBufferV1>;
|
|
14
|
+
private calculateEnergyDb;
|
|
15
|
+
}
|
|
16
|
+
//# sourceMappingURL=AutotuneExecutor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AutotuneExecutor.d.ts","sourceRoot":"","sources":["../../../src/tuning/AutotuneExecutor.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,aAAa,EAAE,aAAa,EAG/B,MAAM,gCAAgC,CAAC;AAqBxC,qBAAa,gBAAgB;IACzB,OAAO,CAAC,QAAQ,CAA0B;IAG1C,OAAO,CAAC,OAAO,CAKZ;IAEH,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,MAAM,CAA8B;IAC5C,OAAO,CAAC,OAAO,CAA2B;IAC1C,OAAO,CAAC,UAAU,CAAsB;IACxC,OAAO,CAAC,SAAS,CAA0B;IAC3C,OAAO,CAAC,aAAa,CAA6B;IAClD,OAAO,CAAC,UAAU,CAA0B;IAE5C,OAAO,CAAC,oBAAoB;IActB,OAAO,CAAC,GAAG,EAAE,aAAa,EAAE,KAAK,EAAE,aAAa,GAAG,OAAO,CAAC,aAAa,CAAC;IA0L/E,OAAO,CAAC,iBAAiB;CAqB5B"}
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.AutotuneExecutor = void 0;
|
|
4
|
+
const voice_engine_core_1 = require("@mcptoolshop/voice-engine-core");
|
|
5
|
+
const PitchTrackerRefV1_js_1 = require("../analysis/PitchTrackerRefV1.js");
|
|
6
|
+
const PitchShifterRefV1_js_1 = require("../transformation/PitchShifterRefV1.js");
|
|
7
|
+
const TargetCurveGenerator_js_1 = require("./TargetCurveGenerator.js");
|
|
8
|
+
const CorrectionController_js_1 = require("./CorrectionController.js");
|
|
9
|
+
const F0Decomposer_js_1 = require("../../../voice-engine-core/src/prosody/F0Decomposer.js");
|
|
10
|
+
const ProsodySegmenter_js_1 = require("../../../voice-engine-core/src/prosody/ProsodySegmenter.js");
|
|
11
|
+
const PhraseBaselineModel_js_1 = require("../../../voice-engine-core/src/prosody/PhraseBaselineModel.js");
|
|
12
|
+
const TargetStabilizer_js_1 = require("../../../voice-engine-core/src/prosody/TargetStabilizer.js");
|
|
13
|
+
const AccentRenderer_js_1 = require("../prosody/AccentRenderer.js");
|
|
14
|
+
const ProsodyPresets_js_1 = require("../../../voice-engine-core/src/config/ProsodyPresets.js");
|
|
15
|
+
const ProsodyStyles_js_1 = require("../../../voice-engine-core/src/config/ProsodyStyles.js");
|
|
16
|
+
class AutotuneExecutor {
|
|
17
|
+
resolver = new voice_engine_core_1.TunePlanResolver();
|
|
18
|
+
// Default trackers - could be overridden or config passed in
|
|
19
|
+
tracker = new PitchTrackerRefV1_js_1.PitchTrackerRefV1({
|
|
20
|
+
windowMs: 40,
|
|
21
|
+
hopMs: 10,
|
|
22
|
+
f0Min: 50,
|
|
23
|
+
f0Max: 1000
|
|
24
|
+
});
|
|
25
|
+
curveGen = new TargetCurveGenerator_js_1.TargetCurveGenerator();
|
|
26
|
+
envGen = new CorrectionController_js_1.CorrectionController();
|
|
27
|
+
shifter = new PitchShifterRefV1_js_1.PitchShifterRefV1(); // "PSOLA-lite"
|
|
28
|
+
decomposer = new F0Decomposer_js_1.F0Decomposer();
|
|
29
|
+
segmenter = new ProsodySegmenter_js_1.ProsodySegmenter();
|
|
30
|
+
baselineModel = new PhraseBaselineModel_js_1.PhraseBaselineModel();
|
|
31
|
+
stabilizer = new TargetStabilizer_js_1.TargetStabilizer();
|
|
32
|
+
resolveProsodyPreset(presetName) {
|
|
33
|
+
switch (presetName) {
|
|
34
|
+
case "hard":
|
|
35
|
+
case "robot":
|
|
36
|
+
return ProsodyPresets_js_1.HARD_TUNE_PRESET;
|
|
37
|
+
case "subtle":
|
|
38
|
+
return ProsodyPresets_js_1.SUBTLE_PRESET;
|
|
39
|
+
case "natural":
|
|
40
|
+
case "pop":
|
|
41
|
+
default:
|
|
42
|
+
return ProsodyPresets_js_1.NATURAL_PRESET;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
async execute(req, audio) {
|
|
46
|
+
// 1. Resolve Plan
|
|
47
|
+
const plan = this.resolver.resolve(req);
|
|
48
|
+
const preset = this.resolveProsodyPreset(req.preset);
|
|
49
|
+
// 2. Analyze Pitch
|
|
50
|
+
const f0Analysis = this.tracker.analyze(audio);
|
|
51
|
+
const frameCount = f0Analysis.f0MhzQ.length;
|
|
52
|
+
// 3. Decompose Pitch (New in Phase 7.2)
|
|
53
|
+
// We separate macro (intonation) from micro (jitter/vibrato).
|
|
54
|
+
const decomposition = this.decomposer.decompose(f0Analysis);
|
|
55
|
+
// 3b. Prosody Segmentation (Phase 7.1)
|
|
56
|
+
// Identify voiced vs unvoiced vs silence phrases.
|
|
57
|
+
const segments = this.segmenter.segment(audio.data[0], f0Analysis, preset.analysis);
|
|
58
|
+
// 3c. Phrase Baseline (Phase 7.3)
|
|
59
|
+
// Model the declination trend of each phrase.
|
|
60
|
+
const baseline = this.baselineModel.analyze(segments, decomposition.macro.valuesHz);
|
|
61
|
+
// 3d. Derive Voicing (Enhanced with Segments)
|
|
62
|
+
// Instead of raw heuristic, we can now use the segments to define the voicing mask.
|
|
63
|
+
const voicedQ = new Uint8Array(frameCount);
|
|
64
|
+
const voicingProbQ = new Int16Array(frameCount);
|
|
65
|
+
for (const seg of segments) {
|
|
66
|
+
if (seg.kind === 'voiced') {
|
|
67
|
+
for (let i = seg.startFrame; i < seg.endFrame; i++) {
|
|
68
|
+
voicedQ[i] = 1;
|
|
69
|
+
voicingProbQ[i] = 10000;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
const voicing = {
|
|
74
|
+
sampleRateHz: f0Analysis.sampleRateHz,
|
|
75
|
+
frameHz: f0Analysis.frameHz,
|
|
76
|
+
hopSamples: f0Analysis.hopSamples,
|
|
77
|
+
t0Samples: f0Analysis.t0Samples,
|
|
78
|
+
voicedQ,
|
|
79
|
+
voicingProbQ
|
|
80
|
+
};
|
|
81
|
+
// 4. Generate Control Curves
|
|
82
|
+
// Old Method:
|
|
83
|
+
// const target = this.curveGen.generate(f0Analysis, voicing, plan);
|
|
84
|
+
// Phase 7.4 Target Stabilizer Integration:
|
|
85
|
+
// Stabilize the INTENT curve (macro - baseline)
|
|
86
|
+
const stabilized = this.stabilizer.stabilize(baseline.intentHz, segments, {
|
|
87
|
+
allowedPitchClasses: plan.scaleConfig?.allowedPitchClasses,
|
|
88
|
+
hysteresisCents: preset.stabilizer.hysteresisCents,
|
|
89
|
+
minHoldMs: preset.stabilizer.minHoldMs,
|
|
90
|
+
switchRampMs: preset.stabilizer.switchRampMs,
|
|
91
|
+
transitionSlopeThreshCentsPerSec: preset.stabilizer.transitionSlopeThreshCentsPerSec,
|
|
92
|
+
rootOffsetCents: 0
|
|
93
|
+
}, f0Analysis.frameHz);
|
|
94
|
+
// Phase 8: Expressive Rendering (8.4 Style Profiles)
|
|
95
|
+
const style = (0, ProsodyStyles_js_1.resolveProsodyStyle)(req.style || 'speech_neutral');
|
|
96
|
+
if (req.events && req.events.length > 0) {
|
|
97
|
+
const frameRateHz = f0Analysis.sampleRateHz / f0Analysis.hopSamples;
|
|
98
|
+
const accentOffsets = AccentRenderer_js_1.AccentRenderer.render(req.events, frameCount, style, frameRateHz);
|
|
99
|
+
// Add offsets to stabilized target
|
|
100
|
+
for (let i = 0; i < frameCount; i++) {
|
|
101
|
+
if (stabilized.noteIds[i] >= 0) {
|
|
102
|
+
stabilized.targetCents[i] += accentOffsets[i];
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
// Phase 8.5: Post-Focus Compression (PFC)
|
|
106
|
+
// Reduces pitch range/variance after a strong focus event to de-accentuate specific information.
|
|
107
|
+
if (style.postFocusCompression > 0) {
|
|
108
|
+
// 1. Find the strongest accent (focus)
|
|
109
|
+
let maxStrength = 0;
|
|
110
|
+
let focusTime = -1;
|
|
111
|
+
// Simple approach: global max in request
|
|
112
|
+
for (const event of req.events) {
|
|
113
|
+
if (event.type === 'accent' && event.strength > maxStrength) {
|
|
114
|
+
maxStrength = event.strength;
|
|
115
|
+
focusTime = event.time;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
// Threshold to trigger PFC (e.g. > 0.5 strength)
|
|
119
|
+
if (maxStrength > 0.5 && focusTime >= 0 && focusTime < frameCount - 1) {
|
|
120
|
+
const pfcStrength = style.postFocusCompression;
|
|
121
|
+
// Apply compression for frames AFTER the focus event
|
|
122
|
+
// We can retain a small buffer (e.g. 50ms) before compressing fully
|
|
123
|
+
// But for now, let's just start compressing after focusTime + span/2 or similar?
|
|
124
|
+
// User said: For frames t > focusTime (center).
|
|
125
|
+
// Let's add a small grace period (e.g. 10 frames = 100ms) to let the accent finish falling.
|
|
126
|
+
// Or just strict t > focusTime.
|
|
127
|
+
for (let t = focusTime; t < frameCount; t++) {
|
|
128
|
+
// We need baseline in Cents.
|
|
129
|
+
// baseline.baselineHz[t] -> Cents (MIDI absolute)
|
|
130
|
+
// 440Hz = 6900 cents.
|
|
131
|
+
// cents = 6900 + 1200 * log2(hz / 440)
|
|
132
|
+
const bHz = baseline.baselineHz[t];
|
|
133
|
+
if (bHz > 10) { // avoid log(0)
|
|
134
|
+
const baselineCents = 6900 + 1200 * Math.log2(bHz / 440);
|
|
135
|
+
// Calculate deviation of current target from baseline
|
|
136
|
+
const currentCents = stabilized.targetCents[t];
|
|
137
|
+
const deviation = currentCents - baselineCents;
|
|
138
|
+
// Compress deviation
|
|
139
|
+
// newDeviation = deviation * (1 - pfcStrength)
|
|
140
|
+
// newTarget = baselineCents + newDeviation
|
|
141
|
+
// Ramp-in the compression?
|
|
142
|
+
// Let's do a simple linear ramp over 20 frames (200ms)
|
|
143
|
+
let ramp = 1.0;
|
|
144
|
+
if (t < focusTime + 20) {
|
|
145
|
+
ramp = (t - focusTime) / 20.0;
|
|
146
|
+
}
|
|
147
|
+
const effectiveCompression = pfcStrength * ramp;
|
|
148
|
+
stabilized.targetCents[t] = baselineCents + (deviation * (1 - effectiveCompression));
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
// Re-construct the Final Target Curve
|
|
155
|
+
// Final Target = Stabilized Intent + Micro (Vibrato)
|
|
156
|
+
// Note: baseline is discarded (flattened out) if we just use Stabilized.
|
|
157
|
+
// If we want to strictly follow the scale, we discard the baseline declination.
|
|
158
|
+
const finalTargetCentsQ = new Int32Array(frameCount);
|
|
159
|
+
for (let i = 0; i < frameCount; i++) {
|
|
160
|
+
if (stabilized.noteIds[i] >= 0) {
|
|
161
|
+
// Stabilized Cents (e.g. 6900.0)
|
|
162
|
+
const stabCents = stabilized.targetCents[i];
|
|
163
|
+
// Micro deviation in Hz -> Cents
|
|
164
|
+
// microHz is deviation from macroHz
|
|
165
|
+
// We approximate Cents Micro: 1200 * log2((macro + micro) / macro)
|
|
166
|
+
// Wait, micro is deviation around 0? No, F0Decomposer says "Relative Pitch Deviation (e.g., +2.5)".
|
|
167
|
+
const macroHz = decomposition.macro.valuesHz[i];
|
|
168
|
+
const microHz = decomposition.micro.valuesHz[i];
|
|
169
|
+
let microCents = 0;
|
|
170
|
+
if (macroHz > 10) {
|
|
171
|
+
microCents = 1200 * Math.log2((macroHz + microHz) / macroHz);
|
|
172
|
+
}
|
|
173
|
+
// Final Cents = Stabilized + Micro
|
|
174
|
+
const finalCents = stabCents + microCents;
|
|
175
|
+
// Convert to Int32 Scaled (x1000)
|
|
176
|
+
finalTargetCentsQ[i] = Math.round(finalCents * 1000);
|
|
177
|
+
}
|
|
178
|
+
else {
|
|
179
|
+
// Unvoiced - hold last or default?
|
|
180
|
+
// Let's copy from curveGen behavior or input pitch
|
|
181
|
+
// Here we just use 0 or last. Shifter usually ignores target for unvoiced.
|
|
182
|
+
finalTargetCentsQ[i] = 0;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
const target = {
|
|
186
|
+
sampleRateHz: f0Analysis.sampleRateHz,
|
|
187
|
+
frameHz: f0Analysis.frameHz,
|
|
188
|
+
hopSamples: f0Analysis.hopSamples,
|
|
189
|
+
t0Samples: f0Analysis.t0Samples,
|
|
190
|
+
targetCentsQ: finalTargetCentsQ
|
|
191
|
+
};
|
|
192
|
+
const envelope = this.envGen.generate(f0Analysis, voicing, target, plan);
|
|
193
|
+
// 5. Apply Pitch Shift
|
|
194
|
+
const result = await this.shifter.shift(audio, f0Analysis, voicing, target, envelope);
|
|
195
|
+
return result;
|
|
196
|
+
}
|
|
197
|
+
calculateEnergyDb(signal, frameCount, hopSamples, windowSamples) {
|
|
198
|
+
const energyDb = new Float32Array(frameCount);
|
|
199
|
+
const len = signal.length;
|
|
200
|
+
for (let i = 0; i < frameCount; i++) {
|
|
201
|
+
const start = i * hopSamples;
|
|
202
|
+
const end = Math.min(start + windowSamples, len);
|
|
203
|
+
if (end <= start) {
|
|
204
|
+
energyDb[i] = -120;
|
|
205
|
+
continue;
|
|
206
|
+
}
|
|
207
|
+
let sumSq = 0;
|
|
208
|
+
for (let j = start; j < end; j++) {
|
|
209
|
+
sumSq += signal[j] * signal[j];
|
|
210
|
+
}
|
|
211
|
+
const rms = Math.sqrt(sumSq / (end - start));
|
|
212
|
+
energyDb[i] = rms > 1e-9 ? 20 * Math.log10(rms) : -120;
|
|
213
|
+
}
|
|
214
|
+
return energyDb;
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
// Publish AutotuneExecutor as a named property on this module's CommonJS `exports` object.
exports.AutotuneExecutor = AutotuneExecutor;
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { F0TrackV1, VoicingMaskV1, TunePlanV1, TargetCurveV1, CorrectionEnvelopeV1 } from "@mcptoolshop/voice-engine-core";
|
|
2
|
+
/**
 * Ambient type declaration for `CorrectionController`.
 *
 * Only the public shape is described here: a single `generate` method.
 * The implementation is not part of this declaration file.
 */
export declare class CorrectionController {
    /**
     * @param f0 - The F0 track (`F0TrackV1`).
     * @param voicing - The voicing mask (`VoicingMaskV1`).
     * @param target - The target curve (`TargetCurveV1`).
     * @param plan - The tune plan (`TunePlanV1`).
     * @returns A `CorrectionEnvelopeV1`.
     */
    generate(
        f0: F0TrackV1,
        voicing: VoicingMaskV1,
        target: TargetCurveV1,
        plan: TunePlanV1
    ): CorrectionEnvelopeV1;
}
|
|
5
|
+
//# sourceMappingURL=CorrectionController.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CorrectionController.d.ts","sourceRoot":"","sources":["../../../src/tuning/CorrectionController.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,UAAU,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,gCAAgC,CAAC;AAE3H,qBAAa,oBAAoB;IAC7B,QAAQ,CACJ,EAAE,EAAE,SAAS,EACb,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,aAAa,EACrB,IAAI,EAAE,UAAU,GACjB,oBAAoB;CAoG1B"}
|