@mcptoolshop/voice-engine-dsp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +78 -0
- package/dist/src/adapters/AudioWorkletProcessor.d.ts +31 -0
- package/dist/src/adapters/AudioWorkletProcessor.d.ts.map +1 -0
- package/dist/src/adapters/AudioWorkletProcessor.js +77 -0
- package/dist/src/adapters/NodeStreamAutotune.d.ts +28 -0
- package/dist/src/adapters/NodeStreamAutotune.d.ts.map +1 -0
- package/dist/src/adapters/NodeStreamAutotune.js +103 -0
- package/dist/src/analysis/PitchTrackerRefV1.d.ts +13 -0
- package/dist/src/analysis/PitchTrackerRefV1.d.ts.map +1 -0
- package/dist/src/analysis/PitchTrackerRefV1.js +136 -0
- package/dist/src/analysis/VoicingDetectorRefV1.d.ts +13 -0
- package/dist/src/analysis/VoicingDetectorRefV1.d.ts.map +1 -0
- package/dist/src/analysis/VoicingDetectorRefV1.js +77 -0
- package/dist/src/index.d.ts +8 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +22 -0
- package/dist/src/prosody/AccentRenderer.d.ts +15 -0
- package/dist/src/prosody/AccentRenderer.d.ts.map +1 -0
- package/dist/src/prosody/AccentRenderer.js +66 -0
- package/dist/src/prosody/Presets.d.ts +3 -0
- package/dist/src/prosody/Presets.d.ts.map +1 -0
- package/dist/src/prosody/Presets.js +49 -0
- package/dist/src/prosody/SafetyRails.d.ts +21 -0
- package/dist/src/prosody/SafetyRails.d.ts.map +1 -0
- package/dist/src/prosody/SafetyRails.js +65 -0
- package/dist/src/transformation/FormantStrategyV1.d.ts +5 -0
- package/dist/src/transformation/FormantStrategyV1.d.ts.map +1 -0
- package/dist/src/transformation/FormantStrategyV1.js +39 -0
- package/dist/src/transformation/PitchShifterRefV1.d.ts +9 -0
- package/dist/src/transformation/PitchShifterRefV1.d.ts.map +1 -0
- package/dist/src/transformation/PitchShifterRefV1.js +120 -0
- package/dist/src/tuning/AutotuneExecutor.d.ts +16 -0
- package/dist/src/tuning/AutotuneExecutor.d.ts.map +1 -0
- package/dist/src/tuning/AutotuneExecutor.js +217 -0
- package/dist/src/tuning/CorrectionController.d.ts +5 -0
- package/dist/src/tuning/CorrectionController.d.ts.map +1 -0
- package/dist/src/tuning/CorrectionController.js +91 -0
- package/dist/src/tuning/CorrectionControllerRefV1.d.ts +6 -0
- package/dist/src/tuning/CorrectionControllerRefV1.d.ts.map +1 -0
- package/dist/src/tuning/CorrectionControllerRefV1.js +63 -0
- package/dist/src/tuning/ScaleQuantizer.d.ts +7 -0
- package/dist/src/tuning/ScaleQuantizer.d.ts.map +1 -0
- package/dist/src/tuning/ScaleQuantizer.js +43 -0
- package/dist/src/tuning/StreamingAutotuneEngine.d.ts +43 -0
- package/dist/src/tuning/StreamingAutotuneEngine.d.ts.map +1 -0
- package/dist/src/tuning/StreamingAutotuneEngine.js +389 -0
- package/dist/src/tuning/StreamingAutotuneEngine_Fixed.d.ts +36 -0
- package/dist/src/tuning/StreamingAutotuneEngine_Fixed.d.ts.map +1 -0
- package/dist/src/tuning/StreamingAutotuneEngine_Fixed.js +344 -0
- package/dist/src/tuning/TargetCurveGenerator.d.ts +5 -0
- package/dist/src/tuning/TargetCurveGenerator.d.ts.map +1 -0
- package/dist/src/tuning/TargetCurveGenerator.js +69 -0
- package/dist/src/tuning/TargetCurveRefV1.d.ts +6 -0
- package/dist/src/tuning/TargetCurveRefV1.d.ts.map +1 -0
- package/dist/src/tuning/TargetCurveRefV1.js +69 -0
- package/dist/src/utils/AudioBufferUtils.d.ts +3 -0
- package/dist/src/utils/AudioBufferUtils.d.ts.map +1 -0
- package/dist/src/utils/AudioBufferUtils.js +19 -0
- package/dist/src/version.d.ts +2 -0
- package/dist/src/version.d.ts.map +1 -0
- package/dist/src/version.js +4 -0
- package/package.json +38 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 MCP Voice Engine Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<img src="https://raw.githubusercontent.com/mcp-tool-shop-org/mcp-voice-engine/master/assets/logo.jpg" alt="MCP Voice Engine Logo" width="100%">
|
|
3
|
+
</div>
|
|
4
|
+
|
|
5
|
+
# MCP Voice Engine
|
|
6
|
+
|
|
7
|
+
Deterministic, streaming-first prosody engine for expressive voice synthesis, pitch control, and real-time voice transformation.
|
|
8
|
+
|
|
9
|
+
<!-- Badges -->
|
|
10
|
+
<!--   -->
|
|
11
|
+
|
|
12
|
+
## Why this exists
|
|
13
|
+
|
|
14
|
+
Most voice DSP systems fail in two places: **stability** (warble, jitter, note flutter) and **reproducibility** (“it only happens sometimes”). MCP Voice Engine is built to be musical, causal, and deterministic—so it behaves like software, not folklore.
|
|
15
|
+
|
|
16
|
+
## What you can build with it
|
|
17
|
+
|
|
18
|
+
* **Real-time voice stylization** for games and interactive apps (stable targets, expressive controls)
|
|
19
|
+
* **Streaming voice pipelines** (servers, bots, live processing)
|
|
20
|
+
* **DAW / toolchain integration** (deterministic pitch targets, consistent render behavior)
|
|
21
|
+
* **Web Audio demos** (AudioWorklet-ready architecture)
|
|
22
|
+
|
|
23
|
+
## Quickstart
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
npm i
|
|
27
|
+
npm run build
|
|
28
|
+
npm test
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Core capabilities
|
|
32
|
+
|
|
33
|
+
### Deterministic output
|
|
34
|
+
Same input + config (and chunking policy) produces the same output, with regression protection via hash-based tests.
|
|
35
|
+
|
|
36
|
+
### Streaming-first runtime
|
|
37
|
+
Stateful, causal processing designed for low latency. No retroactive edits. Snapshot/restore supported for persistence and resumability.
|
|
38
|
+
|
|
39
|
+
### Expressive prosody controls
|
|
40
|
+
Event-driven accents and boundary tones let you shape cadence and intonation intentionally—without destabilizing pitch targets.
|
|
41
|
+
|
|
42
|
+
### Meaning tests (semantic guardrails)
|
|
43
|
+
The test suite enforces communicative behavior, including:
|
|
44
|
+
* **accent locality** (no “smear”)
|
|
45
|
+
* **question vs statement boundaries** (rise vs fall)
|
|
46
|
+
* **post-focus compression** (focus has consequences)
|
|
47
|
+
* **deterministic event ordering**
|
|
48
|
+
* **style monotonicity** (expressive > neutral > flat without increasing instability)
|
|
49
|
+
|
|
50
|
+
## Documentation
|
|
51
|
+
|
|
52
|
+
Primary docs live in [packages/voice-engine-dsp/docs/](packages/voice-engine-dsp/docs/).
|
|
53
|
+
|
|
54
|
+
### Key documents
|
|
55
|
+
|
|
56
|
+
* [Streaming Architecture](packages/voice-engine-dsp/docs/STREAMING_ARCHITECTURE.md)
|
|
57
|
+
* [Meaning Contract](packages/voice-engine-dsp/docs/MEANING_CONTRACT.md)
|
|
58
|
+
* [Debugging Guide](packages/voice-engine-dsp/docs/DEBUGGING.md)
|
|
59
|
+
* [Reference Handbook](Reference_Handbook.md)
|
|
60
|
+
|
|
61
|
+
### Repository structure
|
|
62
|
+
|
|
63
|
+
`packages/voice-engine-dsp/` — core DSP + streaming prosody engine, tests, and benchmarks
|
|
64
|
+
|
|
65
|
+
## Running the test suites
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
npm test
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Or run specific suites:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
npm run test:meaning
|
|
75
|
+
npm run test:determinism
|
|
76
|
+
npm run bench:rtf
|
|
77
|
+
npm run smoke
|
|
78
|
+
```
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AutotuneProcessor
|
|
3
|
+
*
|
|
4
|
+
* This class implements the processing logic for an AudioWorkletProcessor.
|
|
5
|
+
* It does not extend AudioWorkletProcessor directly to avoid issues in non-browser environments (like Node.js tests),
|
|
6
|
+
* but it matches the interface required by the Web Audio API.
|
|
7
|
+
*
|
|
8
|
+
* Usage in a real AudioWorklet:
|
|
9
|
+
* 1. Bundle this file and its dependencies (e.g. using Vite, Webpack, or Rollup).
|
|
10
|
+
* 2. In your worklet entry file:
|
|
11
|
+
*
|
|
12
|
+
* import { AutotuneProcessor } from './AudioWorkletProcessor';
|
|
13
|
+
*
|
|
14
|
+
* class RealAutotuneProcessor extends AudioWorkletProcessor {
|
|
15
|
+
* constructor(options) {
|
|
16
|
+
* super();
|
|
17
|
+
* this.impl = new AutotuneProcessor(options, this.port);
|
|
18
|
+
* }
|
|
19
|
+
* process(inputs, outputs, parameters) {
|
|
20
|
+
* return this.impl.process(inputs, outputs, parameters);
|
|
21
|
+
* }
|
|
22
|
+
* }
|
|
23
|
+
* registerProcessor('autotune-processor', RealAutotuneProcessor);
|
|
24
|
+
*/
|
|
25
|
+
export declare class AutotuneProcessor {
|
|
26
|
+
private engine;
|
|
27
|
+
private port;
|
|
28
|
+
constructor(options: any, port?: MessagePort);
|
|
29
|
+
process(inputs: Float32Array[][], outputs: Float32Array[][], parameters: Record<string, Float32Array>): boolean;
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=AudioWorkletProcessor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AudioWorkletProcessor.d.ts","sourceRoot":"","sources":["../../../src/adapters/AudioWorkletProcessor.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,iBAAiB;IAC1B,OAAO,CAAC,MAAM,CAA0B;IACxC,OAAO,CAAC,IAAI,CAAqB;gBAErB,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,EAAE,WAAW;IAmB5C,OAAO,CAAC,MAAM,EAAE,YAAY,EAAE,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,GAAG,OAAO;CA+BlH"}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.AutotuneProcessor = void 0;
|
|
4
|
+
const StreamingAutotuneEngine_1 = require("../tuning/StreamingAutotuneEngine");
|
|
5
|
+
/**
|
|
6
|
+
* AutotuneProcessor
|
|
7
|
+
*
|
|
8
|
+
* This class implements the processing logic for an AudioWorkletProcessor.
|
|
9
|
+
* It does not extend AudioWorkletProcessor directly to avoid issues in non-browser environments (like Node.js tests),
|
|
10
|
+
* but it matches the interface required by the Web Audio API.
|
|
11
|
+
*
|
|
12
|
+
* Usage in a real AudioWorklet:
|
|
13
|
+
* 1. Bundle this file and its dependencies (e.g. using Vite, Webpack, or Rollup).
|
|
14
|
+
* 2. In your worklet entry file:
|
|
15
|
+
*
|
|
16
|
+
* import { AutotuneProcessor } from './AutotuneProcessor';
|
|
17
|
+
*
|
|
18
|
+
* class RealAutotuneProcessor extends AudioWorkletProcessor {
|
|
19
|
+
* constructor(options) {
|
|
20
|
+
* super();
|
|
21
|
+
* this.impl = new AutotuneProcessor(options, this.port);
|
|
22
|
+
* }
|
|
23
|
+
* process(inputs, outputs, parameters) {
|
|
24
|
+
* return this.impl.process(inputs, outputs, parameters);
|
|
25
|
+
* }
|
|
26
|
+
* }
|
|
27
|
+
* registerProcessor('autotune-processor', RealAutotuneProcessor);
|
|
28
|
+
*/
|
|
29
|
+
class AutotuneProcessor {
|
|
30
|
+
engine;
|
|
31
|
+
port;
|
|
32
|
+
constructor(options, port) {
|
|
33
|
+
const config = options.processorOptions?.config || {};
|
|
34
|
+
const preset = options.processorOptions?.preset || {};
|
|
35
|
+
this.engine = new StreamingAutotuneEngine_1.StreamingAutotuneEngine(config, preset);
|
|
36
|
+
this.port = port || null;
|
|
37
|
+
if (this.port) {
|
|
38
|
+
this.port.onmessage = (event) => {
|
|
39
|
+
const { type, data } = event.data;
|
|
40
|
+
if (type === 'enqueueEvents') {
|
|
41
|
+
this.engine.enqueueEvents(data);
|
|
42
|
+
}
|
|
43
|
+
else if (type === 'updateConfig') {
|
|
44
|
+
// potentially update config
|
|
45
|
+
}
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
process(inputs, outputs, parameters) {
|
|
50
|
+
// inputs[inputIndex][channelIndex]
|
|
51
|
+
const input = inputs[0];
|
|
52
|
+
const output = outputs[0];
|
|
53
|
+
// If no input or empty implementation, return true to keep alive
|
|
54
|
+
if (!input || input.length === 0)
|
|
55
|
+
return true;
|
|
56
|
+
const inputChannel0 = input[0];
|
|
57
|
+
const outputChannel0 = output[0];
|
|
58
|
+
// Process Mono (Channel 0)
|
|
59
|
+
// Note: StreamingAutotuneEngine expects a Float32Array block.
|
|
60
|
+
// AudioWorklet usually provides 128 frames.
|
|
61
|
+
if (inputChannel0 && outputChannel0) {
|
|
62
|
+
// Check if input size matches block size or create sub-blocks?
|
|
63
|
+
// Usually AudioWorklet input is fixed size (128).
|
|
64
|
+
// StreamingAutotuneEngine handles any size block (it iterates internally).
|
|
65
|
+
const result = this.engine.process(inputChannel0);
|
|
66
|
+
outputChannel0.set(result.audio);
|
|
67
|
+
// Copy to other channels if output has more than one
|
|
68
|
+
for (let c = 1; c < output.length; c++) {
|
|
69
|
+
if (output[c]) {
|
|
70
|
+
output[c].set(result.audio);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
return true;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
exports.AutotuneProcessor = AutotuneProcessor;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { Transform, TransformCallback, TransformOptions } from 'stream';
|
|
2
|
+
import { StreamingAutotuneEngine } from '../tuning/StreamingAutotuneEngine';
|
|
3
|
+
export interface NodeStreamAutotuneOptions extends TransformOptions {
|
|
4
|
+
engine: StreamingAutotuneEngine;
|
|
5
|
+
blockSize?: number;
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* A Node.js Transform stream wrapper for StreamingAutotuneEngine.
|
|
9
|
+
* Handles buffering of incoming audio chunks to match the engine's required block size.
|
|
10
|
+
* Assumes input is Float32Array (object mode) or Buffer (raw float32 LE bytes).
|
|
11
|
+
*/
|
|
12
|
+
export declare class NodeStreamAutotune extends Transform {
|
|
13
|
+
private engine;
|
|
14
|
+
private blockSize;
|
|
15
|
+
private _buffer;
|
|
16
|
+
private _bufferedSamples;
|
|
17
|
+
constructor(options: NodeStreamAutotuneOptions);
|
|
18
|
+
/**
|
|
19
|
+
* Delegate method to enqueue control events to the engine.
|
|
20
|
+
* @param events Array of control events (notes, params, etc.)
|
|
21
|
+
*/
|
|
22
|
+
enqueueEvents(events: any[]): void;
|
|
23
|
+
_transform(chunk: any, encoding: BufferEncoding, callback: TransformCallback): void;
|
|
24
|
+
_flush(callback: TransformCallback): void;
|
|
25
|
+
private _appendToBuffer;
|
|
26
|
+
private _processBufferedData;
|
|
27
|
+
}
|
|
28
|
+
//# sourceMappingURL=NodeStreamAutotune.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"NodeStreamAutotune.d.ts","sourceRoot":"","sources":["../../../src/adapters/NodeStreamAutotune.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,QAAQ,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,mCAAmC,CAAC;AAE5E,MAAM,WAAW,yBAA0B,SAAQ,gBAAgB;IAC/D,MAAM,EAAE,uBAAuB,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;GAIG;AACH,qBAAa,kBAAmB,SAAQ,SAAS;IAC7C,OAAO,CAAC,MAAM,CAA0B;IACxC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,OAAO,CAAe;IAC9B,OAAO,CAAC,gBAAgB,CAAS;gBAErB,OAAO,EAAE,yBAAyB;IAY9C;;;OAGG;IACI,aAAa,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI;IAIzC,UAAU,CAAC,KAAK,EAAE,GAAG,EAAE,QAAQ,EAAE,cAAc,EAAE,QAAQ,EAAE,iBAAiB,GAAG,IAAI;IAuBnF,MAAM,CAAC,QAAQ,EAAE,iBAAiB,GAAG,IAAI;IAgBzC,OAAO,CAAC,eAAe;IAavB,OAAO,CAAC,oBAAoB;CA2B/B"}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.NodeStreamAutotune = void 0;
|
|
4
|
+
const stream_1 = require("stream");
|
|
5
|
+
/**
|
|
6
|
+
* A Node.js Transform stream wrapper for StreamingAutotuneEngine.
|
|
7
|
+
* Handles buffering of incoming audio chunks to match the engine's required block size.
|
|
8
|
+
* Assumes input is Float32Array (object mode) or Buffer (raw float32 LE bytes).
|
|
9
|
+
*/
|
|
10
|
+
class NodeStreamAutotune extends stream_1.Transform {
|
|
11
|
+
engine;
|
|
12
|
+
blockSize;
|
|
13
|
+
_buffer;
|
|
14
|
+
_bufferedSamples;
|
|
15
|
+
constructor(options) {
|
|
16
|
+
// Enforce objectMode if not specified, as we want to output Float32Array by default
|
|
17
|
+
// or Buffer if the downstream expects it.
|
|
18
|
+
// For now, let's stick to standard Buffer input/output if not in objectMode.
|
|
19
|
+
super(options);
|
|
20
|
+
this.engine = options.engine;
|
|
21
|
+
this.blockSize = options.blockSize || 128; // Default block size
|
|
22
|
+
this._buffer = new Float32Array(this.blockSize * 2); // Start with double capacity
|
|
23
|
+
this._bufferedSamples = 0;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Delegate method to enqueue control events to the engine.
|
|
27
|
+
* @param events Array of control events (notes, params, etc.)
|
|
28
|
+
*/
|
|
29
|
+
enqueueEvents(events) {
|
|
30
|
+
this.engine.enqueueEvents(events);
|
|
31
|
+
}
|
|
32
|
+
_transform(chunk, encoding, callback) {
|
|
33
|
+
let inputFloats;
|
|
34
|
+
// 1. Convert chunk to Float32Array
|
|
35
|
+
if (Buffer.isBuffer(chunk)) {
|
|
36
|
+
// Assume raw float32 LE bytes
|
|
37
|
+
const numSamples = chunk.length / 4;
|
|
38
|
+
inputFloats = new Float32Array(chunk.buffer, chunk.byteOffset, numSamples);
|
|
39
|
+
}
|
|
40
|
+
else if (chunk instanceof Float32Array) {
|
|
41
|
+
inputFloats = chunk;
|
|
42
|
+
}
|
|
43
|
+
else {
|
|
44
|
+
return callback(new Error('NodeStreamAutotune expects Buffer or Float32Array chunks.'));
|
|
45
|
+
}
|
|
46
|
+
// 2. Append to internal buffer
|
|
47
|
+
this._appendToBuffer(inputFloats);
|
|
48
|
+
// 3. Process as many blocks as possible
|
|
49
|
+
this._processBufferedData();
|
|
50
|
+
callback();
|
|
51
|
+
}
|
|
52
|
+
_flush(callback) {
|
|
53
|
+
// Build remaining samples
|
|
54
|
+
// If there are leftover samples, we could pad with zeros or just process them?
|
|
55
|
+
// The engine might expect full blocks (128).
|
|
56
|
+
// Let's pad with silence to complete the last block if necessary.
|
|
57
|
+
if (this._bufferedSamples > 0) {
|
|
58
|
+
const needed = this.blockSize - this._bufferedSamples;
|
|
59
|
+
if (needed > 0) {
|
|
60
|
+
const padding = new Float32Array(needed).fill(0);
|
|
61
|
+
this._appendToBuffer(padding);
|
|
62
|
+
}
|
|
63
|
+
this._processBufferedData();
|
|
64
|
+
}
|
|
65
|
+
callback();
|
|
66
|
+
}
|
|
67
|
+
_appendToBuffer(newData) {
|
|
68
|
+
const requiredCapacity = this._bufferedSamples + newData.length;
|
|
69
|
+
if (requiredCapacity > this._buffer.length) {
|
|
70
|
+
// Resize buffer
|
|
71
|
+
const newCapacity = Math.max(requiredCapacity, this._buffer.length * 2);
|
|
72
|
+
const newBuffer = new Float32Array(newCapacity);
|
|
73
|
+
newBuffer.set(this._buffer.subarray(0, this._bufferedSamples));
|
|
74
|
+
this._buffer = newBuffer;
|
|
75
|
+
}
|
|
76
|
+
this._buffer.set(newData, this._bufferedSamples);
|
|
77
|
+
this._bufferedSamples += newData.length;
|
|
78
|
+
}
|
|
79
|
+
_processBufferedData() {
|
|
80
|
+
let offset = 0;
|
|
81
|
+
while (this._bufferedSamples >= this.blockSize) {
|
|
82
|
+
// Extract one block
|
|
83
|
+
const block = this._buffer.subarray(offset, offset + this.blockSize);
|
|
84
|
+
// Process (engine expects Float32Array of size blockSize usually)
|
|
85
|
+
const result = this.engine.process(block);
|
|
86
|
+
// Output
|
|
87
|
+
if (this.writableObjectMode || this.readableObjectMode) {
|
|
88
|
+
this.push(result.audio); // Push Float32Array directly
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
// Convert back to Buffer (raw bytes)
|
|
92
|
+
this.push(Buffer.from(result.audio.buffer, result.audio.byteOffset, result.audio.byteLength));
|
|
93
|
+
}
|
|
94
|
+
offset += this.blockSize;
|
|
95
|
+
this._bufferedSamples -= this.blockSize;
|
|
96
|
+
}
|
|
97
|
+
// Shift remaining data to start of buffer
|
|
98
|
+
if (this._bufferedSamples > 0 && offset > 0) {
|
|
99
|
+
this._buffer.copyWithin(0, offset, offset + this._bufferedSamples);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
exports.NodeStreamAutotune = NodeStreamAutotune;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { AudioBufferV1, F0TrackV1 } from "@mcptoolshop/voice-engine-core";
|
|
2
|
+
export interface PitchTrackerConfig {
|
|
3
|
+
windowMs: number;
|
|
4
|
+
hopMs: number;
|
|
5
|
+
f0Min: number;
|
|
6
|
+
f0Max: number;
|
|
7
|
+
}
|
|
8
|
+
export declare class PitchTrackerRefV1 {
|
|
9
|
+
private config;
|
|
10
|
+
constructor(config: PitchTrackerConfig);
|
|
11
|
+
analyze(buffer: AudioBufferV1): F0TrackV1;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=PitchTrackerRefV1.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PitchTrackerRefV1.d.ts","sourceRoot":"","sources":["../../../src/analysis/PitchTrackerRefV1.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,gCAAgC,CAAC;AAG1E,MAAM,WAAW,kBAAkB;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,iBAAiB;IACd,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE,kBAAkB;IAEvC,OAAO,CAAC,MAAM,EAAE,aAAa,GAAG,SAAS;CA2InD"}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.PitchTrackerRefV1 = void 0;
|
|
4
|
+
const AudioBufferUtils_1 = require("../utils/AudioBufferUtils");
|
|
5
|
+
class PitchTrackerRefV1 {
|
|
6
|
+
config;
|
|
7
|
+
constructor(config) {
|
|
8
|
+
this.config = config;
|
|
9
|
+
}
|
|
10
|
+
analyze(buffer) {
|
|
11
|
+
const audio = (0, AudioBufferUtils_1.monoDownmix)(buffer);
|
|
12
|
+
const sr = buffer.sampleRate;
|
|
13
|
+
const windowSamples = Math.floor(this.config.windowMs * sr / 1000);
|
|
14
|
+
const hopSamples = Math.floor(this.config.hopMs * sr / 1000);
|
|
15
|
+
// Ensure lags are within search range
|
|
16
|
+
const minLag = Math.floor(sr / this.config.f0Max);
|
|
17
|
+
const maxLag = Math.floor(sr / this.config.f0Min);
|
|
18
|
+
// Calculate number of frames
|
|
19
|
+
// We need audio up to start + windowSamples + maxLag
|
|
20
|
+
// Last frame i: i * hopSamples + windowSamples + maxLag <= audio.length
|
|
21
|
+
// i * hopSamples <= audio.length - windowSamples - maxLag
|
|
22
|
+
let numFrames = 0;
|
|
23
|
+
if (audio.length >= windowSamples + maxLag) {
|
|
24
|
+
numFrames = Math.floor((audio.length - windowSamples - maxLag) / hopSamples) + 1;
|
|
25
|
+
}
|
|
26
|
+
const f0MhzQ = new Int32Array(numFrames);
|
|
27
|
+
const confQ = new Int16Array(numFrames);
|
|
28
|
+
const t0Samples = 0;
|
|
29
|
+
if (numFrames <= 0) {
|
|
30
|
+
return {
|
|
31
|
+
sampleRateHz: sr,
|
|
32
|
+
frameHz: sr / hopSamples,
|
|
33
|
+
hopSamples,
|
|
34
|
+
t0Samples,
|
|
35
|
+
f0MhzQ,
|
|
36
|
+
confQ
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
const difference = new Float32Array(maxLag + 2);
|
|
40
|
+
const cmndf = new Float32Array(maxLag + 2);
|
|
41
|
+
for (let i = 0; i < numFrames; i++) {
|
|
42
|
+
const start = i * hopSamples;
|
|
43
|
+
// 1. Difference function
|
|
44
|
+
// d(tau) = sum_{j=0}^{W-1} (x[start+j] - x[start+j+tau])^2
|
|
45
|
+
for (let tau = 1; tau <= maxLag; tau++) {
|
|
46
|
+
let sum = 0;
|
|
47
|
+
for (let j = 0; j < windowSamples; j++) {
|
|
48
|
+
const delta = audio[start + j] - audio[start + j + tau];
|
|
49
|
+
sum += delta * delta;
|
|
50
|
+
}
|
|
51
|
+
difference[tau] = sum;
|
|
52
|
+
}
|
|
53
|
+
difference[0] = 0;
|
|
54
|
+
// 2. CMNDF
|
|
55
|
+
cmndf[0] = 1;
|
|
56
|
+
let runningSum = 0;
|
|
57
|
+
for (let tau = 1; tau <= maxLag; tau++) {
|
|
58
|
+
runningSum += difference[tau];
|
|
59
|
+
if (runningSum === 0) {
|
|
60
|
+
cmndf[tau] = 1;
|
|
61
|
+
}
|
|
62
|
+
else {
|
|
63
|
+
cmndf[tau] = difference[tau] * tau / runningSum;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
// 3. Absolute Threshold
|
|
67
|
+
let bestTau = -1;
|
|
68
|
+
const threshold = 0.1;
|
|
69
|
+
let found = false;
|
|
70
|
+
// Only search in valid pitch range
|
|
71
|
+
for (let tau = minLag; tau <= maxLag; tau++) {
|
|
72
|
+
if (cmndf[tau] < threshold) {
|
|
73
|
+
// Start of a dip. Find local minimum in this dip.
|
|
74
|
+
let localTau = tau;
|
|
75
|
+
while (localTau + 1 <= maxLag && cmndf[localTau + 1] < cmndf[localTau]) {
|
|
76
|
+
localTau++;
|
|
77
|
+
}
|
|
78
|
+
bestTau = localTau;
|
|
79
|
+
found = true;
|
|
80
|
+
break;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
// If not found, use global minimum in range
|
|
84
|
+
if (!found) {
|
|
85
|
+
let minVal = Number.MAX_VALUE;
|
|
86
|
+
for (let tau = minLag; tau <= maxLag; tau++) {
|
|
87
|
+
if (cmndf[tau] < minVal) {
|
|
88
|
+
minVal = cmndf[tau];
|
|
89
|
+
bestTau = tau;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
// 4. Parabolic Interpolation
|
|
94
|
+
let refinedTau = bestTau;
|
|
95
|
+
let currentF0 = 0;
|
|
96
|
+
let confidence = 0;
|
|
97
|
+
if (bestTau >= minLag && bestTau <= maxLag) {
|
|
98
|
+
// Ensure we have neighbors
|
|
99
|
+
if (bestTau > 0 && bestTau < maxLag + 1) { // bounds check within cmndf array size
|
|
100
|
+
// Note: cmndf size is maxLag + 2, valid indices 0..maxLag+1.
|
|
101
|
+
// bestTau is in [minLag, maxLag]. so bestTau >= 1.
|
|
102
|
+
const y1 = cmndf[bestTau - 1];
|
|
103
|
+
const y2 = cmndf[bestTau];
|
|
104
|
+
const y3 = cmndf[bestTau + 1];
|
|
105
|
+
// Peak fitting (minimum)
|
|
106
|
+
const denom = (y1 - 2 * y2 + y3);
|
|
107
|
+
if (Math.abs(denom) > 1e-9) { // Avoid division by zero
|
|
108
|
+
const delta = (y1 - y3) / (2 * denom);
|
|
109
|
+
refinedTau = bestTau - delta;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
if (refinedTau > 0) {
|
|
113
|
+
currentF0 = sr / refinedTau;
|
|
114
|
+
}
|
|
115
|
+
// Confidence: 1 - min_cmndf
|
|
116
|
+
let minCmndf = cmndf[bestTau];
|
|
117
|
+
if (minCmndf > 1)
|
|
118
|
+
minCmndf = 1;
|
|
119
|
+
if (minCmndf < 0)
|
|
120
|
+
minCmndf = 0;
|
|
121
|
+
confidence = 1.0 - minCmndf;
|
|
122
|
+
}
|
|
123
|
+
f0MhzQ[i] = Math.round(currentF0 * 1000);
|
|
124
|
+
confQ[i] = Math.round(confidence * 10000);
|
|
125
|
+
}
|
|
126
|
+
return {
|
|
127
|
+
sampleRateHz: sr,
|
|
128
|
+
frameHz: sr / hopSamples,
|
|
129
|
+
hopSamples: hopSamples,
|
|
130
|
+
t0Samples: 0,
|
|
131
|
+
f0MhzQ,
|
|
132
|
+
confQ
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
exports.PitchTrackerRefV1 = PitchTrackerRefV1;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { AudioBufferV1, VoicingMaskV1, F0TrackV1 } from "@mcptoolshop/voice-engine-core";
|
|
2
|
+
export interface VoicingConfig {
|
|
3
|
+
silenceThreshold: number;
|
|
4
|
+
voicingThreshold: number;
|
|
5
|
+
windowMs: number;
|
|
6
|
+
}
|
|
7
|
+
export declare class VoicingDetectorRefV1 {
|
|
8
|
+
private config;
|
|
9
|
+
constructor(config: VoicingConfig);
|
|
10
|
+
analyze(buffer: AudioBufferV1, f0Track: F0TrackV1): VoicingMaskV1;
|
|
11
|
+
private hangoverSmoothing;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=VoicingDetectorRefV1.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"VoicingDetectorRefV1.d.ts","sourceRoot":"","sources":["../../../src/analysis/VoicingDetectorRefV1.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,SAAS,EAAoB,MAAM,gCAAgC,CAAC;AAG3G,MAAM,WAAW,aAAa;IAC1B,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,oBAAoB;IACjB,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE,aAAa;IAElC,OAAO,CAAC,MAAM,EAAE,aAAa,EAAE,OAAO,EAAE,SAAS,GAAG,aAAa;IAiExE,OAAO,CAAC,iBAAiB;CAe5B"}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.VoicingDetectorRefV1 = void 0;
|
|
4
|
+
const AudioBufferUtils_1 = require("../utils/AudioBufferUtils");
|
|
5
|
+
class VoicingDetectorRefV1 {
|
|
6
|
+
config;
|
|
7
|
+
constructor(config) {
|
|
8
|
+
this.config = config;
|
|
9
|
+
}
|
|
10
|
+
analyze(buffer, f0Track) {
|
|
11
|
+
const audio = (0, AudioBufferUtils_1.monoDownmix)(buffer);
|
|
12
|
+
const numFrames = f0Track.f0MhzQ.length;
|
|
13
|
+
const hopSamples = f0Track.hopSamples;
|
|
14
|
+
const windowSamples = Math.floor(this.config.windowMs * buffer.sampleRate / 1000);
|
|
15
|
+
const voicedQ = new Uint8Array(numFrames);
|
|
16
|
+
const voicingProbQ = new Int16Array(numFrames);
|
|
17
|
+
const zcrThreshold = 0.4;
|
|
18
|
+
const zcrThreshQ = zcrThreshold * 10000;
|
|
19
|
+
const silenceThreshQ = this.config.silenceThreshold * 10000;
|
|
20
|
+
const voicingThreshQ = this.config.voicingThreshold * 10000;
|
|
21
|
+
for (let i = 0; i < numFrames; i++) {
|
|
22
|
+
const start = i * hopSamples;
|
|
23
|
+
if (start + windowSamples > audio.length) {
|
|
24
|
+
break;
|
|
25
|
+
}
|
|
26
|
+
let zeroCrossings = 0;
|
|
27
|
+
let sumSq = 0;
|
|
28
|
+
for (let j = 0; j < windowSamples; j++) {
|
|
29
|
+
const samp = audio[start + j];
|
|
30
|
+
sumSq += samp * samp;
|
|
31
|
+
if (j > 0) {
|
|
32
|
+
const prev = audio[start + j - 1];
|
|
33
|
+
if ((prev >= 0 && samp < 0) || (prev < 0 && samp >= 0)) {
|
|
34
|
+
zeroCrossings++;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
const rms = Math.sqrt(sumSq / windowSamples);
|
|
39
|
+
const zcr = zeroCrossings / windowSamples;
|
|
40
|
+
const rmsQ = Math.min(10000, Math.round(rms * 10000));
|
|
41
|
+
const zcrQ = Math.min(10000, Math.round(zcr * 10000));
|
|
42
|
+
const confQ = f0Track.confQ[i];
|
|
43
|
+
let isVoiced = 0;
|
|
44
|
+
if (rmsQ > silenceThreshQ && confQ > voicingThreshQ && zcrQ < zcrThreshQ) {
|
|
45
|
+
isVoiced = 1;
|
|
46
|
+
}
|
|
47
|
+
voicedQ[i] = isVoiced;
|
|
48
|
+
voicingProbQ[i] = confQ;
|
|
49
|
+
}
|
|
50
|
+
// Apply smoothing
|
|
51
|
+
this.hangoverSmoothing(voicedQ, 2);
|
|
52
|
+
return {
|
|
53
|
+
sampleRateHz: f0Track.sampleRateHz,
|
|
54
|
+
frameHz: f0Track.frameHz,
|
|
55
|
+
hopSamples: f0Track.hopSamples,
|
|
56
|
+
t0Samples: f0Track.t0Samples,
|
|
57
|
+
voicedQ,
|
|
58
|
+
voicingProbQ
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
hangoverSmoothing(mask, frames) {
|
|
62
|
+
const len = mask.length;
|
|
63
|
+
let hangoverCounter = 0;
|
|
64
|
+
for (let i = 0; i < len; i++) {
|
|
65
|
+
if (mask[i] === 1) {
|
|
66
|
+
hangoverCounter = frames;
|
|
67
|
+
}
|
|
68
|
+
else {
|
|
69
|
+
if (hangoverCounter > 0) {
|
|
70
|
+
mask[i] = 1;
|
|
71
|
+
hangoverCounter--;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
exports.VoicingDetectorRefV1 = VoicingDetectorRefV1;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export * from './tuning/StreamingAutotuneEngine';
|
|
2
|
+
export * from './version';
|
|
3
|
+
export { ProsodyRuntimeStateV1 } from '../../voice-engine-core/src/prosody/StreamingProsodyTypes';
|
|
4
|
+
export * from './adapters/NodeStreamAutotune';
|
|
5
|
+
export * from './adapters/AudioWorkletProcessor';
|
|
6
|
+
export * from './prosody/SafetyRails';
|
|
7
|
+
export * from './prosody/Presets';
|
|
8
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,kCAAkC,CAAC;AACjD,cAAc,WAAW,CAAC;AAE1B,OAAO,EAAE,qBAAqB,EAAE,MAAM,2DAA2D,CAAC;AAClG,cAAc,+BAA+B,CAAC;AAC9C,cAAc,kCAAkC,CAAC;AACjD,cAAc,uBAAuB,CAAC;AACtC,cAAc,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
__exportStar(require("./tuning/StreamingAutotuneEngine"), exports);
|
|
18
|
+
__exportStar(require("./version"), exports);
|
|
19
|
+
__exportStar(require("./adapters/NodeStreamAutotune"), exports);
|
|
20
|
+
__exportStar(require("./adapters/AudioWorkletProcessor"), exports);
|
|
21
|
+
__exportStar(require("./prosody/SafetyRails"), exports);
|
|
22
|
+
__exportStar(require("./prosody/Presets"), exports);
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { ProsodyEventV1, ProsodyStyleV1 } from "../../../voice-engine-core/src/prosody/ProsodyV1.js";
|
|
2
|
+
export declare class AccentRenderer {
|
|
3
|
+
/**
|
|
4
|
+
* Renders prosodic accents into a per-frame control curve.
|
|
5
|
+
* Uses a raised cosine window for smooth parameter modulation.
|
|
6
|
+
*
|
|
7
|
+
* @param events List of prosody events to render
|
|
8
|
+
* @param totalFrames Total number of frames in the output buffer
|
|
9
|
+
* @param style Prosody style configuration
|
|
10
|
+
* @param frameRateHz Frame rate for time conversions (default 100)
|
|
11
|
+
* @returns Float32Array of rendered values (additive relative cents)
|
|
12
|
+
*/
|
|
13
|
+
static render(events: ProsodyEventV1[], totalFrames: number, style: ProsodyStyleV1, frameRateHz?: number): Float32Array;
|
|
14
|
+
}
|
|
15
|
+
//# sourceMappingURL=AccentRenderer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AccentRenderer.d.ts","sourceRoot":"","sources":["../../../src/prosody/AccentRenderer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,qDAAqD,CAAC;AAErG,qBAAa,cAAc;IACvB;;;;;;;;;OASG;IACH,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,EAAE,EAAE,WAAW,EAAE,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE,WAAW,GAAE,MAAY,GAAG,YAAY;CA6D/H"}
|