modular-voice-agent-sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +102 -0
- package/USAGE.md +567 -0
- package/dist/backends/cloud/index.d.ts +7 -0
- package/dist/backends/cloud/index.d.ts.map +1 -0
- package/dist/backends/cloud/index.js +6 -0
- package/dist/backends/cloud/index.js.map +1 -0
- package/dist/backends/cloud/llm.d.ts +22 -0
- package/dist/backends/cloud/llm.d.ts.map +1 -0
- package/dist/backends/cloud/llm.js +234 -0
- package/dist/backends/cloud/llm.js.map +1 -0
- package/dist/backends/index.d.ts +2 -0
- package/dist/backends/index.d.ts.map +1 -0
- package/dist/backends/index.js +6 -0
- package/dist/backends/index.js.map +1 -0
- package/dist/backends/native/index.d.ts +5 -0
- package/dist/backends/native/index.d.ts.map +1 -0
- package/dist/backends/native/index.js +6 -0
- package/dist/backends/native/index.js.map +1 -0
- package/dist/backends/native/llm.d.ts +71 -0
- package/dist/backends/native/llm.d.ts.map +1 -0
- package/dist/backends/native/llm.js +435 -0
- package/dist/backends/native/llm.js.map +1 -0
- package/dist/backends/native/stt.d.ts +15 -0
- package/dist/backends/native/stt.d.ts.map +1 -0
- package/dist/backends/native/stt.js +94 -0
- package/dist/backends/native/stt.js.map +1 -0
- package/dist/backends/native/tts.d.ts +21 -0
- package/dist/backends/native/tts.d.ts.map +1 -0
- package/dist/backends/native/tts.js +105 -0
- package/dist/backends/native/tts.js.map +1 -0
- package/dist/backends/transformers/index.d.ts +4 -0
- package/dist/backends/transformers/index.d.ts.map +1 -0
- package/dist/backends/transformers/index.js +4 -0
- package/dist/backends/transformers/index.js.map +1 -0
- package/dist/backends/transformers/llm.d.ts +29 -0
- package/dist/backends/transformers/llm.d.ts.map +1 -0
- package/dist/backends/transformers/llm.js +117 -0
- package/dist/backends/transformers/llm.js.map +1 -0
- package/dist/backends/transformers/stt.d.ts +17 -0
- package/dist/backends/transformers/stt.d.ts.map +1 -0
- package/dist/backends/transformers/stt.js +43 -0
- package/dist/backends/transformers/stt.js.map +1 -0
- package/dist/backends/transformers/tts.d.ts +17 -0
- package/dist/backends/transformers/tts.d.ts.map +1 -0
- package/dist/backends/transformers/tts.js +40 -0
- package/dist/backends/transformers/tts.js.map +1 -0
- package/dist/cache.d.ts +37 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +49 -0
- package/dist/cache.js.map +1 -0
- package/dist/cli.d.ts +11 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +392 -0
- package/dist/cli.js.map +1 -0
- package/dist/client/audio-player.d.ts +45 -0
- package/dist/client/audio-player.d.ts.map +1 -0
- package/dist/client/audio-player.js +90 -0
- package/dist/client/audio-player.js.map +1 -0
- package/dist/client/audio-recorder.d.ts +42 -0
- package/dist/client/audio-recorder.d.ts.map +1 -0
- package/dist/client/audio-recorder.js +128 -0
- package/dist/client/audio-recorder.js.map +1 -0
- package/dist/client/index.d.ts +34 -0
- package/dist/client/index.d.ts.map +1 -0
- package/dist/client/index.js +33 -0
- package/dist/client/index.js.map +1 -0
- package/dist/client/protocol.d.ts +80 -0
- package/dist/client/protocol.d.ts.map +1 -0
- package/dist/client/protocol.js +29 -0
- package/dist/client/protocol.js.map +1 -0
- package/dist/client/voice-client.d.ts +249 -0
- package/dist/client/voice-client.d.ts.map +1 -0
- package/dist/client/voice-client.js +826 -0
- package/dist/client/voice-client.js.map +1 -0
- package/dist/client/web-speech-stt.d.ts +65 -0
- package/dist/client/web-speech-stt.d.ts.map +1 -0
- package/dist/client/web-speech-stt.js +122 -0
- package/dist/client/web-speech-stt.js.map +1 -0
- package/dist/client/web-speech-tts.d.ts +59 -0
- package/dist/client/web-speech-tts.d.ts.map +1 -0
- package/dist/client/web-speech-tts.js +145 -0
- package/dist/client/web-speech-tts.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +13 -0
- package/dist/index.js.map +1 -0
- package/dist/server/encoding.d.ts +18 -0
- package/dist/server/encoding.d.ts.map +1 -0
- package/dist/server/encoding.js +41 -0
- package/dist/server/encoding.js.map +1 -0
- package/dist/server/handler.d.ts +86 -0
- package/dist/server/handler.d.ts.map +1 -0
- package/dist/server/handler.js +224 -0
- package/dist/server/handler.js.map +1 -0
- package/dist/server/index.d.ts +31 -0
- package/dist/server/index.d.ts.map +1 -0
- package/dist/server/index.js +32 -0
- package/dist/server/index.js.map +1 -0
- package/dist/services/function-service.d.ts +17 -0
- package/dist/services/function-service.d.ts.map +1 -0
- package/dist/services/function-service.js +82 -0
- package/dist/services/function-service.js.map +1 -0
- package/dist/services/index.d.ts +4 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +3 -0
- package/dist/services/index.js.map +1 -0
- package/dist/services/llm-logger.d.ts +136 -0
- package/dist/services/llm-logger.d.ts.map +1 -0
- package/dist/services/llm-logger.js +275 -0
- package/dist/services/llm-logger.js.map +1 -0
- package/dist/services/text-normalizer.d.ts +17 -0
- package/dist/services/text-normalizer.d.ts.map +1 -0
- package/dist/services/text-normalizer.js +100 -0
- package/dist/services/text-normalizer.js.map +1 -0
- package/dist/types.d.ts +195 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +48 -0
- package/dist/types.js.map +1 -0
- package/dist/voice-pipeline.d.ts +125 -0
- package/dist/voice-pipeline.d.ts.map +1 -0
- package/dist/voice-pipeline.js +390 -0
- package/dist/voice-pipeline.js.map +1 -0
- package/package.json +96 -0
- package/scripts/setup-binaries.sh +159 -0
- package/scripts/setup.sh +201 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Native TTS Pipeline (sherpa-onnx)
|
|
3
|
+
* Server-only - requires sherpa-onnx binary
|
|
4
|
+
*
|
|
5
|
+
* Uses sherpa-onnx-offline-tts which supports Piper ONNX models.
|
|
6
|
+
* See: https://github.com/k2-fsa/sherpa-onnx
|
|
7
|
+
*/
|
|
8
|
+
import { execFileSync, execSync } from 'child_process';
|
|
9
|
+
import { existsSync, readFileSync, unlinkSync } from 'fs';
|
|
10
|
+
import { join } from 'path';
|
|
11
|
+
import { tmpdir } from 'os';
|
|
12
|
+
import { BufferedAudioPlayable } from '../../types';
|
|
13
|
+
/**
 * Text-to-speech backend that runs the `sherpa-onnx-offline-tts` command-line
 * binary once per utterance and decodes the WAV file it writes.
 *
 * Config fields read by this class: `binaryPath`, `modelDir`, `speakerId`
 * (defaults to 0) and `speedScale` (defaults to 1.0).
 * NOTE(review): `speedScale` is stored but never passed to the binary - confirm
 * whether it should be forwarded as a sherpa-onnx option.
 */
export class NativeTTS {
    config;
    ready = false;
    modelPath = '';
    tokensPath = '';
    dataDir = '';

    /**
     * @param config Backend configuration; caller-supplied values override the
     *   `speakerId` / `speedScale` defaults.
     */
    constructor(config) {
        this.config = {
            speakerId: 0,
            speedScale: 1.0,
            ...config,
        };
    }

    /**
     * Verify that the binary and every required model asset exist and remember
     * their paths for later `synthesize()` calls.
     *
     * @param _onProgress Accepted for interface parity; never invoked here.
     * @throws {Error} If the binary, the model directory, a known `.onnx` file,
     *   `tokens.txt` or `espeak-ng-data` is missing.
     */
    async initialize(_onProgress) {
        console.log('Initializing native TTS (sherpa-onnx)...');
        if (!existsSync(this.config.binaryPath)) {
            throw new Error(`sherpa-onnx-offline-tts binary not found at: ${this.config.binaryPath}`);
        }
        if (!existsSync(this.config.modelDir)) {
            throw new Error(`TTS model directory not found at: ${this.config.modelDir}`);
        }
        // Find the model files in the directory
        const modelDir = this.config.modelDir;
        // Only these two file names are probed, in order of preference.
        const onnxFiles = ['en_US-lessac-medium.onnx', 'model.onnx']
            .map(f => join(modelDir, f))
            .filter(f => existsSync(f));
        if (onnxFiles.length === 0) {
            throw new Error(`No .onnx model file found in: ${modelDir}`);
        }
        this.modelPath = onnxFiles[0];
        // Look for tokens.txt
        this.tokensPath = join(modelDir, 'tokens.txt');
        if (!existsSync(this.tokensPath)) {
            throw new Error(`tokens.txt not found in: ${modelDir}`);
        }
        // Look for espeak-ng-data directory
        this.dataDir = join(modelDir, 'espeak-ng-data');
        if (!existsSync(this.dataDir)) {
            throw new Error(`espeak-ng-data directory not found in: ${modelDir}`);
        }
        this.ready = true;
        console.log('Native TTS (sherpa-onnx) ready.');
    }

    /**
     * Synthesize `text` by invoking the sherpa-onnx binary and decoding the WAV
     * file it produces.
     *
     * @param text Text to speak.
     * @returns A `BufferedAudioPlayable` built from the decoded samples and the
     *   sample rate reported by the WAV header.
     * @throws {Error} If `initialize()` has not completed, the binary fails, or
     *   the output cannot be parsed.
     */
    async synthesize(text) {
        if (!this.ready) {
            throw new Error('TTS pipeline not initialized');
        }
        // sherpa-onnx outputs to a file, so we use a temp file. The pid and a
        // random suffix keep concurrent processes from sharing one file name.
        const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
        const tmpFile = join(tmpdir(), `sherpa-tts-${uniqueSuffix}.wav`);
        try {
            // Arguments are passed as an array with no shell in between, so the
            // text and the paths need no quoting and cannot be interpreted as
            // shell syntax on any platform.
            execFileSync(this.config.binaryPath, [
                `--vits-model=${this.modelPath}`,
                `--vits-tokens=${this.tokensPath}`,
                `--vits-data-dir=${this.dataDir}`,
                `--sid=${this.config.speakerId}`,
                `--output-filename=${tmpFile}`,
                text,
            ], { stdio: ['pipe', 'pipe', 'pipe'], maxBuffer: 50 * 1024 * 1024 });
            // Read the WAV file and extract PCM data
            const wavBuffer = readFileSync(tmpFile);
            const { audio, sampleRate } = this.parseWav(wavBuffer);
            return new BufferedAudioPlayable(audio, sampleRate);
        }
        finally {
            // Clean up temp file
            if (existsSync(tmpFile)) {
                unlinkSync(tmpFile);
            }
        }
    }

    /**
     * Decode a 16-bit PCM WAV file into normalized float samples.
     *
     * The RIFF chunk list is walked instead of assuming a fixed 44-byte header,
     * so extra chunks before or after the audio are skipped rather than decoded.
     * The channel count is not inspected; samples are returned in file order.
     *
     * @param buffer Complete WAV file contents.
     * @returns `{ audio, sampleRate }` where `audio` is a Float32Array of samples
     *   scaled by 1/32768.
     * @throws {Error} If the RIFF/WAVE header, the `fmt ` chunk or the `data`
     *   chunk is missing, or the samples are not 16-bit.
     */
    parseWav(buffer) {
        const tagAt = (start) => buffer.toString('latin1', start, start + 4);
        if (buffer.length < 12 || tagAt(0) !== 'RIFF' || tagAt(8) !== 'WAVE') {
            throw new Error('Invalid WAV data: missing RIFF/WAVE header');
        }
        let sampleRate;
        let bitsPerSample;
        let cursor = 12;
        while (cursor + 8 <= buffer.length) {
            const chunkId = tagAt(cursor);
            const chunkSize = buffer.readUInt32LE(cursor + 4);
            const bodyStart = cursor + 8;
            if (chunkId === 'fmt ') {
                if (bodyStart + 16 > buffer.length) {
                    throw new Error('Invalid WAV data: truncated "fmt " chunk');
                }
                sampleRate = buffer.readUInt32LE(bodyStart + 4);
                bitsPerSample = buffer.readUInt16LE(bodyStart + 14);
            }
            else if (chunkId === 'data') {
                if (sampleRate === undefined) {
                    throw new Error('Invalid WAV data: "data" chunk found before "fmt " chunk');
                }
                if (bitsPerSample !== 16) {
                    throw new Error(`Unsupported bits per sample: ${bitsPerSample}`);
                }
                // Trust the declared size only as far as the buffer actually reaches.
                const byteLength = Math.min(chunkSize, buffer.length - bodyStart);
                const sampleCount = Math.floor(byteLength / 2);
                const audio = new Float32Array(sampleCount);
                // readInt16LE works at any byte offset and on any host
                // endianness, unlike an Int16Array view over the same memory.
                for (let i = 0; i < sampleCount; i++) {
                    audio[i] = buffer.readInt16LE(bodyStart + i * 2) / 32768.0;
                }
                return { audio, sampleRate };
            }
            // Chunk bodies are padded to an even number of bytes.
            cursor = bodyStart + chunkSize + (chunkSize % 2);
        }
        throw new Error('Invalid WAV data: no "data" chunk found');
    }

    /** @returns Whether `initialize()` has completed successfully. */
    isReady() {
        return this.ready;
    }
}
|
|
105
|
+
//# sourceMappingURL=tts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tts.js","sourceRoot":"","sources":["../../../src/backends/native/tts.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAC1D,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAE5B,OAAO,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AAEpD,MAAM,OAAO,SAAS;IACZ,MAAM,CAAsB;IAC5B,KAAK,GAAG,KAAK,CAAC;IACd,SAAS,GAAW,EAAE,CAAC;IACvB,UAAU,GAAW,EAAE,CAAC;IACxB,OAAO,GAAW,EAAE,CAAC;IAE7B,YAAY,MAA2B;QACrC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,GAAG;YACf,GAAG,MAAM;SACV,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,WAA8B;QAC7C,OAAO,CAAC,GAAG,CAAC,0CAA0C,CAAC,CAAC;QAExD,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,gDAAgD,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC;QAC5F,CAAC;QACD,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,qCAAqC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC/E,CAAC;QAED,wCAAwC;QACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC;QAEtC,sBAAsB;QACtB,MAAM,SAAS,GAAG,CAAC,0BAA0B,EAAE,YAAY,CAAC;aACzD,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;aAC3B,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QAE9B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,iCAAiC,QAAQ,EAAE,CAAC,CAAC;QAC/D,CAAC;QACD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAE9B,sBAAsB;QACtB,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QAC/C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,EAAE,CAAC,CAAC;QAC1D,CAAC;QAED,oCAAoC;QACpC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;QAChD,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,0CAA0C,QAAQ,EAAE,CAAC,CAAC;QACxE,CAAC;QAED,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,OAAO,CAAC,GAAG,CAAC,iCAAiC,CAAC,CAAC;IACjD,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,IAAY;QAC3B,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CA
AC;QAClD,CAAC;QAED,uDAAuD;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;QAE/D,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAEhD,QAAQ,CACN,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI;gBAC9B,iBAAiB,IAAI,CAAC,SAAS,IAAI;gBACnC,kBAAkB,IAAI,CAAC,UAAU,IAAI;gBACrC,oBAAoB,IAAI,CAAC,OAAO,IAAI;gBACpC,SAAS,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG;gBACjC,sBAAsB,OAAO,IAAI;gBACjC,IAAI,WAAW,GAAG,EAClB,EAAE,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,SAAS,EAAE,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,CACjE,CAAC;YAEF,yCAAyC;YACzC,MAAM,SAAS,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;YACxC,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;YAEvD,OAAO,IAAI,qBAAqB,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;QACtD,CAAC;gBAAS,CAAC;YACT,qBAAqB;YACrB,IAAI,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;gBACxB,UAAU,CAAC,OAAO,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;IACH,CAAC;IAEO,QAAQ,CAAC,MAAc;QAC7B,yCAAyC;QACzC,mCAAmC;QACnC,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC;QAC3D,MAAM,UAAU,GAAG,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;QAC3C,MAAM,aAAa,GAAG,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;QAE9C,IAAI,aAAa,KAAK,EAAE,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,gCAAgC,aAAa,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;QAC5C,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;QACzF,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAE/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,OAAO,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC;QAClC,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;IACxC,CAAC;IAED,OAAO;QACL,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/backends/transformers/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,OAAO,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,MAAM,OAAO,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,MAAM,OAAO,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/backends/transformers/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,OAAO,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,MAAM,OAAO,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,MAAM,OAAO,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transformers.js LLM Pipeline
|
|
3
|
+
* Isomorphic - works in browser (WebGPU) and Node.js
|
|
4
|
+
*
|
|
5
|
+
* Supports any causal LLM model from Hugging Face that works with Transformers.js,
|
|
6
|
+
* including SmolLM, Phi, Qwen, Gemma, and others.
|
|
7
|
+
*
|
|
8
|
+
* Note: This backend does not support native tool calling.
|
|
9
|
+
* When tools are provided, it injects instructions into the system prompt
|
|
10
|
+
* for JSON-based tool calling, parsed by VoicePipeline.
|
|
11
|
+
*/
|
|
12
|
+
import type { LLMPipeline, TransformersLLMConfig, ProgressCallback, Message, LLMGenerateOptions, LLMGenerateResult } from '../../types';
|
|
13
|
+
/**
 * Text-generation backend built on the Transformers.js `pipeline` API.
 * Tool calling is not native: `supportsTools()` reports false, and tool
 * instructions are appended to system messages when the prompt is built.
 */
export declare class TransformersLLM implements LLMPipeline {
    /** Configuration object passed to the constructor. */
    private config;
    /** Loaded text-generation pipeline; null until `initialize()` completes. */
    private pipe;
    /** Set to true once `initialize()` has loaded the model. */
    private ready;
    /** Logs the input messages and the response of each `generate()` call, keyed by conversation id. */
    private tracker;
    /** @param config Model id, dtype, device and generation settings read by this class. */
    constructor(config: TransformersLLMConfig);
    /**
     * Load the text-generation pipeline for the configured model.
     * @param onProgress Forwarded to the pipeline loader as its progress callback.
     */
    initialize(onProgress?: ProgressCallback): Promise<void>;
    /** Always returns false; this backend has no native tool calling. */
    supportsTools(): boolean;
    /**
     * Format `messages` into a prompt, run the pipeline, strip everything from
     * `<|im_end|>` onward, then replay the response one character at a time
     * through `options.onToken`.
     * @returns The response text with `finishReason` set to `'stop'`.
     * @throws If the pipeline has not been initialized.
     */
    generate(messages: Message[], options?: LLMGenerateOptions): Promise<LLMGenerateResult>;
    /**
     * Build tool instructions to inject into system prompt
     */
    private buildToolInstructions;
    /** Render messages as `<|im_start|>role ... <|im_end|>` blocks ending with an open assistant turn. */
    private formatChatPrompt;
    /** Whether `initialize()` has completed. */
    isReady(): boolean;
}
|
|
29
|
+
//# sourceMappingURL=llm.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/backends/transformers/llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,KAAK,EACV,WAAW,EACX,qBAAqB,EACrB,gBAAgB,EAChB,OAAO,EACP,kBAAkB,EAClB,iBAAiB,EAGlB,MAAM,aAAa,CAAC;AAGrB,qBAAa,eAAgB,YAAW,WAAW;IACjD,OAAO,CAAC,MAAM,CAAwB;IAEtC,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,OAAO,CAAyB;gBAE5B,MAAM,EAAE,qBAAqB;IAKnC,UAAU,CAAC,UAAU,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa9D,aAAa,IAAI,OAAO;IAMlB,QAAQ,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAqC7F;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAyB7B,OAAO,CAAC,gBAAgB;IAwBxB,OAAO,IAAI,OAAO;CAGnB"}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transformers.js LLM Pipeline
|
|
3
|
+
* Isomorphic - works in browser (WebGPU) and Node.js
|
|
4
|
+
*
|
|
5
|
+
* Supports any causal LLM model from Hugging Face that works with Transformers.js,
|
|
6
|
+
* including SmolLM, Phi, Qwen, Gemma, and others.
|
|
7
|
+
*
|
|
8
|
+
* Note: This backend does not support native tool calling.
|
|
9
|
+
* When tools are provided, it injects instructions into the system prompt
|
|
10
|
+
* for JSON-based tool calling, parsed by VoicePipeline.
|
|
11
|
+
*/
|
|
12
|
+
import { pipeline } from '@huggingface/transformers';
|
|
13
|
+
import { LLMLogger, LLMConversationTracker } from '../../services';
|
|
14
|
+
/**
 * Text-generation backend built on the Transformers.js `pipeline` API.
 *
 * Tool calling is emulated: when tools are supplied, JSON tool-call
 * instructions are added to the system prompt and the raw model text is
 * returned for the caller to parse.
 */
export class TransformersLLM {
    config;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    pipe = null;
    ready = false;
    tracker;

    /**
     * @param config Model id, dtype, device, `maxNewTokens` and `temperature`.
     */
    constructor(config) {
        this.config = config;
        this.tracker = new LLMConversationTracker(new LLMLogger());
    }

    /**
     * Load the text-generation pipeline for the configured model.
     *
     * @param onProgress Forwarded to the loader as `progress_callback`.
     */
    async initialize(onProgress) {
        console.log(`Loading LLM model (${this.config.model})...`);
        this.pipe = await pipeline('text-generation', this.config.model, {
            dtype: this.config.dtype,
            device: this.config.device,
            progress_callback: onProgress,
        });
        this.ready = true;
        console.log('LLM model loaded.');
    }

    /** @returns Always false; tools are handled through prompt injection. */
    supportsTools() {
        // Transformers backend doesn't support tool calling natively
        // Tools are handled via prompt injection at the VoicePipeline level
        return false;
    }

    /**
     * Generate a reply for `messages`.
     *
     * @param messages Conversation so far (`system`, `user`, `assistant`, `tool` roles).
     * @param options Optional `conversationId`, `tools` and `onToken` callback.
     * @returns `{ content, finishReason: 'stop' }`.
     * @throws {Error} If `initialize()` has not completed.
     */
    async generate(messages, options) {
        if (!this.pipe) {
            throw new Error('LLM pipeline not initialized');
        }
        // Use conversation ID if provided, else default
        const conversationId = options?.conversationId ?? 'default';
        // Log input messages
        this.tracker.logInput(conversationId, messages);
        const prompt = this.formatChatPrompt(messages, options?.tools);
        const result = await this.pipe(prompt, {
            max_new_tokens: this.config.maxNewTokens,
            do_sample: true,
            temperature: this.config.temperature,
            return_full_text: false,
        });
        let response = result[0]?.generated_text?.trim() || '';
        // Drop the end-of-turn marker and anything the model produced after it.
        response = response.replace(/<\|im_end\|>.*$/s, '').trim();
        // Log the response
        this.tracker.logOutput(conversationId, response);
        // The pipeline returns the whole text at once, so the token callback is
        // fed the finished response one character at a time.
        for (const char of response) {
            options?.onToken?.(char);
        }
        return {
            content: response,
            finishReason: 'stop',
        };
    }

    /**
     * Build tool instructions to inject into system prompt
     *
     * @param tools Tool definitions; only `name`, `description` and `parameters` are serialized.
     * @returns Instruction text that starts with a blank line so it can be
     *   appended directly to existing system content.
     */
    buildToolInstructions(tools) {
        const toolsJson = JSON.stringify(tools.map(t => ({
            name: t.name,
            description: t.description,
            parameters: t.parameters,
        })), null, 2);
        return `

You have access to tools. When you need to use a tool, respond with ONLY this JSON (no other text before or after):
{"tool_call": {"name": "tool_name", "arguments": {...}}}

Available tools:
${toolsJson}

IMPORTANT:
- If using a tool, respond ONLY with the JSON tool_call object, nothing else.
- After you receive a tool result, provide your natural language response to the user.
- Only use tools when necessary. For simple questions, respond directly.`;
    }

    /**
     * Render the conversation as `<|im_start|>role ... <|im_end|>` blocks and
     * finish with an open assistant turn for the model to complete.
     *
     * Tool instructions are appended to every system message. If tools are
     * supplied but the conversation has no system message, a system block that
     * holds only the instructions is placed first, so the tools are never
     * silently dropped.
     *
     * @param messages Conversation messages; unknown roles are skipped.
     * @param tools Optional tool definitions.
     * @returns The prompt string.
     */
    formatChatPrompt(messages, tools) {
        const toolInstructions = tools && tools.length > 0
            ? this.buildToolInstructions(tools)
            : '';
        let sawSystemMessage = false;
        let prompt = '';
        for (const msg of messages) {
            if (msg.role === 'system') {
                sawSystemMessage = true;
                prompt += `<|im_start|>system\n${msg.content}${toolInstructions}<|im_end|>\n`;
            }
            else if (msg.role === 'user') {
                prompt += `<|im_start|>user\n${msg.content}<|im_end|>\n`;
            }
            else if (msg.role === 'assistant') {
                prompt += `<|im_start|>assistant\n${msg.content}<|im_end|>\n`;
            }
            else if (msg.role === 'tool') {
                prompt += `<|im_start|>tool\n[Tool Result: ${msg.toolCallId}]\n${msg.content}<|im_end|>\n`;
            }
        }
        if (toolInstructions && !sawSystemMessage) {
            // The instructions start with blank lines meant to separate them from
            // preceding system text; none exists here, so trim them.
            prompt = `<|im_start|>system\n${toolInstructions.trimStart()}<|im_end|>\n${prompt}`;
        }
        prompt += '<|im_start|>assistant\n';
        return prompt;
    }

    /** @returns Whether `initialize()` has completed. */
    isReady() {
        return this.ready;
    }
}
|
|
117
|
+
//# sourceMappingURL=llm.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm.js","sourceRoot":"","sources":["../../../src/backends/transformers/llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AAWrD,OAAO,EAAE,SAAS,EAAE,sBAAsB,EAAuB,MAAM,gBAAgB,CAAC;AAExF,MAAM,OAAO,eAAe;IAClB,MAAM,CAAwB;IACtC,8DAA8D;IACtD,IAAI,GAAQ,IAAI,CAAC;IACjB,KAAK,GAAG,KAAK,CAAC;IACd,OAAO,CAAyB;IAExC,YAAY,MAA6B;QACvC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,IAAI,sBAAsB,CAAC,IAAI,SAAS,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,UAA6B;QAC5C,OAAO,CAAC,GAAG,CAAC,sBAAsB,IAAI,CAAC,MAAM,CAAC,KAAK,MAAM,CAAC,CAAC;QAE3D,IAAI,CAAC,IAAI,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE;YAC/D,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAsC;YACzD,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;YAC1B,iBAAiB,EAAE,UAAU;SAC9B,CAAC,CAAC;QAEH,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;IAED,aAAa;QACX,6DAA6D;QAC7D,oEAAoE;QACpE,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,QAAmB,EAAE,OAA4B;QAC9D,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAClD,CAAC;QAED,gDAAgD;QAChD,MAAM,cAAc,GAAG,OAAO,EAAE,cAAc,IAAI,SAAS,CAAC;QAE5D,qBAAqB;QACrB,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,cAAc,EAAE,QAA4B,CAAC,CAAC;QAEpE,MAAM,MAAM,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;QAE/D,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YACrC,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY;YACxC,SAAS,EAAE,IAAI;YACf,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;YACpC,gBAAgB,EAAE,KAAK;SACxB,CAAC,CAAC;QAEH,IAAI,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,cAAc,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACvD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAE3D,mBAAmB;QACnB,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;QAEjD,gCAAgC;QAChC,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,OAAO,EAAE,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC;QAC3B,CAAC;QAED,OAAO;YACL,OAAO,EAAE,QAAQ;YACjB,YAAY,EAAE,MAAM;SACrB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,KAAuB;QACnD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAC9B,KAAK
,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACd,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,WAAW,EAAE,CAAC,CAAC,WAAW;YAC1B,UAAU,EAAE,CAAC,CAAC,UAAU;SACzB,CAAC,CAAC,EACH,IAAI,EACJ,CAAC,CACF,CAAC;QAEF,OAAO;;;;;;EAMT,SAAS;;;;;yEAK8D,CAAC;IACxE,CAAC;IAEO,gBAAgB,CAAC,QAAmB,EAAE,KAAwB;QACpE,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,qEAAqE;gBACrE,MAAM,OAAO,GAAG,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;oBACvC,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,qBAAqB,CAAC,KAAK,CAAC;oBACjD,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC;gBAChB,MAAM,IAAI,uBAAuB,OAAO,cAAc,CAAC;YACzD,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,qBAAqB,GAAG,CAAC,OAAO,cAAc,CAAC;YAC3D,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,0BAA0B,GAAG,CAAC,OAAO,cAAc,CAAC;YAChE,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,OAAO,GAAG,GAAkB,CAAC;gBACnC,MAAM,IAAI,mCAAmC,OAAO,CAAC,UAAU,MAAM,GAAG,CAAC,OAAO,cAAc,CAAC;YACjG,CAAC;QACH,CAAC;QAED,MAAM,IAAI,yBAAyB,CAAC;QACpC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,OAAO;QACL,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;CACF"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transformers.js STT Pipeline
|
|
3
|
+
* Isomorphic - works in browser (WebGPU) and Node.js
|
|
4
|
+
*
|
|
5
|
+
* Supports Whisper, Moonshine, Wav2Vec2, and other ASR models from Hugging Face.
|
|
6
|
+
*/
|
|
7
|
+
import type { STTPipeline, TransformersSTTConfig, ProgressCallback } from '../../types';
|
|
8
|
+
/**
 * Speech-to-text backend built on the Transformers.js
 * `automatic-speech-recognition` pipeline.
 */
export declare class TransformersSTT implements STTPipeline {
    /** Configuration object passed to the constructor. */
    private config;
    /** Loaded ASR pipeline; null until `initialize()` completes. */
    private pipe;
    /** Set to true once `initialize()` has loaded the model. */
    private ready;
    /** @param config Model id, dtype, device and optional language read by this class. */
    constructor(config: TransformersSTTConfig);
    /**
     * Load the ASR pipeline for the configured model.
     * @param onProgress Forwarded to the pipeline loader as its progress callback.
     */
    initialize(onProgress?: ProgressCallback): Promise<void>;
    /**
     * Transcribe audio samples to text.
     * @returns The trimmed transcript (first entry when the pipeline returns an
     *   array), or an empty string when no text is produced.
     * @throws If the pipeline has not been initialized.
     */
    transcribe(audio: Float32Array): Promise<string>;
    /** Whether `initialize()` has completed. */
    isReady(): boolean;
}
|
|
17
|
+
//# sourceMappingURL=stt.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../../src/backends/transformers/stt.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,WAAW,EAAE,qBAAqB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAExF,qBAAa,eAAgB,YAAW,WAAW;IACjD,OAAO,CAAC,MAAM,CAAwB;IAEtC,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,KAAK,CAAS;gBAEV,MAAM,EAAE,qBAAqB;IAInC,UAAU,CAAC,UAAU,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAaxD,UAAU,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC;IAiBtD,OAAO,IAAI,OAAO;CAGnB"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transformers.js STT Pipeline
|
|
3
|
+
* Isomorphic - works in browser (WebGPU) and Node.js
|
|
4
|
+
*
|
|
5
|
+
* Supports Whisper, Moonshine, Wav2Vec2, and other ASR models from Hugging Face.
|
|
6
|
+
*/
|
|
7
|
+
import { pipeline } from '@huggingface/transformers';
|
|
8
|
+
/**
 * Speech-to-text backend that wraps the Transformers.js
 * `automatic-speech-recognition` pipeline.
 */
export class TransformersSTT {
    config;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    pipe = null;
    ready = false;

    /** @param config Model id, dtype, device and optional `language`. */
    constructor(config) {
        this.config = config;
    }

    /**
     * Load the recognition model and mark the backend ready.
     *
     * @param onProgress Handed to the loader as `progress_callback`.
     */
    async initialize(onProgress) {
        console.log(`Loading STT model (${this.config.model})...`);
        const modelId = this.config.model;
        const loadOptions = {
            dtype: this.config.dtype,
            device: this.config.device,
            progress_callback: onProgress,
        };
        this.pipe = await pipeline('automatic-speech-recognition', modelId, loadOptions);
        this.ready = true;
        console.log('STT model loaded.');
    }

    /**
     * Run recognition over `audio`.
     *
     * @param audio Samples to transcribe.
     * @returns Trimmed transcript, or `''` when the pipeline yields no text.
     * @throws {Error} If the pipeline has not been loaded.
     */
    async transcribe(audio) {
        if (!this.pipe) {
            throw new Error('STT pipeline not initialized');
        }
        // Language and task hints are sent only when a language is configured.
        const decodeOptions = {};
        if (this.config.language) {
            decodeOptions.language = this.config.language;
            decodeOptions.task = 'transcribe';
        }
        const output = await this.pipe(audio, decodeOptions);
        // The pipeline may return one result or a list; use the first entry of a list.
        const rawText = Array.isArray(output) ? output[0]?.text : output.text;
        return rawText?.trim() || '';
    }

    /** @returns Whether the model has been loaded. */
    isReady() {
        return this.ready;
    }
}
|
|
43
|
+
//# sourceMappingURL=stt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stt.js","sourceRoot":"","sources":["../../../src/backends/transformers/stt.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AAGrD,MAAM,OAAO,eAAe;IAClB,MAAM,CAAwB;IACtC,8DAA8D;IACtD,IAAI,GAAQ,IAAI,CAAC;IACjB,KAAK,GAAG,KAAK,CAAC;IAEtB,YAAY,MAA6B;QACvC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,UAA6B;QAC5C,OAAO,CAAC,GAAG,CAAC,sBAAsB,IAAI,CAAC,MAAM,CAAC,KAAK,MAAM,CAAC,CAAC;QAE3D,IAAI,CAAC,IAAI,GAAG,MAAM,QAAQ,CAAC,8BAA8B,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE;YAC5E,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAsC;YACzD,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;YAC1B,iBAAiB,EAAE,UAAU;SAC9B,CAAC,CAAC;QAEH,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAmB;QAClC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAClD,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ;YAClC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,EAAE,YAAqB,EAAE;YACjE,CAAC,CAAC,EAAE,CAAC;QAEP,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAE/C,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACvC,CAAC;QACD,OAAQ,MAA2B,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACzD,CAAC;IAED,OAAO;QACL,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;CACF"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transformers.js TTS Pipeline
|
|
3
|
+
* Isomorphic - works in browser (WebGPU) and Node.js
|
|
4
|
+
*
|
|
5
|
+
* Supports SpeechT5 and other TTS models from Hugging Face.
|
|
6
|
+
*/
|
|
7
|
+
import type { TTSPipeline, TransformersTTSConfig, ProgressCallback, AudioPlayable } from '../../types';
|
|
8
|
+
/**
 * Text-to-speech backend built on the Transformers.js `text-to-speech` pipeline.
 */
export declare class TransformersTTS implements TTSPipeline {
    /** Configuration object passed to the constructor. */
    private config;
    /** Loaded TTS pipeline; null until `initialize()` completes. */
    private pipe;
    /** Set to true once `initialize()` has loaded the model. */
    private ready;
    /** @param config Model id, dtype, device and speaker embeddings read by this class. */
    constructor(config: TransformersTTSConfig);
    /**
     * Load the TTS pipeline for the configured model.
     * @param onProgress Forwarded to the pipeline loader as its progress callback.
     */
    initialize(onProgress?: ProgressCallback): Promise<void>;
    /**
     * Synthesize speech for `text` using the configured speaker embeddings.
     * @returns A playable built from the pipeline's audio and sampling rate.
     * @throws If the pipeline has not been initialized.
     */
    synthesize(text: string): Promise<AudioPlayable>;
    /** Whether `initialize()` has completed. */
    isReady(): boolean;
}
|
|
17
|
+
//# sourceMappingURL=tts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../../src/backends/transformers/tts.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,WAAW,EAAE,qBAAqB,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAGvG,qBAAa,eAAgB,YAAW,WAAW;IACjD,OAAO,CAAC,MAAM,CAAwB;IAEtC,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,KAAK,CAAS;gBAEV,MAAM,EAAE,qBAAqB;IAInC,UAAU,CAAC,UAAU,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAaxD,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IAYtD,OAAO,IAAI,OAAO;CAGnB"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transformers.js TTS Pipeline
|
|
3
|
+
* Isomorphic - works in browser (WebGPU) and Node.js
|
|
4
|
+
*
|
|
5
|
+
* Supports SpeechT5 and other TTS models from Hugging Face.
|
|
6
|
+
*/
|
|
7
|
+
import { pipeline } from '@huggingface/transformers';
|
|
8
|
+
import { BufferedAudioPlayable } from '../../types';
|
|
9
|
+
/**
 * Text-to-speech backend that wraps the Transformers.js `text-to-speech`
 * pipeline.
 */
export class TransformersTTS {
    config;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    pipe = null;
    ready = false;

    /** @param config Model id, dtype, device and `speakerEmbeddings`. */
    constructor(config) {
        this.config = config;
    }

    /**
     * Load the synthesis model and mark the backend ready.
     *
     * @param onProgress Handed to the loader as `progress_callback`.
     */
    async initialize(onProgress) {
        console.log(`Loading TTS model (${this.config.model})...`);
        const modelId = this.config.model;
        const loadOptions = {
            dtype: this.config.dtype,
            device: this.config.device,
            progress_callback: onProgress,
        };
        this.pipe = await pipeline('text-to-speech', modelId, loadOptions);
        this.ready = true;
        console.log('TTS model loaded.');
    }

    /**
     * Turn `text` into audio.
     *
     * @param text Text to speak.
     * @returns A `BufferedAudioPlayable` holding the pipeline's `audio` and
     *   `sampling_rate`.
     * @throws {Error} If the pipeline has not been loaded.
     */
    async synthesize(text) {
        if (!this.pipe) {
            throw new Error('TTS pipeline not initialized');
        }
        const voiceOptions = { speaker_embeddings: this.config.speakerEmbeddings };
        const { audio, sampling_rate: samplingRate } = await this.pipe(text, voiceOptions);
        return new BufferedAudioPlayable(audio, samplingRate);
    }

    /** @returns Whether the model has been loaded. */
    isReady() {
        return this.ready;
    }
}
|
|
40
|
+
//# sourceMappingURL=tts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tts.js","sourceRoot":"","sources":["../../../src/backends/transformers/tts.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AAErD,OAAO,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AAEpD,MAAM,OAAO,eAAe;IAClB,MAAM,CAAwB;IACtC,8DAA8D;IACtD,IAAI,GAAQ,IAAI,CAAC;IACjB,KAAK,GAAG,KAAK,CAAC;IAEtB,YAAY,MAA6B;QACvC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,UAA6B;QAC5C,OAAO,CAAC,GAAG,CAAC,sBAAsB,IAAI,CAAC,MAAM,CAAC,KAAK,MAAM,CAAC,CAAC;QAE3D,IAAI,CAAC,IAAI,GAAG,MAAM,QAAQ,CAAC,gBAAgB,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE;YAC9D,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAsC;YACzD,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;YAC1B,iBAAiB,EAAE,UAAU;SAC9B,CAAC,CAAC;QAEH,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,IAAY;QAC3B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAClD,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YACnC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,iBAAiB;SAClD,CAAC,CAAC;QAEH,OAAO,IAAI,qBAAqB,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC;IACvE,CAAC;IAED,OAAO;QACL,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;CACF"}
|
package/dist/cache.d.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cache utilities for modular-voice-agent-sdk
|
|
3
|
+
* Models and binaries are stored in ~/.cache/mvas/ by default
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Get the cache directory for mvas assets.
|
|
7
|
+
* Default: ~/.cache/mvas
|
|
8
|
+
* Override with MVAS_CACHE environment variable.
|
|
9
|
+
*/
|
|
10
|
+
export declare function getCacheDir(): string;
|
|
11
|
+
/**
|
|
12
|
+
* Get the path to the models directory
|
|
13
|
+
*/
|
|
14
|
+
export declare function getModelsDir(): string;
|
|
15
|
+
/**
|
|
16
|
+
* Get the path to the binaries directory
|
|
17
|
+
*/
|
|
18
|
+
export declare function getBinDir(): string;
|
|
19
|
+
/**
|
|
20
|
+
* Get the full path to a model file in the cache.
|
|
21
|
+
* @param filename - The model filename (e.g., 'whisper-large-v3-turbo-q8.bin')
|
|
22
|
+
*/
|
|
23
|
+
export declare function getModelPath(filename: string): string;
|
|
24
|
+
/**
|
|
25
|
+
* Get the full path to a binary in the cache.
|
|
26
|
+
* @param name - The binary name (e.g., 'whisper-cli', 'llama-completion')
|
|
27
|
+
*/
|
|
28
|
+
export declare function getBinaryPath(name: string): string;
|
|
29
|
+
/**
|
|
30
|
+
* Default binary names for native backends.
|
|
31
|
+
*/
|
|
32
|
+
export declare const defaultBinaries: {
|
|
33
|
+
whisperCli: string;
|
|
34
|
+
llamaCompletion: string;
|
|
35
|
+
sherpaOnnxTts: string;
|
|
36
|
+
};
|
|
37
|
+
//# sourceMappingURL=cache.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH;;;;GAIG;AACH,wBAAgB,WAAW,IAAI,MAAM,CAEpC;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;GAEG;AACH,wBAAgB,SAAS,IAAI,MAAM,CAElC;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAErD;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAElD;AAED;;GAEG;AACH,eAAO,MAAM,eAAe;;;;CAI3B,CAAC"}
|
package/dist/cache.js
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cache utilities for modular-voice-agent-sdk
|
|
3
|
+
* Models and binaries are stored in ~/.cache/mvas/ by default
|
|
4
|
+
*/
|
|
5
|
+
import { homedir } from 'os';
|
|
6
|
+
import { join } from 'path';
|
|
7
|
+
/**
 * Resolve the root cache directory used for mvas assets.
 *
 * A non-empty MVAS_CACHE environment variable wins; otherwise the result is
 * `.cache/mvas` under the current user's home directory.
 *
 * @returns {string} The cache root path.
 */
export function getCacheDir() {
    const override = process.env.MVAS_CACHE;
    if (override) {
        return override;
    }
    return join(homedir(), '.cache', 'mvas');
}
|
|
15
|
+
/**
 * Resolve the directory that holds model files.
 *
 * @returns {string} The `models` directory inside the cache root returned by
 *   `getCacheDir()`.
 */
export function getModelsDir() {
    const cacheRoot = getCacheDir();
    return join(cacheRoot, 'models');
}
|
|
21
|
+
/**
 * Resolve the directory that holds binaries.
 *
 * @returns {string} The `bin` directory inside the cache root returned by
 *   `getCacheDir()`.
 */
export function getBinDir() {
    const cacheRoot = getCacheDir();
    return join(cacheRoot, 'bin');
}
|
|
27
|
+
/**
 * Build the full path of a model file inside the models directory.
 *
 * @param filename - The model filename (e.g., 'whisper-large-v3-turbo-q8.bin')
 * @returns {string} `filename` joined onto the directory from `getModelsDir()`.
 */
export function getModelPath(filename) {
    const modelsRoot = getModelsDir();
    return join(modelsRoot, filename);
}
|
|
34
|
+
/**
 * Build the full path of a binary inside the binaries directory.
 *
 * @param name - The binary name (e.g., 'whisper-cli', 'llama-completion')
 * @returns {string} `name` joined onto the directory from `getBinDir()`.
 */
export function getBinaryPath(name) {
    const binRoot = getBinDir();
    return join(binRoot, name);
}
|
|
41
|
+
/**
 * Names of the executables the native backends use by default.
 *
 * These are bare binary names, not paths; pass one to `getBinaryPath()` to
 * locate it inside the cache.
 */
export const defaultBinaries = {
    // Default binary name for the `whisperCli` slot.
    whisperCli: 'whisper-cli',
    // Default binary name for the `llamaCompletion` slot.
    llamaCompletion: 'llama-completion',
    // Default binary name for the `sherpaOnnxTts` slot.
    sherpaOnnxTts: 'sherpa-onnx-offline-tts',
};
|
|
49
|
+
//# sourceMappingURL=cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC;AAC7B,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAE5B;;;;GAIG;AACH,MAAM,UAAU,WAAW;IACzB,OAAO,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;AACrE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY;IAC1B,OAAO,IAAI,CAAC,WAAW,EAAE,EAAE,QAAQ,CAAC,CAAC;AACvC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS;IACvB,OAAO,IAAI,CAAC,WAAW,EAAE,EAAE,KAAK,CAAC,CAAC;AACpC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,QAAgB;IAC3C,OAAO,IAAI,CAAC,YAAY,EAAE,EAAE,QAAQ,CAAC,CAAC;AACxC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,OAAO,IAAI,CAAC,SAAS,EAAE,EAAE,IAAI,CAAC,CAAC;AACjC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG;IAC7B,UAAU,EAAE,aAAa;IACzB,eAAe,EAAE,kBAAkB;IACnC,aAAa,EAAE,yBAAyB;CACzC,CAAC"}
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Modular Voice Agent SDK CLI
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* npx mvas setup <config.json> - Download models from config file
|
|
7
|
+
* npx mvas setup --binaries-only - Set up native binaries only
|
|
8
|
+
* npx mvas help - Show help
|
|
9
|
+
*/
|
|
10
|
+
export {};
|
|
11
|
+
//# sourceMappingURL=cli.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA;;;;;;;GAOG"}
|