@actrun_ai/tastekit-voice 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/elevenlabs-smoke.d.ts +7 -0
- package/dist/__tests__/elevenlabs-smoke.d.ts.map +1 -0
- package/dist/__tests__/elevenlabs-smoke.js +157 -0
- package/dist/__tests__/elevenlabs-smoke.js.map +1 -0
- package/dist/audio/player.d.ts +27 -0
- package/dist/audio/player.d.ts.map +1 -0
- package/dist/audio/player.js +69 -0
- package/dist/audio/player.js.map +1 -0
- package/dist/audio/recorder.d.ts +22 -0
- package/dist/audio/recorder.d.ts.map +1 -0
- package/dist/audio/recorder.js +45 -0
- package/dist/audio/recorder.js.map +1 -0
- package/dist/config.d.ts +99 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +36 -0
- package/dist/config.js.map +1 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +14 -0
- package/dist/index.js.map +1 -0
- package/dist/resolve.d.ts +16 -0
- package/dist/resolve.d.ts.map +1 -0
- package/dist/resolve.js +77 -0
- package/dist/resolve.js.map +1 -0
- package/dist/stt/elevenlabs-stt.d.ts +25 -0
- package/dist/stt/elevenlabs-stt.d.ts.map +1 -0
- package/dist/stt/elevenlabs-stt.js +140 -0
- package/dist/stt/elevenlabs-stt.js.map +1 -0
- package/dist/stt/provider.d.ts +25 -0
- package/dist/stt/provider.d.ts.map +1 -0
- package/dist/stt/provider.js +8 -0
- package/dist/stt/provider.js.map +1 -0
- package/dist/stt/whisper-stt.d.ts +27 -0
- package/dist/stt/whisper-stt.d.ts.map +1 -0
- package/dist/stt/whisper-stt.js +95 -0
- package/dist/stt/whisper-stt.js.map +1 -0
- package/dist/tts/elevenlabs-tts.d.ts +24 -0
- package/dist/tts/elevenlabs-tts.d.ts.map +1 -0
- package/dist/tts/elevenlabs-tts.js +67 -0
- package/dist/tts/elevenlabs-tts.js.map +1 -0
- package/dist/tts/piper-tts.d.ts +23 -0
- package/dist/tts/piper-tts.d.ts.map +1 -0
- package/dist/tts/piper-tts.js +59 -0
- package/dist/tts/piper-tts.js.map +1 -0
- package/dist/tts/provider.d.ts +17 -0
- package/dist/tts/provider.d.ts.map +1 -0
- package/dist/tts/provider.js +8 -0
- package/dist/tts/provider.js.map +1 -0
- package/dist/voice-io.d.ts +48 -0
- package/dist/voice-io.d.ts.map +1 -0
- package/dist/voice-io.js +110 -0
- package/dist/voice-io.js.map +1 -0
- package/package.json +47 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"elevenlabs-smoke.d.ts","sourceRoot":"","sources":["../../src/__tests__/elevenlabs-smoke.ts"],"names":[],"mappings":";AACA;;;GAGG"}
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* ElevenLabs smoke test — validates API key, TTS, and STT round-trip.
|
|
4
|
+
* Run: ELEVENLABS_API_KEY="your-key" npx tsx packages/voice/src/__tests__/elevenlabs-smoke.ts
|
|
5
|
+
*/
|
|
6
|
+
// Credentials come from the environment only. Without a key none of the
// requests below can succeed, so stop before any network call is attempted.
const { ELEVENLABS_API_KEY: apiKey } = process.env;
if (!apiKey) {
  console.error('❌ ELEVENLABS_API_KEY not set');
  process.exit(1);
}
|
|
11
|
+
/**
 * Drain an async-iterable stream into a single Buffer.
 *
 * @param {AsyncIterable<Buffer|Uint8Array|string>} stream - Source of chunks.
 * @returns {Promise<Buffer>} All chunks concatenated in arrival order; an
 *   empty Buffer when the stream yields nothing.
 */
async function collectStream(stream) {
  const chunks = [];
  for await (const chunk of stream) {
    // Buffer.concat() throws on string chunks (e.g. a Readable with an
    // encoding set), so normalise every chunk to a Buffer first.
    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
  }
  return Buffer.concat(chunks);
}
|
|
18
|
+
/**
 * TTS leg of the smoke test: synthesizes a fixed sentence through the
 * ElevenLabs streaming text-to-speech endpoint and returns the audio.
 *
 * The request asks for `pcm_16000` output so the result can be fed straight
 * into the STT leg without resampling.
 *
 * @returns {Promise<Buffer>} The complete response body as one Buffer.
 * @throws {Error} When the HTTP response status is not OK; the message
 *   carries the status code and the response text.
 */
async function testTTS() {
  console.log('\n── TTS Test ──────────────────────────────');

  // Longer text so VAD has enough speech + silence gap to commit
  const testText = 'Hello, welcome to TasteKit! This is a voice system smoke test. We are verifying that text to speech and speech to text both work correctly with the ElevenLabs API.';
  console.log(`Synthesizing: "${testText}"`);

  const start = Date.now();

  // output_format is a QUERY parameter, not a body parameter
  const url = `https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM/stream?output_format=pcm_16000`;
  const payload = {
    text: testText,
    model_id: 'eleven_multilingual_v2',
    voice_settings: {
      stability: 0.5,
      similarity_boost: 0.75,
      style: 0.0,
      use_speaker_boost: true,
    },
  };
  const requestInit = {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'xi-api-key': apiKey,
    },
    body: JSON.stringify(payload),
  };

  const response = await fetch(url, requestInit);
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`TTS error (${response.status}): ${errorText}`);
  }

  const rawBody = await response.arrayBuffer();
  const audio = Buffer.from(rawBody);
  const elapsed = Date.now() - start;

  // 16000 samples/s * 2 bytes per 16-bit sample = bytes per second of audio.
  const seconds = audio.length / (16000 * 2);
  console.log(`✅ TTS OK — ${audio.length} bytes of PCM audio in ${elapsed}ms`);
  console.log(` Sample rate: 16000Hz, 16-bit signed, mono`);
  console.log(` Duration: ~${seconds.toFixed(1)}s`);

  return audio;
}
|
|
55
|
+
/**
 * STT leg of the smoke test: streams PCM audio to the ElevenLabs realtime
 * speech-to-text WebSocket and logs the committed transcript.
 *
 * Flow: connect, send the audio in 1-second chunks, send an empty chunk with
 * `commit: true`, then wait for a `committed_transcript` message (or give up
 * 5 seconds after the commit) and close the socket.
 *
 * @param {Buffer} pcmAudio - Audio to transcribe; sent as-is with a declared
 *   sample rate of 16000.
 * @returns {Promise<void>} Resolves once the socket has closed. A missing
 *   transcript is only logged as a warning.
 * @throws {Error} When the WebSocket reports an error before any transcript
 *   was committed (the original error is attached as `cause`).
 */
async function testSTT(pcmAudio) {
  console.log('\n── STT Test (direct WebSocket) ──────────');
  // TTS output is already 16kHz PCM — no downsampling needed
  const downsampled = pcmAudio;
  console.log(`Audio: ${downsampled.length} bytes, ~${(downsampled.length / (16000 * 2)).toFixed(1)}s`);

  // Direct WebSocket test with full debug logging
  const WebSocket = (await import('ws')).default;
  const sampleRate = 16000;
  const url = `wss://api.elevenlabs.io/v1/speech-to-text/realtime?model_id=scribe_v2_realtime&language_code=en&audio_format=pcm_${sampleRate}`;
  console.log(`Connecting to: ${url.replace(/\?.*/, '?...')}`);
  const ws = new WebSocket(url, {
    headers: { 'xi-api-key': apiKey },
  });

  let finalText = '';
  let wsError = null;
  const start = Date.now();

  // Track every pending timer so none of them outlives the socket and keeps
  // the process alive after the test has already finished.
  const timers = new Set();
  const schedule = (fn, ms) => {
    const timer = setTimeout(() => {
      timers.delete(timer);
      fn();
    }, ms);
    timers.add(timer);
  };
  const clearTimers = () => {
    for (const timer of timers) {
      clearTimeout(timer);
    }
    timers.clear();
  };

  ws.on('open', () => {
    console.log(` [ws] Connected (${Date.now() - start}ms)`);
    const chunkSize = 32000; // 1 second of 16kHz 16-bit mono
    let offset = 0;
    const sendNext = () => {
      if (offset < downsampled.length && ws.readyState === WebSocket.OPEN) {
        const chunk = downsampled.subarray(offset, Math.min(offset + chunkSize, downsampled.length));
        ws.send(JSON.stringify({
          message_type: 'input_audio_chunk',
          audio_base_64: chunk.toString('base64'),
          sample_rate: sampleRate,
          commit: false,
        }));
        // Advance by what was actually sent so the byte count logged below is
        // accurate even when the last chunk is shorter than chunkSize.
        offset += chunk.length;
        // One 1-second chunk every 500 ms, i.e. roughly twice real-time.
        schedule(sendNext, 500);
      } else {
        console.log(` [ws] Audio sent (${offset} bytes, ${Date.now() - start}ms)`);
        // Small delay before committing to let last chunk process
        schedule(() => {
          if (ws.readyState !== WebSocket.OPEN) {
            return;
          }
          console.log(` [ws] Committing (${Date.now() - start}ms)`);
          ws.send(JSON.stringify({
            message_type: 'input_audio_chunk',
            audio_base_64: '',
            sample_rate: sampleRate,
            commit: true,
          }));
          // Wait for response before closing
          schedule(() => {
            if (ws.readyState === WebSocket.OPEN) {
              console.log(` [ws] Timeout — closing (${Date.now() - start}ms)`);
              ws.close();
            }
          }, 5000);
        }, 1000);
      }
    };
    sendNext();
  });

  ws.on('message', (data) => {
    const ts = Date.now() - start;
    let msg;
    try {
      msg = JSON.parse(data.toString());
    } catch {
      // A frame that is not JSON must not crash the handler; note it and
      // keep listening for the transcript.
      console.log(` [${ts}ms] non-JSON frame ignored`);
      return;
    }
    // Log every message type for debugging
    console.log(` [${ts}ms] ${msg.message_type}: ${JSON.stringify(msg).slice(0, 200)}`);
    if (msg.message_type === 'committed_transcript' && msg.text) {
      finalText = msg.text;
      ws.close();
    }
  });

  await new Promise((resolve) => {
    ws.on('close', (code, reason) => {
      console.log(` [ws] Closed: code=${code} reason="${reason}" (${Date.now() - start}ms)`);
      resolve();
    });
    ws.on('error', (err) => {
      wsError = err;
      console.error(` [ws] Error: ${err.message}`);
      resolve();
    });
  });
  clearTimers();

  if (wsError && !finalText) {
    // Make sure the socket is torn down, then surface the failure instead of
    // letting the caller report success.
    if (ws.readyState !== WebSocket.CLOSED) {
      ws.terminate();
    }
    throw new Error(`STT WebSocket error: ${wsError.message}`, { cause: wsError });
  }

  if (finalText) {
    console.log(`\n✅ STT OK — "${finalText}"`);
  } else {
    console.log(`\n⚠️ STT returned no final transcript`);
  }
}
|
|
140
|
+
/**
 * Script entry point: runs the TTS check, then feeds its audio into the STT
 * check. Any thrown error is printed and the process exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🔑 ElevenLabs Smoke Test');

  // Show only the first six and last four characters of the key.
  const keyHead = apiKey.slice(0, 6);
  const keyTail = apiKey.slice(-4);
  console.log(` API Key: ${keyHead}...${keyTail}`);

  try {
    // Test 1: TTS
    const synthesized = await testTTS();

    // Test 2: STT (round-trip — feed TTS output back)
    await testSTT(synthesized);

    console.log('\n✅ All ElevenLabs tests passed!\n');
  } catch (err) {
    console.error('\n❌ Test failed:', err);
    process.exit(1);
  }
}

main();
|
|
156
|
+
export {};
|
|
157
|
+
//# sourceMappingURL=elevenlabs-smoke.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"elevenlabs-smoke.js","sourceRoot":"","sources":["../../src/__tests__/elevenlabs-smoke.ts"],"names":[],"mappings":";AACA;;;GAGG;AAKH,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC;AAC9C,IAAI,CAAC,MAAM,EAAE,CAAC;IACZ,OAAO,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;IAC9C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,MAA6B;IACxD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QACjC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACrB,CAAC;IACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,KAAK,UAAU,OAAO;IACpB,OAAO,CAAC,GAAG,CAAC,8CAA8C,CAAC,CAAC;IAE5D,+DAA+D;IAC/D,MAAM,QAAQ,GAAG,qKAAqK,CAAC;IACvL,OAAO,CAAC,GAAG,CAAC,kBAAkB,QAAQ,GAAG,CAAC,CAAC;IAE3C,2EAA2E;IAC3E,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,2DAA2D;IAC3D,MAAM,GAAG,GAAG,iGAAiG,CAAC;IAC9G,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAChC,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACP,cAAc,EAAE,kBAAkB;YAClC,YAAY,EAAE,MAAO;SACtB;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;YACnB,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,wBAAwB;YAClC,cAAc,EAAE;gBACd,SAAS,EAAE,GAAG;gBACd,gBAAgB,EAAE,IAAI;gBACtB,KAAK,EAAE,GAAG;gBACV,iBAAiB,EAAE,IAAI;aACxB;SACF,CAAC;KACH,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CAAC,cAAc,QAAQ,CAAC,MAAM,MAAM,SAAS,EAAE,CAAC,CAAC;IAClE,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;IACxD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;IAEnC,OAAO,CAAC,GAAG,CAAC,cAAc,KAAK,CAAC,MAAM,0BAA0B,OAAO,IAAI,CAAC,CAAC;IAC7E,OAAO,CAAC,GAAG,CAAC,8CAA8C,CAAC,CAAC;IAC5D,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAEzE,OAAO,KAAK,CAAC;AACf,CAAC;AAED,KAAK,UAAU,OAAO,CAAC,QAAgB;IACrC,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;IAC3D,2DAA2D;IAC3D,MAAM,WAAW,GAAG,QAAQ,CAAC;IAC7B,OAAO,CAAC,GAAG,CAAC,UAAU,WAAW,CAAC,MAAM,YAAY,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC
;IAEtG,gDAAgD;IAChD,MAAM,SAAS,GAAG,CAAC,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC;IAE/C,MAAM,UAAU,GAAG,KAAK,CAAC;IACzB,MAAM,GAAG,GAAG,oHAAoH,UAAU,EAAE,CAAC;IAE7I,OAAO,CAAC,GAAG,CAAC,kBAAkB,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;IAE7D,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC,GAAG,EAAE;QAC5B,OAAO,EAAE,EAAE,YAAY,EAAE,MAAO,EAAE;KACnC,CAAC,CAAC;IAEH,IAAI,SAAS,GAAG,EAAE,CAAC;IACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;QACjB,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,KAAK,CAAC,CAAC;QAE1D,oFAAoF;QACpF,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,gCAAgC;QACzD,IAAI,MAAM,GAAG,CAAC,CAAC;QAEf,MAAM,QAAQ,GAAG,GAAG,EAAE;YACpB,IAAI,MAAM,GAAG,WAAW,CAAC,MAAM,IAAI,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;gBACpE,MAAM,KAAK,GAAG,WAAW,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,SAAS,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC;gBAC7F,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC;oBACrB,YAAY,EAAE,mBAAmB;oBACjC,aAAa,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;oBACvC,WAAW,EAAE,UAAU;oBACvB,MAAM,EAAE,KAAK;iBACd,CAAC,CAAC,CAAC;gBACJ,MAAM,IAAI,SAAS,CAAC;gBACpB,8DAA8D;gBAC9D,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;YAC5B,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,GAAG,CAAC,sBAAsB,MAAM,WAAW,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,KAAK,CAAC,CAAC;gBAE5E,0DAA0D;gBAC1D,UAAU,CAAC,GAAG,EAAE;oBACd,IAAI,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;wBACrC,OAAO,CAAC,GAAG,CAAC,sBAAsB,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,KAAK,CAAC,CAAC;wBAC3D,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC;4BACrB,YAAY,EAAE,mBAAmB;4BACjC,aAAa,EAAE,EAAE;4BACjB,WAAW,EAAE,UAAU;4BACvB,MAAM,EAAE,IAAI;yBACb,CAAC,CAAC,CAAC;wBACJ,mCAAmC;wBACnC,UAAU,CAAC,GAAG,EAAE;4BACd,IAAI,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;gCACrC,OAAO,CAAC,GAAG,CAAC,6BAA6B,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,KAAK,CAAC,CAAC;gCAClE,EAAE,CAAC,KAAK,EAAE,CAAC;4BACb,CAAC;wBACH,CAAC,EAAE,IAAI,CAAC,CAAC;oBACX,CAAC;gBACH,CAAC,EAAE,IAAI,CAAC,CAAC;YACX,CAAC;QACH,CAAC,CAAC;QACF,QAAQ,EAAE,CAAC;IACb,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE;QACxB
,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QACxC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;QAC9B,uCAAuC;QACvC,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,GAAG,CAAC,YAAY,KAAK,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACrF,IAAI,GAAG,CAAC,YAAY,KAAK,sBAAsB,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;YAC5D,SAAS,GAAG,GAAG,CAAC,IAAI,CAAC;YACrB,EAAE,CAAC,KAAK,EAAE,CAAC;QACb,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;QAClC,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE;YAC9B,OAAO,CAAC,GAAG,CAAC,uBAAuB,IAAI,YAAY,MAAM,MAAM,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,KAAK,CAAC,CAAC;YACxF,OAAO,EAAE,CAAC;QACZ,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACrB,OAAO,CAAC,KAAK,CAAC,iBAAiB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;YAC9C,OAAO,EAAE,CAAC;QACZ,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,iBAAiB,SAAS,GAAG,CAAC,CAAC;IAC7C,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;IACxD,CAAC;AACH,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;IACxC,OAAO,CAAC,GAAG,CAAC,eAAe,MAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,MAAM,MAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAEzE,IAAI,CAAC;QACH,cAAc;QACd,MAAM,KAAK,GAAG,MAAM,OAAO,EAAE,CAAC;QAE9B,kDAAkD;QAClD,MAAM,OAAO,CAAC,KAAK,CAAC,CAAC;QAErB,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;IACpD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Plays text-to-speech output.
 *
 * Two playback paths are offered:
 * - Streaming: raw PCM chunks are written to `speaker` for low-latency output
 * - File: a WAV/MP3 file is handed to `play-sound`
 */
export declare class AudioPlayer {
    private activeSpeaker;
    /**
     * Write a stream of raw PCM chunks to the speaker.
     *
     * @param chunks  PCM audio buffers, consumed in order.
     * @param options Optional PCM format description (sample rate, channel
     *                count, bit depth).
     * @returns A promise that resolves when playback completes.
     */
    playStream(chunks: AsyncIterable<Buffer>, options?: {
        sampleRate?: number;
        channels?: number;
        bitDepth?: number;
    }): Promise<void>;
    /**
     * Play an audio file (WAV, MP3) with the system player.
     * Intended as the fallback when streaming playback isn't available.
     *
     * @param filePath Path of the file to play.
     */
    playFile(filePath: string): Promise<void>;
    /** Stop any active playback. */
    stop(): void;
}
|
|
27
|
+
//# sourceMappingURL=player.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"player.d.ts","sourceRoot":"","sources":["../../src/audio/player.ts"],"names":[],"mappings":"AAEA;;;;;;GAMG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,aAAa,CAAyB;IAE9C;;;OAGG;IACG,UAAU,CACd,MAAM,EAAE,aAAa,CAAC,MAAM,CAAC,EAC7B,OAAO,CAAC,EAAE;QAAE,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,GACtE,OAAO,CAAC,IAAI,CAAC;IAkChB;;;OAGG;IACG,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY/C,gCAAgC;IAChC,IAAI,IAAI,IAAI;CAMb"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
 * Audio player for TTS output.
 *
 * Supports two modes:
 * - Streaming: pipes raw PCM to `speaker` for low-latency playback
 * - File: plays a WAV/MP3 file via `play-sound`
 *
 * Both packages are loaded with a dynamic `import()` at call time, so they
 * are only required when the corresponding method is actually used.
 */
export class AudioPlayer {
  /** Speaker used by the in-progress playStream() call, or null when idle. */
  activeSpeaker = null;

  /**
   * Play an audio stream (raw PCM) through the speaker.
   * Resolves when the speaker closes, either because playback finished or
   * because stop() destroyed it.
   *
   * @param {AsyncIterable<Buffer>} chunks - PCM audio buffers, in order.
   * @param {{ sampleRate?: number, channels?: number, bitDepth?: number }} [options]
   *   PCM format; defaults to 22050 Hz, 1 channel, 16 bit.
   * @returns {Promise<void>}
   * @throws Rejects when the speaker emits an error or reading `chunks` fails.
   */
  async playStream(chunks, options) {
    const SpeakerModule = await import('speaker');
    // CommonJS interop: the constructor may be the default export or the
    // module namespace itself.
    const Speaker = SpeakerModule.default ?? SpeakerModule;
    const speaker = new Speaker({
      sampleRate: options?.sampleRate ?? 22050,
      channels: options?.channels ?? 1,
      bitDepth: options?.bitDepth ?? 16,
    });
    this.activeSpeaker = speaker;

    // Backpressure wait that also wakes up on 'close'. Without that, a stop()
    // during backpressure would leave the pump loop waiting for a 'drain'
    // that never comes, and the upstream iterator would never be released.
    const drainOrClose = () => new Promise((done) => {
      const settle = () => {
        speaker.off('drain', settle);
        speaker.off('close', settle);
        done();
      };
      speaker.once('drain', settle);
      speaker.once('close', settle);
    });

    return new Promise((resolve, reject) => {
      speaker.on('close', () => {
        // Drop the stale reference, unless a newer playStream() call has
        // already replaced it with its own speaker.
        if (this.activeSpeaker === speaker) {
          this.activeSpeaker = null;
        }
        resolve();
      });
      speaker.on('error', reject);

      (async () => {
        try {
          for await (const chunk of chunks) {
            if (!speaker.writable) {
              break;
            }
            const canContinue = speaker.write(chunk);
            if (!canContinue) {
              await drainOrClose();
              if (!speaker.writable) {
                break;
              }
            }
          }
          // Nothing to finish if the speaker was already destroyed by stop().
          if (speaker.writable) {
            speaker.end();
          }
        } catch (err) {
          speaker.destroy();
          reject(err);
        }
      })();
    });
  }

  /**
   * Play an audio file (WAV, MP3) through the system player.
   * Fallback for when streaming isn't available.
   *
   * @param {string} filePath - Path of the file to play.
   * @returns {Promise<void>} Resolves when the player's callback reports
   *   success; rejects with the error it reports otherwise.
   */
  async playFile(filePath) {
    const playSound = await import('play-sound');
    const player = (playSound.default ?? playSound)({});
    return new Promise((resolve, reject) => {
      player.play(filePath, (err) => {
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      });
    });
  }

  /** Stop any active playback. Does nothing when no stream is playing. */
  stop() {
    if (this.activeSpeaker) {
      this.activeSpeaker.destroy();
      this.activeSpeaker = null;
    }
  }
}
|
|
69
|
+
//# sourceMappingURL=player.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"player.js","sourceRoot":"","sources":["../../src/audio/player.ts"],"names":[],"mappings":"AAEA;;;;;;GAMG;AACH,MAAM,OAAO,WAAW;IACd,aAAa,GAAoB,IAAI,CAAC;IAE9C;;;OAGG;IACH,KAAK,CAAC,UAAU,CACd,MAA6B,EAC7B,OAAuE;QAEvE,MAAM,aAAa,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;QAC9C,MAAM,OAAO,GAAG,aAAa,CAAC,OAAO,IAAI,aAAa,CAAC;QAEvD,MAAM,OAAO,GAAG,IAAI,OAAO,CAAC;YAC1B,UAAU,EAAE,OAAO,EAAE,UAAU,IAAI,KAAK;YACxC,QAAQ,EAAE,OAAO,EAAE,QAAQ,IAAI,CAAC;YAChC,QAAQ,EAAE,OAAO,EAAE,QAAQ,IAAI,EAAE;SAClC,CAAC,CAAC;QAEH,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC;QAE7B,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC3C,OAAO,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YAC7B,OAAO,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAE5B,CAAC,KAAK,IAAI,EAAE;gBACV,IAAI,CAAC;oBACH,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;wBACjC,IAAI,CAAC,OAAO,CAAC,QAAQ;4BAAE,MAAM;wBAC7B,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;wBACzC,IAAI,CAAC,WAAW,EAAE,CAAC;4BACjB,MAAM,IAAI,OAAO,CAAO,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;wBAC3D,CAAC;oBACH,CAAC;oBACD,OAAO,CAAC,GAAG,EAAE,CAAC;gBAChB,CAAC;gBAAC,OAAO,GAAG,EAAE,CAAC;oBACb,OAAO,CAAC,OAAO,EAAE,CAAC;oBAClB,MAAM,CAAC,GAAG,CAAC,CAAC;gBACd,CAAC;YACH,CAAC,CAAC,EAAE,CAAC;QACP,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CAAC,QAAgB;QAC7B,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,CAAC,SAAS,CAAC,OAAO,IAAI,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC;QAEpD,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC3C,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,GAAiB,EAAE,EAAE;gBAC1C,IAAI,GAAG;oBAAE,MAAM,CAAC,GAAG,CAAC,CAAC;;oBAChB,OAAO,EAAE,CAAC;YACjB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED,gCAAgC;IAChC,IAAI;QACF,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;QAC5B,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
 * Captures microphone input as raw PCM audio chunks.
 *
 * Built on `node-record-lpcm16`, which needs `sox` on macOS and `arecord`
 * on Linux. Output: 16-bit signed integer, 16kHz, mono.
 */
export declare class MicRecorder {
    private recording;
    private stream;
    /** Sample rate, in Hz, requested from the recorder. */
    readonly sampleRate: number;
    /**
     * @param options Optional settings; `sampleRate` overrides the default
     *                sample rate.
     */
    constructor(options?: {
        sampleRate?: number;
    });
    /**
     * Begin recording from the default microphone.
     *
     * @returns An AsyncIterable of PCM audio buffers.
     */
    start(): AsyncIterable<Buffer>;
    /** Stop recording and release the microphone. */
    stop(): void;
}
|
|
22
|
+
//# sourceMappingURL=recorder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recorder.d.ts","sourceRoot":"","sources":["../../src/audio/recorder.ts"],"names":[],"mappings":"AAGA;;;;;GAKG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,SAAS,CAA0B;IAC3C,OAAO,CAAC,MAAM,CAAyB;IAEvC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;gBAEhB,OAAO,CAAC,EAAE;QAAE,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE;IAI7C;;;OAGG;IACI,KAAK,IAAI,aAAa,CAAC,MAAM,CAAC;IAmBrC,iDAAiD;IACjD,IAAI,IAAI,IAAI;CAUb"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
 * Microphone recorder producing raw PCM audio chunks.
 *
 * Wraps `node-record-lpcm16` (requires `sox` on macOS, `arecord` on Linux).
 * Output: 16-bit signed integer, 16kHz, mono.
 */
export class MicRecorder {
  /** Active recording handle from node-record-lpcm16, or null when idle. */
  recording = null;
  /** Audio stream of the active recording, or null when idle. */
  stream = null;
  /** Sample rate, in Hz, requested from the recorder. */
  sampleRate;

  /**
   * @param {{ sampleRate?: number }} [options] - `sampleRate` defaults to 16000.
   */
  constructor(options) {
    this.sampleRate = options?.sampleRate ?? 16000;
  }

  /**
   * Start recording from the default microphone.
   * Returns an AsyncIterable of PCM audio buffers.
   *
   * The microphone is released when iteration ends for any reason: the
   * stream finishes, the consumer stops iterating, or an error is thrown.
   *
   * @returns {AsyncGenerator<Buffer>}
   * @throws {TypeError} When the recorder package exposes no `record` function.
   */
  async *start() {
    // Dynamic import so the package is truly optional
    const recordModule = await import('node-record-lpcm16');
    // CommonJS interop: `record` may be exposed as a named export or only on
    // the default export, depending on how the package's exports are detected.
    const recordLib = typeof recordModule.record === 'function'
      ? recordModule
      : recordModule.default;
    if (typeof recordLib?.record !== 'function') {
      throw new TypeError('node-record-lpcm16 does not export a record() function');
    }

    const recording = recordLib.record({
      sampleRate: this.sampleRate,
      channels: 1,
      audioType: 'raw',
      recorder: process.platform === 'darwin' ? 'sox' : 'arecord',
    });
    this.recording = recording;
    const stream = recording.stream();
    this.stream = stream;

    try {
      for await (const chunk of stream) {
        yield Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
      }
    } finally {
      // Release the microphone, unless stop() already ran or a newer start()
      // call has taken over the fields.
      if (this.recording === recording) {
        this.stop();
      }
    }
  }

  /** Stop recording and release the microphone. Safe to call when idle. */
  stop() {
    if (this.recording) {
      this.recording.stop();
      this.recording = null;
    }
    if (this.stream) {
      this.stream.destroy();
      this.stream = null;
    }
  }
}
|
|
45
|
+
//# sourceMappingURL=recorder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recorder.js","sourceRoot":"","sources":["../../src/audio/recorder.ts"],"names":[],"mappings":"AAGA;;;;;GAKG;AACH,MAAM,OAAO,WAAW;IACd,SAAS,GAAqB,IAAI,CAAC;IACnC,MAAM,GAAoB,IAAI,CAAC;IAE9B,UAAU,CAAS;IAE5B,YAAY,OAAiC;QAC3C,IAAI,CAAC,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,KAAK,CAAC;IACjD,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,CAAC,KAAK;QACV,kDAAkD;QAClD,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;QAElD,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC;YAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,QAAQ,EAAE,CAAC;YACX,SAAS,EAAE,KAAK;YAChB,QAAQ,EAAE,OAAO,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;SAC5D,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;QACvC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QAErB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YACjC,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,iDAAiD;IACjD,IAAI;QACF,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;YACtB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxB,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACtB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;CACF"}
|
package/dist/config.d.ts
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/**
 * Schema for the voice configuration.
 *
 * The configuration lives in tastekit.yaml under the `voice` key. The core
 * WorkspaceConfigSchema types that field as z.any().optional(), so the
 * detailed validation happens here at runtime.
 *
 * Both sections (`stt`, `tts`) and most of their fields carry defaults, which
 * is why every key is optional on the input side of the type below; only the
 * binary paths and the Piper model stay optional on the output side.
 */
export declare const VoiceConfigSchema: z.ZodObject<{
    /** Speech-to-text settings. */
    stt: z.ZodDefault<z.ZodObject<{
        /** Which STT backend to use. */
        provider: z.ZodDefault<z.ZodEnum<["elevenlabs", "whisper"]>>;
        /** Environment variable holding the API key (ElevenLabs) */
        api_key_env: z.ZodDefault<z.ZodString>;
        /** ISO 639-1 language code */
        language: z.ZodDefault<z.ZodString>;
        /** VAD silence threshold in milliseconds (ElevenLabs) */
        vad_silence_threshold_ms: z.ZodDefault<z.ZodNumber>;
        /** Whisper model name (e.g., 'base.en', 'small.en') */
        whisper_model: z.ZodDefault<z.ZodString>;
        /** Path to whisper.cpp binary (local mode) */
        whisper_binary_path: z.ZodOptional<z.ZodString>;
    }, "strip", z.ZodTypeAny, {
        provider: "elevenlabs" | "whisper";
        api_key_env: string;
        language: string;
        vad_silence_threshold_ms: number;
        whisper_model: string;
        whisper_binary_path?: string | undefined;
    }, {
        provider?: "elevenlabs" | "whisper" | undefined;
        api_key_env?: string | undefined;
        language?: string | undefined;
        vad_silence_threshold_ms?: number | undefined;
        whisper_model?: string | undefined;
        whisper_binary_path?: string | undefined;
    }>>;
    /** Text-to-speech settings. */
    tts: z.ZodDefault<z.ZodObject<{
        /** Which TTS backend to use. */
        provider: z.ZodDefault<z.ZodEnum<["elevenlabs", "piper"]>>;
        /** Environment variable holding the API key (ElevenLabs) */
        api_key_env: z.ZodDefault<z.ZodString>;
        /** ElevenLabs voice ID */
        voice_id: z.ZodDefault<z.ZodString>;
        /** ElevenLabs model ID */
        model_id: z.ZodDefault<z.ZodString>;
        /** Path to piper binary (local mode) */
        piper_binary_path: z.ZodOptional<z.ZodString>;
        /** Piper voice model name */
        piper_model: z.ZodOptional<z.ZodString>;
    }, "strip", z.ZodTypeAny, {
        provider: "elevenlabs" | "piper";
        api_key_env: string;
        voice_id: string;
        model_id: string;
        piper_binary_path?: string | undefined;
        piper_model?: string | undefined;
    }, {
        provider?: "elevenlabs" | "piper" | undefined;
        api_key_env?: string | undefined;
        voice_id?: string | undefined;
        model_id?: string | undefined;
        piper_binary_path?: string | undefined;
        piper_model?: string | undefined;
    }>>;
}, "strip", z.ZodTypeAny, {
    stt: {
        provider: "elevenlabs" | "whisper";
        api_key_env: string;
        language: string;
        vad_silence_threshold_ms: number;
        whisper_model: string;
        whisper_binary_path?: string | undefined;
    };
    tts: {
        provider: "elevenlabs" | "piper";
        api_key_env: string;
        voice_id: string;
        model_id: string;
        piper_binary_path?: string | undefined;
        piper_model?: string | undefined;
    };
}, {
    stt?: {
        provider?: "elevenlabs" | "whisper" | undefined;
        api_key_env?: string | undefined;
        language?: string | undefined;
        vad_silence_threshold_ms?: number | undefined;
        whisper_model?: string | undefined;
        whisper_binary_path?: string | undefined;
    } | undefined;
    tts?: {
        provider?: "elevenlabs" | "piper" | undefined;
        api_key_env?: string | undefined;
        voice_id?: string | undefined;
        model_id?: string | undefined;
        piper_binary_path?: string | undefined;
        piper_model?: string | undefined;
    } | undefined;
}>;
/** Fully-resolved voice configuration, i.e. the schema's output type. */
export type VoiceConfig = z.infer<typeof VoiceConfigSchema>;
|
|
99
|
+
//# sourceMappingURL=config.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB;;;;;GAKG;AACH,eAAO,MAAM,iBAAiB;;;QAG1B,4DAA4D;;QAE5D,8BAA8B;;QAE9B,yDAAyD;;QAEzD,uDAAuD;;QAEvD,8CAA8C;;;;;;;;;;;;;;;;;;;QAM9C,4DAA4D;;QAE5D,0BAA0B;;QAE1B,0BAA0B;;QAE1B,wCAAwC;;QAExC,6BAA6B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAG/B,CAAC;AAEH,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,iBAAiB,CAAC,CAAC"}
|
package/dist/config.js
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/** Speech-to-text section of the voice configuration. */
const sttSchema = z.object({
  provider: z.enum(['elevenlabs', 'whisper']).default('elevenlabs'),
  /** Environment variable holding the API key (ElevenLabs) */
  api_key_env: z.string().default('ELEVENLABS_API_KEY'),
  /** ISO 639-1 language code */
  language: z.string().default('en'),
  /** VAD silence threshold in milliseconds (ElevenLabs) */
  vad_silence_threshold_ms: z.number().default(1500),
  /** Whisper model name (e.g., 'base.en', 'small.en') */
  whisper_model: z.string().default('base.en'),
  /** Path to whisper.cpp binary (local mode) */
  whisper_binary_path: z.string().optional(),
});

/** Text-to-speech section of the voice configuration. */
const ttsSchema = z.object({
  provider: z.enum(['elevenlabs', 'piper']).default('elevenlabs'),
  /** Environment variable holding the API key (ElevenLabs) */
  api_key_env: z.string().default('ELEVENLABS_API_KEY'),
  /** ElevenLabs voice ID */
  voice_id: z.string().default('21m00Tcm4TlvDq8ikWAM'),
  /** ElevenLabs model ID */
  model_id: z.string().default('eleven_multilingual_v2'),
  /** Path to piper binary (local mode) */
  piper_binary_path: z.string().optional(),
  /** Piper voice model name */
  piper_model: z.string().optional(),
});

/**
 * Schema for the voice configuration.
 *
 * The configuration lives in tastekit.yaml under the `voice` key. The core
 * WorkspaceConfigSchema types that field as z.any().optional(), so the
 * detailed validation happens here at runtime.
 *
 * Each section is declared with `.default({})`, so a section may be left out
 * of the input entirely.
 */
export const VoiceConfigSchema = z.object({
  stt: sttSchema.default({}),
  tts: ttsSchema.default({}),
});
|
|
36
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB;;;;;GAKG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACxC,GAAG,EAAE,CAAC,CAAC,MAAM,CAAC;QACZ,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,YAAY,EAAE,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC;QACjE,4DAA4D;QAC5D,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,oBAAoB,CAAC;QACrD,8BAA8B;QAC9B,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;QAClC,yDAAyD;QACzD,wBAAwB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;QAClD,uDAAuD;QACvD,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC;QAC5C,8CAA8C;QAC9C,mBAAmB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KAC3C,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;IAEd,GAAG,EAAE,CAAC,CAAC,MAAM,CAAC;QACZ,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC;QAC/D,4DAA4D;QAC5D,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,oBAAoB,CAAC;QACrD,0BAA0B;QAC1B,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,sBAAsB,CAAC;QACpD,0BAA0B;QAC1B,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,wBAAwB,CAAC;QACtD,wCAAwC;QACxC,iBAAiB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QACxC,6BAA6B;QAC7B,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KACnC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;CACf,CAAC,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @actrun_ai/tastekit-voice — Voice I/O layer for TasteKit onboarding
|
|
3
|
+
*
|
|
4
|
+
* Adds speech-to-text and text-to-speech to the interview,
|
|
5
|
+
* turning the text questionnaire into a natural voice conversation.
|
|
6
|
+
*
|
|
7
|
+
* Supported providers:
|
|
8
|
+
* - ElevenLabs (hosted, SOTA quality): Scribe v2 STT + streaming TTS
|
|
9
|
+
* - Whisper.cpp + Piper (local, offline): quality-first local option
|
|
10
|
+
*/
|
|
11
|
+
export type { STTProvider, TranscriptEvent } from './stt/provider.js';
|
|
12
|
+
export type { TTSProvider } from './tts/provider.js';
|
|
13
|
+
export { VoiceConfigSchema, type VoiceConfig } from './config.js';
|
|
14
|
+
export { VoiceIO } from './voice-io.js';
|
|
15
|
+
export { resolveVoiceProviders, createVoiceIO } from './resolve.js';
|
|
16
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,YAAY,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACtE,YAAY,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,iBAAiB,EAAE,KAAK,WAAW,EAAE,MAAM,aAAa,CAAC;AAClE,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EAAE,qBAAqB,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @actrun_ai/tastekit-voice — Voice I/O layer for TasteKit onboarding
|
|
3
|
+
*
|
|
4
|
+
* Adds speech-to-text and text-to-speech to the interview,
|
|
5
|
+
* turning the text questionnaire into a natural voice conversation.
|
|
6
|
+
*
|
|
7
|
+
* Supported providers:
|
|
8
|
+
* - ElevenLabs (hosted, SOTA quality): Scribe v2 STT + streaming TTS
|
|
9
|
+
* - Whisper.cpp + Piper (local, offline): quality-first local option
|
|
10
|
+
*/
|
|
11
|
+
export { VoiceConfigSchema } from './config.js';
|
|
12
|
+
export { VoiceIO } from './voice-io.js';
|
|
13
|
+
export { resolveVoiceProviders, createVoiceIO } from './resolve.js';
|
|
14
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAIH,OAAO,EAAE,iBAAiB,EAAoB,MAAM,aAAa,CAAC;AAClE,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EAAE,qBAAqB,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { STTProvider } from './stt/provider.js';
|
|
2
|
+
import type { TTSProvider } from './tts/provider.js';
|
|
3
|
+
import { VoiceIO, type VoiceIOOptions } from './voice-io.js';
|
|
4
|
+
/**
 * Resolve the speech-to-text and text-to-speech providers described by a
 * voice configuration.
 *
 * @param rawConfig - Unvalidated voice configuration value.
 * @returns A promise for an object holding the resolved `stt` and `tts` providers.
 */
export declare function resolveVoiceProviders(rawConfig: unknown): Promise<{ stt: STTProvider; tts: TTSProvider }>;
|
|
11
|
+
/**
 * Build a ready-to-use VoiceIO instance from a raw voice configuration.
 * Main entry point used by the CLI.
 *
 * @param rawConfig - Unvalidated voice configuration value.
 * @param options - Optional VoiceIO options.
 * @returns A promise for the constructed VoiceIO instance.
 */
export declare function createVoiceIO(
    rawConfig: unknown,
    options?: VoiceIOOptions
): Promise<VoiceIO>;
|
|
16
|
+
//# sourceMappingURL=resolve.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"resolve.d.ts","sourceRoot":"","sources":["../src/resolve.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,OAAO,EAAE,KAAK,cAAc,EAAE,MAAM,eAAe,CAAC;AAI7D;;GAEG;AACH,wBAAsB,qBAAqB,CACzC,SAAS,EAAE,OAAO,GACjB,OAAO,CAAC;IAAE,GAAG,EAAE,WAAW,CAAC;IAAC,GAAG,EAAE,WAAW,CAAA;CAAE,CAAC,CAOjD;AAED;;;GAGG;AACH,wBAAsB,aAAa,CACjC,SAAS,EAAE,OAAO,EAClB,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,OAAO,CAAC,CAMlB"}
|
package/dist/resolve.js
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { VoiceConfigSchema } from './config.js';
|
|
2
|
+
import { VoiceIO } from './voice-io.js';
|
|
3
|
+
import { MicRecorder } from './audio/recorder.js';
|
|
4
|
+
import { AudioPlayer } from './audio/player.js';
|
|
5
|
+
/**
 * Validate a raw voice configuration and build its STT and TTS providers.
 *
 * A `null` or `undefined` config is replaced by an empty object before it is
 * handed to `VoiceConfigSchema.parse`. The STT provider is resolved first;
 * the TTS provider is only resolved once STT has succeeded.
 *
 * @param rawConfig - Unvalidated voice configuration value.
 * @returns A promise for `{ stt, tts }`.
 */
export async function resolveVoiceProviders(rawConfig) {
    const parsed = VoiceConfigSchema.parse(rawConfig ?? {});
    // Property initialisers run top to bottom, so STT is awaited before TTS.
    return {
        stt: await resolveSTT(parsed),
        tts: await resolveTTS(parsed),
    };
}
|
|
14
|
+
/**
 * Build a VoiceIO instance from a raw voice configuration.
 * Main entry point used by the CLI.
 *
 * Providers are resolved first; a new MicRecorder and a new AudioPlayer are
 * only constructed after that resolution has succeeded.
 *
 * @param rawConfig - Unvalidated voice configuration value.
 * @param options - Options passed through to the VoiceIO constructor.
 * @returns A promise for the constructed VoiceIO instance.
 */
export async function createVoiceIO(rawConfig, options) {
    const providers = await resolveVoiceProviders(rawConfig);
    return new VoiceIO(
        providers.stt,
        providers.tts,
        new MicRecorder(),
        new AudioPlayer(),
        options
    );
}
|
|
24
|
+
/**
 * Build the speech-to-text provider selected by `config.stt.provider`.
 *
 * The provider module is loaded with a dynamic `import()` inside the matching
 * branch, so only the selected backend's module is imported.
 *
 * @param config - Parsed voice configuration; only `config.stt` is read.
 * @returns A promise for an `ElevenLabsSTT` or `WhisperSTT` instance.
 * @throws {Error} When the ElevenLabs API key variable is unset, empty or
 *   whitespace-only, or when the provider name is not recognised.
 */
async function resolveSTT(config) {
    const sttConfig = config.stt;
    switch (sttConfig.provider) {
        case 'elevenlabs': {
            // Trim so a blank or whitespace-padded value is reported here as a
            // missing key instead of being forwarded to the provider as-is.
            const apiKey = process.env[sttConfig.api_key_env]?.trim();
            if (!apiKey) {
                throw new Error(`Missing environment variable: ${sttConfig.api_key_env}\nSet your ElevenLabs API key to enable voice STT.`);
            }
            const { ElevenLabsSTT } = await import('./stt/elevenlabs-stt.js');
            return new ElevenLabsSTT({
                apiKey,
                language: sttConfig.language,
                // The config value is in milliseconds; dividing by 1000 passes seconds.
                vadSilenceThreshold: sttConfig.vad_silence_threshold_ms / 1000,
            });
        }
        case 'whisper': {
            const { WhisperSTT } = await import('./stt/whisper-stt.js');
            return new WhisperSTT({
                binaryPath: sttConfig.whisper_binary_path,
                model: sttConfig.whisper_model,
                language: sttConfig.language,
            });
        }
        default:
            throw new Error(`Unknown STT provider: ${sttConfig.provider}`);
    }
}
|
|
51
|
+
/**
 * Build the text-to-speech provider selected by `config.tts.provider`.
 *
 * The provider module is loaded with a dynamic `import()` inside the matching
 * branch, so only the selected backend's module is imported.
 *
 * @param config - Parsed voice configuration; only `config.tts` is read.
 * @returns A promise for an `ElevenLabsTTS` or `PiperTTS` instance.
 * @throws {Error} When the ElevenLabs API key variable is unset, empty or
 *   whitespace-only, or when the provider name is not recognised.
 */
async function resolveTTS(config) {
    const ttsConfig = config.tts;
    switch (ttsConfig.provider) {
        case 'elevenlabs': {
            // Trim so a blank or whitespace-padded value is reported here as a
            // missing key instead of being forwarded to the provider as-is.
            const apiKey = process.env[ttsConfig.api_key_env]?.trim();
            if (!apiKey) {
                throw new Error(`Missing environment variable: ${ttsConfig.api_key_env}\nSet your ElevenLabs API key to enable voice TTS.`);
            }
            const { ElevenLabsTTS } = await import('./tts/elevenlabs-tts.js');
            return new ElevenLabsTTS({
                apiKey,
                voiceId: ttsConfig.voice_id,
                modelId: ttsConfig.model_id,
            });
        }
        case 'piper': {
            const { PiperTTS } = await import('./tts/piper-tts.js');
            return new PiperTTS({
                binaryPath: ttsConfig.piper_binary_path,
                model: ttsConfig.piper_model,
            });
        }
        default:
            throw new Error(`Unknown TTS provider: ${ttsConfig.provider}`);
    }
}
|
|
77
|
+
//# sourceMappingURL=resolve.js.map
|