opc-agent 4.1.6 → 4.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,20 @@
1
1
  "use strict";
2
+ /**
3
+ * Voice processing module — STT (Speech-to-Text) and TTS (Text-to-Speech)
4
+ *
5
+ * STT providers:
6
+ * 1. Edge STT (free, via edge-stt or Whisper local) — listed for reference; speechToText() has no case for it
7
+ * 2. Volcano Engine / Doubao STT (best Chinese, ~¥0.01/req) — not yet implemented (volcanoSTT throws)
8
+ * 3. OpenAI Whisper API ($0.006/min)
9
+ * 4. Local Whisper (free, needs model download)
10
+ *
11
+ * TTS providers:
12
+ * 1. edge-tts (free, Microsoft voices, excellent Chinese)
13
+ * 2. Volcano Engine TTS (Doubao voices) — not yet implemented (volcanoTTS throws)
14
+ * 3. OpenAI TTS ($0.015/1K chars)
15
+ *
16
+ * Default: Whisper API for STT + edge-tts for TTS (best quality/cost ratio)
17
+ */
2
18
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
19
  if (k2 === undefined) k2 = k;
4
20
  var desc = Object.getOwnPropertyDescriptor(m, k);
@@ -33,385 +49,244 @@ var __importStar = (this && this.__importStar) || (function () {
33
49
  };
34
50
  })();
35
51
  Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.VoiceChannel = exports.ElevenLabsTTSProvider = exports.OpenAITTSProvider = exports.EdgeTTSProvider = exports.DeepgramSTTProvider = exports.WhisperSTTProvider = void 0;
37
- exports.createVoiceProviders = createVoiceProviders;
38
- const index_1 = require("./index");
39
- const logger_1 = require("../core/logger");
52
+ exports.VoiceProcessor = void 0;
53
+ exports.createVoiceProcessor = createVoiceProcessor;
54
+ const child_process_1 = require("child_process");
55
+ const fs = __importStar(require("fs"));
56
+ const path = __importStar(require("path"));
40
57
  const https = __importStar(require("https"));
41
- // ── Whisper STT Provider ────────────────────────────────────
42
- class WhisperSTTProvider {
43
- name = 'whisper';
44
- apiKey;
45
- constructor(apiKey) {
46
- this.apiKey = apiKey;
58
+ const http = __importStar(require("http"));
59
/** Defaults merged underneath whatever the caller passes to the constructor. */
const DEFAULT_CONFIG = {
    sttProvider: 'whisper-api',
    ttsProvider: 'edge-tts',
    ttsVoice: 'zh-CN-XiaoxiaoNeural',
    ttsLang: 'zh-CN',
    tempDir: '.opc/voice-tmp',
};
const DEFAULT_TEMP_DIR = '.opc/voice-tmp';
const DEFAULT_OPENAI_BASE_URL = 'https://api.openai.com/v1';
const EXTERNAL_TOOL_TIMEOUT_MS = 30000;
const TOOL_PROBE_TIMEOUT_MS = 5000;
const DOWNLOAD_TIMEOUT_MS = 30000;
const MAX_TEMP_FILE_AGE_MS = 3600000;
const REDIRECT_STATUS_CODES = new Set([301, 302, 303, 307, 308]);
/** MIME types for the audio extensions this module is documented to accept. */
const AUDIO_MIME_TYPES = new Map([
    ['.ogg', 'audio/ogg'],
    ['.oga', 'audio/ogg'],
    ['.opus', 'audio/ogg'],
    ['.mp3', 'audio/mpeg'],
    ['.wav', 'audio/wav'],
    ['.m4a', 'audio/mp4'],
    ['.mp4', 'audio/mp4'],
    ['.webm', 'audio/webm'],
    ['.flac', 'audio/flac'],
]);
/** Monotonic counter so two outputs created in the same millisecond never share a name. */
let outputSequence = 0;
/**
 * Pick the multipart Content-Type for an audio file from its extension.
 * Unknown extensions keep the previous hard-coded value ('audio/ogg').
 */
function audioMimeType(filePath) {
    return AUDIO_MIME_TYPES.get(path.extname(filePath).toLowerCase()) ?? 'audio/ogg';
}
/**
 * Make a file name safe to embed inside a quoted multipart header value:
 * quotes, backslashes and line breaks would otherwise corrupt the header.
 */
function sanitizeMultipartFilename(name) {
    return name.replace(/["\\\r\n]/g, '_');
}
/**
 * Speech-to-text and text-to-speech front end.
 *
 * Implemented providers: 'whisper-api' and 'whisper-local' for STT,
 * 'edge-tts' and 'openai-tts' for TTS. The 'volcano' provider is accepted
 * by the dispatchers but its methods only throw "not yet implemented".
 */
class VoiceProcessor {
    config;
    /**
     * @param config Overrides merged on top of DEFAULT_CONFIG
     */
    constructor(config) {
        this.config = { ...DEFAULT_CONFIG, ...config };
        const dir = this.resolveTempDir();
        if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true });
        }
    }
    /**
     * Convert speech audio to text (STT)
     * @param audioPath Path to audio file (.ogg, .mp3, .wav, .m4a)
     * @returns Transcribed text
     * @throws Error when the configured provider is unknown, unimplemented or fails
     */
    async speechToText(audioPath) {
        switch (this.config.sttProvider) {
            case 'whisper-api':
                return this.whisperApiSTT(audioPath);
            case 'whisper-local':
                return this.whisperLocalSTT(audioPath);
            case 'volcano':
                return this.volcanoSTT(audioPath);
            default:
                throw new Error(`STT not configured. Set voice.sttProvider in config.`);
        }
    }
    /**
     * Convert text to speech audio (TTS)
     * @param text Text to convert
     * @returns Path to generated audio file (.mp3)
     * @throws Error when the configured provider is unknown, unimplemented or fails
     */
    async textToSpeech(text) {
        switch (this.config.ttsProvider) {
            case 'edge-tts':
                return this.edgeTTS(text);
            case 'openai-tts':
                return this.openaiTTS(text);
            case 'volcano':
                return this.volcanoTTS(text);
            default:
                throw new Error(`TTS not configured. Set voice.ttsProvider in config.`);
        }
    }
    /**
     * Check whether speechToText() can be expected to work.
     * Only providers that are actually implemented can report true, so the
     * answer agrees with what speechToText() would do.
     * @returns true when the configured STT provider is implemented and usable
     */
    isSTTAvailable() {
        switch (this.config.sttProvider) {
            case 'whisper-api':
                return Boolean(this.config.openaiApiKey || process.env.OPENAI_API_KEY);
            case 'whisper-local':
                return this.checkOllamaWhisper();
            default:
                // 'none', the unimplemented 'volcano', and unknown values
                return false;
        }
    }
    /**
     * Check whether textToSpeech() can be expected to work.
     * @returns true when the configured TTS provider is implemented and usable
     */
    isTTSAvailable() {
        switch (this.config.ttsProvider) {
            case 'edge-tts':
                return this.checkEdgeTTS();
            case 'openai-tts':
                return Boolean(this.config.openaiApiKey || process.env.OPENAI_API_KEY);
            default:
                // 'none', the unimplemented 'volcano', and unknown values
                return false;
        }
    }
    // ─── STT Providers ───
    /**
     * Transcribe a file through the OpenAI-compatible /audio/transcriptions endpoint.
     * @param audioPath Path to the audio file to upload
     * @returns Trimmed transcription, or '' when the response carries no text
     * @throws Error when no API key is configured or the API answers non-2xx
     */
    async whisperApiSTT(audioPath) {
        const { apiKey, baseUrl } = this.resolveOpenAI('Whisper STT');
        const fileBuffer = await fs.promises.readFile(audioPath);
        const fileName = sanitizeMultipartFilename(path.basename(audioPath));
        // The body is assembled by hand so the boundary in the header matches exactly.
        const boundary = `----OPCVoice${Date.now()}`;
        const textField = (name, value) => Buffer.from(`--${boundary}\r\nContent-Disposition: form-data; name="${name}"\r\n\r\n${value}\r\n`);
        const body = Buffer.concat([
            Buffer.from(`--${boundary}\r\nContent-Disposition: form-data; name="file"; filename="${fileName}"\r\nContent-Type: ${audioMimeType(audioPath)}\r\n\r\n`),
            fileBuffer,
            Buffer.from('\r\n'),
            textField('model', 'whisper-1'),
            // language hint (optimize for Chinese)
            textField('language', 'zh'),
            Buffer.from(`--${boundary}--\r\n`),
        ]);
        const response = await fetch(`${baseUrl}/audio/transcriptions`, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${apiKey}`,
                'Content-Type': `multipart/form-data; boundary=${boundary}`,
            },
            body,
        });
        if (!response.ok) {
            const err = await response.text();
            throw new Error(`Whisper API error (${response.status}): ${err}`);
        }
        const result = await response.json();
        return result?.text?.trim() || '';
    }
    /**
     * Transcribe a file with a locally installed `whisper` command-line tool.
     * The tool is started without a shell, so the path is passed verbatim
     * and cannot be interpreted as shell syntax.
     * @param audioPath Path to the audio file
     * @returns Trimmed standard output of the tool
     * @throws Error (with the original failure as `cause`) when the tool fails
     */
    async whisperLocalSTT(audioPath) {
        try {
            // An absolute path can never be mistaken for a command-line option.
            const output = child_process_1.execFileSync('whisper', [path.resolve(audioPath), '--language', 'zh', '--output_format', 'txt'], {
                encoding: 'utf-8',
                timeout: EXTERNAL_TOOL_TIMEOUT_MS,
            });
            return output.trim();
        }
        catch (err) {
            throw new Error('Local Whisper not available. Install whisper.cpp or use whisper-api provider.', { cause: err });
        }
    }
    /** Placeholder for Volcano Engine STT; always throws. */
    async volcanoSTT(_audioPath) {
        // TODO: Implement when credentials provided
        throw new Error('Volcano STT not yet implemented. Use whisper-api for now.');
    }
    // ─── TTS Providers ───
    /**
     * Synthesize speech with the `edge-tts` command-line tool
     * (a Python package: pip install edge-tts).
     * @param text Text to speak
     * @returns Path of the generated .mp3 inside the temp directory
     * @throws Error (with the original failure as `cause`) when the tool fails
     */
    async edgeTTS(text) {
        const outPath = this.nextOutputPath();
        const args = this.buildEdgeTTSArgs(text, outPath);
        return new Promise((resolve, reject) => {
            // execFile starts the program directly: no shell ever parses the text.
            child_process_1.execFile('edge-tts', args, { timeout: EXTERNAL_TOOL_TIMEOUT_MS }, (err) => {
                if (err) {
                    reject(new Error(`edge-tts failed: ${err.message}. Install with: pip install edge-tts`, { cause: err }));
                }
                else {
                    resolve(outPath);
                }
            });
        });
    }
    /**
     * Build the argument vector for the edge-tts CLI.
     * The `--name=value` form keeps a value that starts with '-' from being
     * parsed as an option. Newlines are flattened to spaces as before.
     * @param text Text to speak
     * @param outPath Destination .mp3 path
     * @returns Arguments to pass to `edge-tts`
     */
    buildEdgeTTSArgs(text, outPath) {
        const voice = this.config.ttsVoice || 'zh-CN-XiaoxiaoNeural';
        const singleLine = String(text).replace(/\n/g, ' ');
        return [`--voice=${voice}`, `--text=${singleLine}`, `--write-media=${outPath}`];
    }
    /**
     * Synthesize speech through the OpenAI-compatible /audio/speech endpoint.
     * @param text Text to speak
     * @returns Path of the generated .mp3 inside the temp directory
     * @throws Error when no API key is configured or the API answers non-2xx
     */
    async openaiTTS(text) {
        const { apiKey, baseUrl } = this.resolveOpenAI('TTS');
        const outPath = this.nextOutputPath();
        const response = await fetch(`${baseUrl}/audio/speech`, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: 'tts-1',
                voice: 'nova',
                input: text,
            }),
        });
        if (!response.ok) {
            throw new Error(`OpenAI TTS error: ${response.status}`);
        }
        const audio = Buffer.from(await response.arrayBuffer());
        await fs.promises.writeFile(outPath, audio);
        return outPath;
    }
    /** Placeholder for Volcano Engine TTS; always throws. */
    async volcanoTTS(_text) {
        throw new Error('Volcano TTS not yet implemented.');
    }
    // ─── Helpers ───
    /** @returns The configured temp directory, or the built-in default when unset */
    resolveTempDir() {
        return this.config.tempDir || DEFAULT_TEMP_DIR;
    }
    /**
     * @returns A fresh .mp3 path in the temp directory. The pid and a
     * per-process counter keep names distinct within the same millisecond.
     */
    nextOutputPath() {
        outputSequence += 1;
        return path.join(this.resolveTempDir(), `tts-${Date.now()}-${process.pid}-${outputSequence}.mp3`);
    }
    /**
     * Resolve OpenAI credentials from config first, then the environment.
     * @param purpose Label used in the "key required" error message
     * @returns The API key and the base URL without trailing slashes
     * @throws Error when no API key is available
     */
    resolveOpenAI(purpose) {
        const apiKey = this.config.openaiApiKey || process.env.OPENAI_API_KEY;
        if (!apiKey) {
            throw new Error(`OpenAI API key required for ${purpose}`);
        }
        const configured = this.config.openaiBaseUrl || process.env.OPENAI_BASE_URL || DEFAULT_OPENAI_BASE_URL;
        // A trailing slash would otherwise yield ".../v1//audio/...".
        return { apiKey, baseUrl: configured.replace(/\/+$/, '') };
    }
    /** @returns true when `edge-tts --version` runs successfully within 5 seconds */
    checkEdgeTTS() {
        try {
            child_process_1.execSync('edge-tts --version', { stdio: 'pipe', timeout: TOOL_PROBE_TIMEOUT_MS });
            return true;
        }
        catch {
            return false;
        }
    }
    /** @returns true when `whisper --help` runs successfully within 5 seconds */
    checkOllamaWhisper() {
        try {
            child_process_1.execSync('whisper --help', { stdio: 'pipe', timeout: TOOL_PROBE_TIMEOUT_MS });
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Download a file from URL to local path.
     * Follows 301/302/303/307/308 redirects (relative Location headers
     * included) up to `maxRedirects` hops, and rejects on any non-2xx
     * final status instead of saving the error body.
     * @param url Absolute http(s) URL
     * @param destPath Local file to write
     * @param maxRedirects Remaining redirect hops allowed (default 5)
     * @returns Promise resolved once the file is fully written
     */
    async downloadFile(url, destPath, maxRedirects = 5) {
        return new Promise((resolve, reject) => {
            let target;
            try {
                target = new URL(url);
            }
            catch (err) {
                reject(new Error(`Invalid download URL: ${url}`, { cause: err }));
                return;
            }
            const client = target.protocol === 'https:' ? https : http;
            const req = client.get(target, (res) => {
                const status = res.statusCode ?? 0;
                if (REDIRECT_STATUS_CODES.has(status)) {
                    // Drain the redirect body so the socket is released.
                    res.resume();
                    const location = res.headers.location;
                    if (!location) {
                        reject(new Error(`Redirect (${status}) without Location header`));
                        return;
                    }
                    if (maxRedirects <= 0) {
                        reject(new Error('Too many redirects'));
                        return;
                    }
                    let next;
                    try {
                        // Resolves relative Location values against the current URL.
                        next = new URL(location, target).toString();
                    }
                    catch (err) {
                        reject(new Error(`Invalid redirect location: ${location}`, { cause: err }));
                        return;
                    }
                    this.downloadFile(next, destPath, maxRedirects - 1).then(resolve, reject);
                    return;
                }
                if (status < 200 || status >= 300) {
                    res.resume();
                    reject(new Error(`Download failed with HTTP ${status}`));
                    return;
                }
                const out = fs.createWriteStream(destPath);
                const fail = (err) => {
                    out.destroy();
                    // Best effort: do not leave a truncated file behind.
                    fs.unlink(destPath, () => reject(err));
                };
                res.on('error', fail);
                out.on('error', fail);
                out.on('finish', () => resolve());
                res.pipe(out);
            });
            req.on('error', reject);
            req.setTimeout(DOWNLOAD_TIMEOUT_MS, () => {
                req.destroy(new Error('Download timeout'));
            });
        });
    }
    /**
     * Cleanup temp files: delete regular files in the temp directory whose
     * modification time is more than one hour old. Best effort: a missing
     * directory, a subdirectory or a failing entry is skipped so the
     * remaining files are still processed.
     */
    cleanup() {
        const dir = this.resolveTempDir();
        let entries;
        try {
            entries = fs.readdirSync(dir);
        }
        catch {
            return;
        }
        const cutoff = Date.now() - MAX_TEMP_FILE_AGE_MS;
        for (const entry of entries) {
            const filePath = path.join(dir, entry);
            try {
                const stat = fs.statSync(filePath);
                if (stat.isFile() && stat.mtimeMs < cutoff) {
                    fs.unlinkSync(filePath);
                }
            }
            catch {
                /* best effort: skip entries that vanish or cannot be removed */
            }
        }
    }
}
416
- exports.VoiceChannel = VoiceChannel;
288
+ exports.VoiceProcessor = VoiceProcessor;
289
/**
 * Factory wrapper around the VoiceProcessor constructor.
 * @param config Settings forwarded unchanged to the VoiceProcessor constructor
 * @returns The newly constructed VoiceProcessor
 */
function createVoiceProcessor(config) {
    const processor = new VoiceProcessor(config);
    return processor;
}
417
292
  //# sourceMappingURL=voice.js.map