opc-agent 2.0.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +603 -545
- package/dist/channels/voice.d.ts +59 -0
- package/dist/channels/voice.js +351 -1
- package/dist/cli.js +284 -5
- package/dist/core/agent.d.ts +9 -0
- package/dist/core/agent.js +49 -0
- package/dist/core/collaboration.d.ts +89 -0
- package/dist/core/collaboration.js +201 -0
- package/dist/deploy/index.d.ts +40 -0
- package/dist/deploy/index.js +261 -0
- package/dist/index.d.ts +7 -1
- package/dist/index.js +47 -3
- package/dist/mcp/servers/calculator-mcp.d.ts +3 -0
- package/dist/mcp/servers/calculator-mcp.js +65 -0
- package/dist/mcp/servers/crypto-mcp.d.ts +3 -0
- package/dist/mcp/servers/crypto-mcp.js +108 -0
- package/dist/mcp/servers/database-mcp.d.ts +3 -0
- package/dist/mcp/servers/database-mcp.js +73 -0
- package/dist/mcp/servers/datetime-mcp.d.ts +3 -0
- package/dist/mcp/servers/datetime-mcp.js +71 -0
- package/dist/mcp/servers/filesystem.d.ts +3 -0
- package/dist/mcp/servers/filesystem.js +101 -0
- package/dist/mcp/servers/github-mcp.d.ts +3 -0
- package/dist/mcp/servers/github-mcp.js +60 -0
- package/dist/mcp/servers/index.d.ts +21 -0
- package/dist/mcp/servers/index.js +50 -0
- package/dist/mcp/servers/json-mcp.d.ts +3 -0
- package/dist/mcp/servers/json-mcp.js +126 -0
- package/dist/mcp/servers/memory-mcp.d.ts +3 -0
- package/dist/mcp/servers/memory-mcp.js +60 -0
- package/dist/mcp/servers/regex-mcp.d.ts +3 -0
- package/dist/mcp/servers/regex-mcp.js +56 -0
- package/dist/mcp/servers/web-mcp.d.ts +3 -0
- package/dist/mcp/servers/web-mcp.js +51 -0
- package/dist/memory/index.d.ts +2 -0
- package/dist/memory/index.js +4 -1
- package/dist/memory/seed-loader.d.ts +51 -0
- package/dist/memory/seed-loader.js +200 -0
- package/dist/schema/oad.d.ts +292 -12
- package/dist/schema/oad.js +12 -1
- package/dist/security/guardrails.d.ts +50 -0
- package/dist/security/guardrails.js +197 -0
- package/dist/studio/server.d.ts +31 -1
- package/dist/studio/server.js +154 -3
- package/dist/studio-ui/index.html +1278 -662
- package/dist/tools/integrations/calendar.d.ts +3 -0
- package/dist/tools/integrations/calendar.js +73 -0
- package/dist/tools/integrations/code-exec.d.ts +3 -0
- package/dist/tools/integrations/code-exec.js +42 -0
- package/dist/tools/integrations/csv-analyzer.d.ts +3 -0
- package/dist/tools/integrations/csv-analyzer.js +142 -0
- package/dist/tools/integrations/database.d.ts +3 -0
- package/dist/tools/integrations/database.js +44 -0
- package/dist/tools/integrations/email-send.d.ts +3 -0
- package/dist/tools/integrations/email-send.js +104 -0
- package/dist/tools/integrations/git-tool.d.ts +3 -0
- package/dist/tools/integrations/git-tool.js +49 -0
- package/dist/tools/integrations/github-tool.d.ts +3 -0
- package/dist/tools/integrations/github-tool.js +77 -0
- package/dist/tools/integrations/image-gen.d.ts +3 -0
- package/dist/tools/integrations/image-gen.js +58 -0
- package/dist/tools/integrations/index.d.ts +30 -0
- package/dist/tools/integrations/index.js +107 -0
- package/dist/tools/integrations/jira.d.ts +3 -0
- package/dist/tools/integrations/jira.js +85 -0
- package/dist/tools/integrations/notion.d.ts +3 -0
- package/dist/tools/integrations/notion.js +71 -0
- package/dist/tools/integrations/npm-tool.d.ts +3 -0
- package/dist/tools/integrations/npm-tool.js +49 -0
- package/dist/tools/integrations/pdf-reader.d.ts +3 -0
- package/dist/tools/integrations/pdf-reader.js +91 -0
- package/dist/tools/integrations/slack.d.ts +3 -0
- package/dist/tools/integrations/slack.js +67 -0
- package/dist/tools/integrations/summarizer.d.ts +3 -0
- package/dist/tools/integrations/summarizer.js +49 -0
- package/dist/tools/integrations/translator.d.ts +3 -0
- package/dist/tools/integrations/translator.js +48 -0
- package/dist/tools/integrations/trello.d.ts +3 -0
- package/dist/tools/integrations/trello.js +60 -0
- package/dist/tools/integrations/vector-search.d.ts +3 -0
- package/dist/tools/integrations/vector-search.js +44 -0
- package/dist/tools/integrations/web-scraper.d.ts +3 -0
- package/dist/tools/integrations/web-scraper.js +48 -0
- package/dist/tools/integrations/web-search.d.ts +3 -0
- package/dist/tools/integrations/web-search.js +60 -0
- package/dist/tools/integrations/webhook.d.ts +3 -0
- package/dist/tools/integrations/webhook.js +39 -0
- package/dist/ui/components.d.ts +10 -0
- package/dist/ui/components.js +123 -0
- package/package.json +1 -1
- package/src/channels/voice.ts +365 -0
- package/src/cli.ts +294 -6
- package/src/core/agent.ts +56 -0
- package/src/core/collaboration.ts +275 -0
- package/src/deploy/index.ts +255 -0
- package/src/index.ts +21 -1
- package/src/mcp/servers/calculator-mcp.ts +65 -0
- package/src/mcp/servers/crypto-mcp.ts +73 -0
- package/src/mcp/servers/database-mcp.ts +72 -0
- package/src/mcp/servers/datetime-mcp.ts +69 -0
- package/src/mcp/servers/filesystem.ts +66 -0
- package/src/mcp/servers/github-mcp.ts +58 -0
- package/src/mcp/servers/index.ts +63 -0
- package/src/mcp/servers/json-mcp.ts +102 -0
- package/src/mcp/servers/memory-mcp.ts +56 -0
- package/src/mcp/servers/regex-mcp.ts +53 -0
- package/src/mcp/servers/web-mcp.ts +49 -0
- package/src/memory/index.ts +3 -0
- package/src/memory/seed-loader.ts +212 -0
- package/src/schema/oad.ts +13 -0
- package/src/security/guardrails.ts +248 -0
- package/src/studio/server.ts +166 -4
- package/src/studio-ui/index.html +1278 -662
- package/src/tools/integrations/calendar.ts +73 -0
- package/src/tools/integrations/code-exec.ts +39 -0
- package/src/tools/integrations/csv-analyzer.ts +92 -0
- package/src/tools/integrations/database.ts +44 -0
- package/src/tools/integrations/email-send.ts +76 -0
- package/src/tools/integrations/git-tool.ts +42 -0
- package/src/tools/integrations/github-tool.ts +76 -0
- package/src/tools/integrations/image-gen.ts +56 -0
- package/src/tools/integrations/index.ts +92 -0
- package/src/tools/integrations/jira.ts +83 -0
- package/src/tools/integrations/notion.ts +71 -0
- package/src/tools/integrations/npm-tool.ts +48 -0
- package/src/tools/integrations/pdf-reader.ts +58 -0
- package/src/tools/integrations/slack.ts +65 -0
- package/src/tools/integrations/summarizer.ts +49 -0
- package/src/tools/integrations/translator.ts +48 -0
- package/src/tools/integrations/trello.ts +60 -0
- package/src/tools/integrations/vector-search.ts +42 -0
- package/src/tools/integrations/web-scraper.ts +47 -0
- package/src/tools/integrations/web-search.ts +58 -0
- package/src/tools/integrations/webhook.ts +38 -0
- package/src/ui/components.ts +127 -0
- package/tests/brain-seed-extended.test.ts +490 -0
- package/tests/brain-seed.test.ts +239 -0
- package/tests/collaboration.test.ts +319 -0
- package/tests/deploy-and-dag.test.ts +196 -0
- package/tests/guardrails.test.ts +177 -0
- package/tests/integrations.test.ts +249 -0
- package/tests/mcp-servers.test.ts +260 -0
- package/tests/voice-enhanced.test.ts +169 -0
- package/dist/dtv/data.d.ts +0 -18
- package/dist/dtv/data.js +0 -25
- package/dist/dtv/trust.d.ts +0 -19
- package/dist/dtv/trust.js +0 -40
- package/dist/dtv/value.d.ts +0 -23
- package/dist/dtv/value.js +0 -38
- package/dist/marketplace/index.d.ts +0 -34
- package/dist/marketplace/index.js +0 -202
package/src/channels/voice.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { BaseChannel } from './index';
|
|
2
2
|
import type { Message } from '../core/types';
|
|
3
3
|
import { Logger } from '../core/logger';
|
|
4
|
+
import * as https from 'https';
|
|
4
5
|
|
|
5
6
|
// ── Voice Channel Types ─────────────────────────────────────
|
|
6
7
|
|
|
@@ -32,6 +33,320 @@ export interface VoiceChannelConfig {
|
|
|
32
33
|
language?: string;
|
|
33
34
|
}
|
|
34
35
|
|
|
36
|
+
/**
 * Declarative description of which speech-to-text and text-to-speech
 * back ends to use, plus the credentials and defaults they need.
 */
export interface VoiceConfig {
  /** Speech-to-text back end to use. */
  sttProvider: 'whisper' | 'web-speech' | 'deepgram';

  /** Text-to-speech back end to use. */
  ttsProvider: 'edge-tts' | 'openai-tts' | 'elevenlabs';

  /** API key handed to the selected speech-to-text provider. */
  sttApiKey?: string;

  /** API key handed to the selected text-to-speech provider. */
  ttsApiKey?: string;

  /** Default voice identifier handed to the text-to-speech provider. */
  voice?: string;

  /** Language hint. NOTE(review): confirm which consumers actually read this field. */
  language?: string;
}
|
|
44
|
+
|
|
45
|
+
// ── Whisper STT Provider ────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
/**
 * Speech-to-text provider that posts audio to OpenAI's
 * `/v1/audio/transcriptions` endpoint using the `whisper-1` model.
 */
export class WhisperSTTProvider implements STTProvider {
  name = 'whisper';
  private apiKey: string;

  /**
   * @param apiKey Key sent as a `Bearer` token on every request.
   */
  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  /**
   * Transcribe an audio buffer.
   *
   * The audio is uploaded as a multipart form with the file name `audio.wav`
   * and content type `audio/wav`.
   *
   * @param audio Raw audio bytes.
   * @param options Optional settings; `language` is forwarded as a form field when set.
   * @returns The `text` field of the JSON response, or `''` when it is absent.
   * @throws Error when the request fails, the server answers with a non-2xx
   *   status, or the response body is not valid JSON.
   */
  async transcribe(audio: Buffer, options?: STTOptions): Promise<string> {
    // The multipart body is assembled by hand so no third-party form library is needed.
    const boundary = '----OPCBoundary' + Date.now().toString(36);
    const parts: Buffer[] = [];

    // file field
    parts.push(Buffer.from(`--${boundary}\r\nContent-Disposition: form-data; name="file"; filename="audio.wav"\r\nContent-Type: audio/wav\r\n\r\n`));
    parts.push(audio);
    parts.push(Buffer.from('\r\n'));

    // model field
    parts.push(Buffer.from(`--${boundary}\r\nContent-Disposition: form-data; name="model"\r\n\r\nwhisper-1\r\n`));

    // language field
    if (options?.language) {
      parts.push(Buffer.from(`--${boundary}\r\nContent-Disposition: form-data; name="language"\r\n\r\n${options.language}\r\n`));
    }

    parts.push(Buffer.from(`--${boundary}--\r\n`));
    const body = Buffer.concat(parts);

    return new Promise<string>((resolve, reject) => {
      const req = https.request({
        hostname: 'api.openai.com',
        path: '/v1/audio/transcriptions',
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': `multipart/form-data; boundary=${boundary}`,
          'Content-Length': body.length,
        },
      }, (res) => {
        const chunks: Buffer[] = [];
        res.on('data', (c: Buffer) => chunks.push(c));
        res.on('error', reject);
        res.on('end', () => {
          const raw = Buffer.concat(chunks).toString();
          const status = res.statusCode ?? 0;
          // Error payloads are JSON too; without this check they would
          // silently resolve to an empty transcript.
          if (status < 200 || status >= 300) {
            reject(new Error(`Whisper request failed with HTTP ${status}: ${raw.slice(0, 200)}`));
            return;
          }
          try {
            const data = JSON.parse(raw);
            resolve(data.text ?? '');
          } catch {
            reject(new Error('Failed to parse Whisper response'));
          }
        });
      });
      req.on('error', reject);
      req.write(body);
      req.end();
    });
  }
}
|
|
106
|
+
|
|
107
|
+
// ── Deepgram STT Provider ───────────────────────────────────
|
|
108
|
+
|
|
109
|
+
/**
 * Speech-to-text provider that posts audio to Deepgram's `/v1/listen`
 * endpoint using the `nova-2` model.
 */
export class DeepgramSTTProvider implements STTProvider {
  name = 'deepgram';
  private apiKey: string;

  /**
   * @param apiKey Key sent in the `Authorization: Token …` header.
   */
  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  /**
   * Transcribe an audio buffer. The body is sent as `audio/wav`.
   *
   * @param audio Raw audio bytes.
   * @param options Optional settings; `language` defaults to `'en'`.
   * @returns The transcript of the first alternative of the first channel,
   *   or `''` when the response does not contain one.
   * @throws Error when the request fails, the server answers with a non-2xx
   *   status, or the response body is not valid JSON.
   */
  async transcribe(audio: Buffer, options?: STTOptions): Promise<string> {
    // Encode the value so an unusual language string cannot alter the query.
    const lang = encodeURIComponent(options?.language ?? 'en');

    return new Promise<string>((resolve, reject) => {
      const req = https.request({
        hostname: 'api.deepgram.com',
        path: `/v1/listen?language=${lang}&model=nova-2`,
        method: 'POST',
        headers: {
          'Authorization': `Token ${this.apiKey}`,
          'Content-Type': 'audio/wav',
          'Content-Length': audio.length,
        },
      }, (res) => {
        const chunks: Buffer[] = [];
        res.on('data', (c: Buffer) => chunks.push(c));
        res.on('error', reject);
        res.on('end', () => {
          const raw = Buffer.concat(chunks).toString();
          const status = res.statusCode ?? 0;
          // Surface API failures instead of resolving to an empty transcript.
          if (status < 200 || status >= 300) {
            reject(new Error(`Deepgram request failed with HTTP ${status}: ${raw.slice(0, 200)}`));
            return;
          }
          try {
            const data = JSON.parse(raw);
            const transcript = data?.results?.channels?.[0]?.alternatives?.[0]?.transcript ?? '';
            resolve(transcript);
          } catch {
            reject(new Error('Failed to parse Deepgram response'));
          }
        });
      });
      req.on('error', reject);
      req.write(audio);
      req.end();
    });
  }
}
|
|
148
|
+
|
|
149
|
+
// ── Edge TTS Provider (free, no API key) ────────────────────
|
|
150
|
+
|
|
151
|
+
/**
 * Text-to-speech provider that talks to the Edge "read aloud" WebSocket
 * endpoint. It needs no API key but requires the optional `ws` package,
 * which is loaded lazily on first use.
 */
export class EdgeTTSProvider implements TTSProvider {
  name = 'edge-tts';
  private defaultVoice: string;

  /**
   * @param voice Default voice name; falls back to `en-US-AriaNeural`.
   */
  constructor(voice?: string) {
    this.defaultVoice = voice ?? 'en-US-AriaNeural';
  }

  /**
   * Synthesize speech for `text`.
   *
   * The requested output format is `audio-24khz-48kbitrate-mono-mp3`.
   *
   * @param text Plain text to speak; it is XML-escaped before being embedded in SSML.
   * @param options Optional settings; `voice` overrides the default voice.
   * @returns The concatenated audio payload of all binary frames received.
   * @throws Error when `ws` is not installed, the socket errors, the
   *   connection closes before any audio arrives, or nothing is received
   *   within 30 seconds.
   */
  async synthesize(text: string, options?: TTSOptions): Promise<Buffer> {
    const WebSocket = (await import('ws' as string).catch(() => null))?.default;
    if (!WebSocket) {
      throw new Error('ws package required for Edge TTS. Install with: npm i ws');
    }

    const voice = options?.voice ?? this.defaultVoice;
    const requestId = [...Array(32)].map(() => Math.random().toString(16)[2]).join('');
    const timestamp = new Date().toISOString();

    const endpoint = `wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4&ConnectionId=${requestId}`;

    return new Promise<Buffer>((resolve, reject) => {
      const ws = new WebSocket(endpoint, {
        headers: {
          'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold',
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        },
      });

      const audioChunks: Buffer[] = [];
      let settled = false;

      // Settle the promise exactly once and always release the timeout so
      // it cannot keep the process alive or close the socket a second time.
      const finish = (err?: Error): void => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        if (err) {
          reject(err);
        } else {
          resolve(Buffer.concat(audioChunks));
        }
      };

      // Timeout: keep whatever audio arrived, otherwise fail.
      const timer = setTimeout(() => {
        ws.close();
        if (audioChunks.length > 0) {
          finish();
        } else {
          finish(new Error('Edge TTS timeout'));
        }
      }, 30000);

      ws.on('open', () => {
        // Send config
        ws.send(`Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"false","wordBoundaryEnabled":"false"},"outputFormat":"audio-24khz-48kbitrate-mono-mp3"}}}}`);

        // Send SSML (voice name is escaped too, since it sits inside an attribute)
        const ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='${escapeXml(voice)}'>${escapeXml(text)}</voice></speak>`;
        ws.send(`X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nX-Timestamp:${timestamp}\r\nPath:ssml\r\n\r\n${ssml}`);
      });

      ws.on('message', (data: Buffer | string, isBinary?: boolean) => {
        // Older ws versions deliver text frames as strings; newer ones deliver
        // every frame as a Buffer and flag text frames with isBinary === false.
        const isText = typeof data === 'string' || isBinary === false;
        if (isText) {
          // Control frames carry no audio; only the end-of-turn marker matters.
          if (data.toString().includes('Path:turn.end')) {
            ws.close();
            finish();
          }
          return;
        }

        if (Buffer.isBuffer(data)) {
          // Binary message — extract audio after header
          const headerEnd = data.indexOf('\r\n\r\n');
          if (headerEnd !== -1) {
            audioChunks.push(data.subarray(headerEnd + 4));
          }
        }
      });

      ws.on('error', (err: Error) => {
        finish(new Error(`Edge TTS WebSocket error: ${err.message}`));
      });

      ws.on('close', () => {
        if (audioChunks.length > 0) {
          finish();
        } else {
          // Fail fast rather than waiting for the timeout to fire.
          finish(new Error('Edge TTS connection closed before any audio was received'));
        }
      });
    });
  }
}
|
|
228
|
+
|
|
229
|
+
/**
 * Escape the five XML special characters so `text` can be embedded safely
 * in element content or a quoted attribute value.
 *
 * `&` is replaced first so the ampersands introduced by the other
 * replacements are not escaped a second time.
 *
 * @param text Raw text.
 * @returns `text` with `&`, `<`, `>`, `"` and `'` replaced by their entities.
 */
function escapeXml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}
|
|
232
|
+
|
|
233
|
+
// ── OpenAI TTS Provider ─────────────────────────────────────
|
|
234
|
+
|
|
235
|
+
/**
 * Text-to-speech provider that calls OpenAI's `/v1/audio/speech` endpoint
 * using the `tts-1` model.
 */
export class OpenAITTSProvider implements TTSProvider {
  name = 'openai-tts';
  private apiKey: string;
  private defaultVoice: string;

  /**
   * @param apiKey Key sent as a `Bearer` token on every request.
   * @param voice Default voice name; falls back to `alloy`.
   */
  constructor(apiKey: string, voice?: string) {
    this.apiKey = apiKey;
    this.defaultVoice = voice ?? 'alloy';
  }

  /**
   * Synthesize speech for `text`.
   *
   * @param text Text to speak.
   * @param options Optional settings; `voice` overrides the default voice
   *   and `speed` defaults to `1.0`.
   * @returns The raw response body (the audio bytes).
   * @throws Error when the request fails or the server answers with a
   *   non-2xx status.
   */
  async synthesize(text: string, options?: TTSOptions): Promise<Buffer> {
    const voice = options?.voice ?? this.defaultVoice;
    const body = JSON.stringify({
      model: 'tts-1',
      input: text,
      voice,
      speed: options?.speed ?? 1.0,
    });

    return new Promise<Buffer>((resolve, reject) => {
      const req = https.request({
        hostname: 'api.openai.com',
        path: '/v1/audio/speech',
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json',
          'Content-Length': Buffer.byteLength(body),
        },
      }, (res) => {
        const chunks: Buffer[] = [];
        res.on('data', (c: Buffer) => chunks.push(c));
        res.on('error', reject);
        res.on('end', () => {
          const payload = Buffer.concat(chunks);
          const status = res.statusCode ?? 0;
          // Without this check an error body would be handed back to the
          // caller as if it were audio.
          if (status < 200 || status >= 300) {
            reject(new Error(`OpenAI TTS request failed with HTTP ${status}: ${payload.toString().slice(0, 200)}`));
            return;
          }
          resolve(payload);
        });
      });
      req.on('error', reject);
      req.write(body);
      req.end();
    });
  }
}
|
|
275
|
+
|
|
276
|
+
// ── ElevenLabs TTS Provider ─────────────────────────────────
|
|
277
|
+
|
|
278
|
+
/**
 * Text-to-speech provider that calls ElevenLabs'
 * `/v1/text-to-speech/{voiceId}` endpoint using the
 * `eleven_monolingual_v1` model.
 */
export class ElevenLabsTTSProvider implements TTSProvider {
  name = 'elevenlabs';
  private apiKey: string;
  private defaultVoice: string;

  /**
   * @param apiKey Key sent in the `xi-api-key` header.
   * @param voice Default voice id; falls back to `21m00Tcm4TlvDq8ikWAM`.
   */
  constructor(apiKey: string, voice?: string) {
    this.apiKey = apiKey;
    this.defaultVoice = voice ?? '21m00Tcm4TlvDq8ikWAM'; // Rachel
  }

  /**
   * Synthesize speech for `text`.
   *
   * @param text Text to speak.
   * @param options Optional settings; `voice` overrides the default voice id.
   * @returns The raw response body (the audio bytes).
   * @throws Error when the request fails or the server answers with a
   *   non-2xx status.
   */
  async synthesize(text: string, options?: TTSOptions): Promise<Buffer> {
    // The id becomes a path segment, so encode it defensively.
    const voiceId = encodeURIComponent(options?.voice ?? this.defaultVoice);
    const body = JSON.stringify({
      text,
      model_id: 'eleven_monolingual_v1',
    });

    return new Promise<Buffer>((resolve, reject) => {
      const req = https.request({
        hostname: 'api.elevenlabs.io',
        path: `/v1/text-to-speech/${voiceId}`,
        method: 'POST',
        headers: {
          'xi-api-key': this.apiKey,
          'Content-Type': 'application/json',
          'Content-Length': Buffer.byteLength(body),
        },
      }, (res) => {
        const chunks: Buffer[] = [];
        res.on('data', (c: Buffer) => chunks.push(c));
        res.on('error', reject);
        res.on('end', () => {
          const payload = Buffer.concat(chunks);
          const status = res.statusCode ?? 0;
          // Without this check an error body would be handed back to the
          // caller as if it were audio.
          if (status < 200 || status >= 300) {
            reject(new Error(`ElevenLabs request failed with HTTP ${status}: ${payload.toString().slice(0, 200)}`));
            return;
          }
          resolve(payload);
        });
      });
      req.on('error', reject);
      req.write(body);
      req.end();
    });
  }
}
|
|
316
|
+
|
|
317
|
+
// ── Voice Config Factory ────────────────────────────────────
|
|
318
|
+
|
|
319
|
+
/**
 * Build the speech-to-text and text-to-speech providers described by `config`.
 *
 * A provider that needs an API key is only created when the matching key is
 * present; otherwise that slot is left `undefined`. `web-speech` never
 * yields a provider here because it is browser-only. `edge-tts` needs no key.
 *
 * @param config Provider selection, credentials and default voice.
 * @returns An object with `stt` and `tts`, each possibly `undefined`.
 */
export function createVoiceProviders(config: VoiceConfig): { stt?: STTProvider; tts?: TTSProvider } {
  const { sttProvider, ttsProvider, sttApiKey, ttsApiKey, voice } = config;

  let stt: STTProvider | undefined;
  if (sttApiKey) {
    if (sttProvider === 'whisper') {
      stt = new WhisperSTTProvider(sttApiKey);
    } else if (sttProvider === 'deepgram') {
      stt = new DeepgramSTTProvider(sttApiKey);
    }
    // 'web-speech': Browser only — not available in Node.js
  }

  let tts: TTSProvider | undefined;
  if (ttsProvider === 'edge-tts') {
    tts = new EdgeTTSProvider(voice);
  } else if (ttsApiKey) {
    if (ttsProvider === 'openai-tts') {
      tts = new OpenAITTSProvider(ttsApiKey, voice);
    } else if (ttsProvider === 'elevenlabs') {
      tts = new ElevenLabsTTSProvider(ttsApiKey, voice);
    }
  }

  return { stt, tts };
}
|
|
349
|
+
|
|
35
350
|
// ── Voice Channel ───────────────────────────────────────────
|
|
36
351
|
|
|
37
352
|
export class VoiceChannel extends BaseChannel {
|
|
@@ -39,6 +354,7 @@ export class VoiceChannel extends BaseChannel {
|
|
|
39
354
|
private config: VoiceChannelConfig;
|
|
40
355
|
private logger = new Logger('voice-channel');
|
|
41
356
|
private running = false;
|
|
357
|
+
private conversationActive = false;
|
|
42
358
|
|
|
43
359
|
constructor(config?: VoiceChannelConfig) {
|
|
44
360
|
super();
|
|
@@ -55,6 +371,7 @@ export class VoiceChannel extends BaseChannel {
|
|
|
55
371
|
|
|
56
372
|
/** Mark the channel as stopped and end any active conversation mode. */
async stop(): Promise<void> {
  this.conversationActive = false;
  this.running = false;
  this.logger.info('Voice channel stopped');
}
|
|
60
377
|
|
|
@@ -62,6 +379,54 @@ export class VoiceChannel extends BaseChannel {
|
|
|
62
379
|
return this.running;
|
|
63
380
|
}
|
|
64
381
|
|
|
382
|
+
/**
 * Transcribe audio to text using the configured STT provider.
 *
 * @param audio Raw audio bytes.
 * @param format Accepted for API compatibility; not read by this method.
 * @returns The provider's transcript.
 * @throws Error when no STT provider is configured.
 */
async transcribe(audio: Buffer, format?: string): Promise<string> {
  const provider = this.config.sttProvider;
  if (!provider) {
    throw new Error('No STT provider configured');
  }
  const sttOptions = { language: this.config.language };
  return provider.transcribe(audio, sttOptions);
}
|
|
389
|
+
|
|
390
|
+
/**
 * Synthesize text to audio using the configured TTS provider.
 *
 * @param text Text to speak.
 * @param voice Optional voice forwarded to the provider.
 * @returns The provider's audio output.
 * @throws Error when no TTS provider is configured.
 */
async synthesize(text: string, voice?: string): Promise<Buffer> {
  const provider = this.config.ttsProvider;
  if (!provider) {
    throw new Error('No TTS provider configured');
  }
  const ttsOptions = { voice };
  return provider.synthesize(text, ttsOptions);
}
|
|
397
|
+
|
|
398
|
+
/**
 * Start real-time conversation mode.
 *
 * Starts the channel first if it is not already running, then flags the
 * conversation as active. `onMessage` is not invoked here.
 *
 * @param onMessage Callback that maps a user utterance to a reply.
 */
async startConversation(onMessage: (text: string) => Promise<string>): Promise<void> {
  if (!this.running) {
    await this.start();
  }

  this.conversationActive = true;
  this.logger.info('Conversation mode started');

  // In a real implementation, this would set up a microphone stream.
  // For now, expose the conversation loop for programmatic use.
}
|
|
407
|
+
|
|
408
|
+
/**
 * Process a single turn in conversation mode: transcribe the audio, pass
 * the transcript to `onMessage`, and synthesize the reply when a TTS
 * provider is configured.
 *
 * @param audio Raw audio bytes for the user's utterance.
 * @param onMessage Callback that maps the transcript to a reply.
 * @returns The transcript, the reply, and the reply audio (`undefined`
 *   when no TTS provider is configured).
 */
async processConversationTurn(
  audio: Buffer,
  onMessage: (text: string) => Promise<string>,
): Promise<{ text: string; response: string; audioResponse?: Buffer }> {
  const text = await this.transcribe(audio);
  const response = await onMessage(text);
  const audioResponse: Buffer | undefined = this.config.ttsProvider
    ? await this.synthesize(response)
    : undefined;
  return { text, response, audioResponse };
}
|
|
421
|
+
|
|
422
|
+
/** Leave conversation mode; the channel's running state is not changed. */
stopConversation(): void {
  this.conversationActive = false;
}
|
|
425
|
+
|
|
426
|
+
/** @returns `true` while conversation mode is active. */
isConversationActive(): boolean {
  return this.conversationActive;
}
|
|
429
|
+
|
|
65
430
|
/** Process audio input: STT → Agent → TTS */
|
|
66
431
|
async processAudio(audio: Buffer): Promise<{ text: string; response: string; audioResponse?: Buffer }> {
|
|
67
432
|
if (!this.handler) throw new Error('No message handler set');
|