opc-agent 2.0.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +603 -545
- package/dist/channels/voice.d.ts +59 -0
- package/dist/channels/voice.js +351 -1
- package/dist/cli.js +284 -5
- package/dist/core/agent.d.ts +9 -0
- package/dist/core/agent.js +49 -0
- package/dist/core/collaboration.d.ts +89 -0
- package/dist/core/collaboration.js +201 -0
- package/dist/deploy/index.d.ts +40 -0
- package/dist/deploy/index.js +261 -0
- package/dist/index.d.ts +7 -1
- package/dist/index.js +47 -3
- package/dist/mcp/servers/calculator-mcp.d.ts +3 -0
- package/dist/mcp/servers/calculator-mcp.js +65 -0
- package/dist/mcp/servers/crypto-mcp.d.ts +3 -0
- package/dist/mcp/servers/crypto-mcp.js +108 -0
- package/dist/mcp/servers/database-mcp.d.ts +3 -0
- package/dist/mcp/servers/database-mcp.js +73 -0
- package/dist/mcp/servers/datetime-mcp.d.ts +3 -0
- package/dist/mcp/servers/datetime-mcp.js +71 -0
- package/dist/mcp/servers/filesystem.d.ts +3 -0
- package/dist/mcp/servers/filesystem.js +101 -0
- package/dist/mcp/servers/github-mcp.d.ts +3 -0
- package/dist/mcp/servers/github-mcp.js +60 -0
- package/dist/mcp/servers/index.d.ts +21 -0
- package/dist/mcp/servers/index.js +50 -0
- package/dist/mcp/servers/json-mcp.d.ts +3 -0
- package/dist/mcp/servers/json-mcp.js +126 -0
- package/dist/mcp/servers/memory-mcp.d.ts +3 -0
- package/dist/mcp/servers/memory-mcp.js +60 -0
- package/dist/mcp/servers/regex-mcp.d.ts +3 -0
- package/dist/mcp/servers/regex-mcp.js +56 -0
- package/dist/mcp/servers/web-mcp.d.ts +3 -0
- package/dist/mcp/servers/web-mcp.js +51 -0
- package/dist/memory/index.d.ts +2 -0
- package/dist/memory/index.js +4 -1
- package/dist/memory/seed-loader.d.ts +51 -0
- package/dist/memory/seed-loader.js +200 -0
- package/dist/schema/oad.d.ts +292 -12
- package/dist/schema/oad.js +12 -1
- package/dist/security/guardrails.d.ts +50 -0
- package/dist/security/guardrails.js +197 -0
- package/dist/studio/server.d.ts +31 -1
- package/dist/studio/server.js +154 -3
- package/dist/studio-ui/index.html +1278 -662
- package/dist/tools/integrations/calendar.d.ts +3 -0
- package/dist/tools/integrations/calendar.js +73 -0
- package/dist/tools/integrations/code-exec.d.ts +3 -0
- package/dist/tools/integrations/code-exec.js +42 -0
- package/dist/tools/integrations/csv-analyzer.d.ts +3 -0
- package/dist/tools/integrations/csv-analyzer.js +142 -0
- package/dist/tools/integrations/database.d.ts +3 -0
- package/dist/tools/integrations/database.js +44 -0
- package/dist/tools/integrations/email-send.d.ts +3 -0
- package/dist/tools/integrations/email-send.js +104 -0
- package/dist/tools/integrations/git-tool.d.ts +3 -0
- package/dist/tools/integrations/git-tool.js +49 -0
- package/dist/tools/integrations/github-tool.d.ts +3 -0
- package/dist/tools/integrations/github-tool.js +77 -0
- package/dist/tools/integrations/image-gen.d.ts +3 -0
- package/dist/tools/integrations/image-gen.js +58 -0
- package/dist/tools/integrations/index.d.ts +30 -0
- package/dist/tools/integrations/index.js +107 -0
- package/dist/tools/integrations/jira.d.ts +3 -0
- package/dist/tools/integrations/jira.js +85 -0
- package/dist/tools/integrations/notion.d.ts +3 -0
- package/dist/tools/integrations/notion.js +71 -0
- package/dist/tools/integrations/npm-tool.d.ts +3 -0
- package/dist/tools/integrations/npm-tool.js +49 -0
- package/dist/tools/integrations/pdf-reader.d.ts +3 -0
- package/dist/tools/integrations/pdf-reader.js +91 -0
- package/dist/tools/integrations/slack.d.ts +3 -0
- package/dist/tools/integrations/slack.js +67 -0
- package/dist/tools/integrations/summarizer.d.ts +3 -0
- package/dist/tools/integrations/summarizer.js +49 -0
- package/dist/tools/integrations/translator.d.ts +3 -0
- package/dist/tools/integrations/translator.js +48 -0
- package/dist/tools/integrations/trello.d.ts +3 -0
- package/dist/tools/integrations/trello.js +60 -0
- package/dist/tools/integrations/vector-search.d.ts +3 -0
- package/dist/tools/integrations/vector-search.js +44 -0
- package/dist/tools/integrations/web-scraper.d.ts +3 -0
- package/dist/tools/integrations/web-scraper.js +48 -0
- package/dist/tools/integrations/web-search.d.ts +3 -0
- package/dist/tools/integrations/web-search.js +60 -0
- package/dist/tools/integrations/webhook.d.ts +3 -0
- package/dist/tools/integrations/webhook.js +39 -0
- package/dist/ui/components.d.ts +10 -0
- package/dist/ui/components.js +123 -0
- package/package.json +1 -1
- package/src/channels/voice.ts +365 -0
- package/src/cli.ts +294 -6
- package/src/core/agent.ts +56 -0
- package/src/core/collaboration.ts +275 -0
- package/src/deploy/index.ts +255 -0
- package/src/index.ts +21 -1
- package/src/mcp/servers/calculator-mcp.ts +65 -0
- package/src/mcp/servers/crypto-mcp.ts +73 -0
- package/src/mcp/servers/database-mcp.ts +72 -0
- package/src/mcp/servers/datetime-mcp.ts +69 -0
- package/src/mcp/servers/filesystem.ts +66 -0
- package/src/mcp/servers/github-mcp.ts +58 -0
- package/src/mcp/servers/index.ts +63 -0
- package/src/mcp/servers/json-mcp.ts +102 -0
- package/src/mcp/servers/memory-mcp.ts +56 -0
- package/src/mcp/servers/regex-mcp.ts +53 -0
- package/src/mcp/servers/web-mcp.ts +49 -0
- package/src/memory/index.ts +3 -0
- package/src/memory/seed-loader.ts +212 -0
- package/src/schema/oad.ts +13 -0
- package/src/security/guardrails.ts +248 -0
- package/src/studio/server.ts +166 -4
- package/src/studio-ui/index.html +1278 -662
- package/src/tools/integrations/calendar.ts +73 -0
- package/src/tools/integrations/code-exec.ts +39 -0
- package/src/tools/integrations/csv-analyzer.ts +92 -0
- package/src/tools/integrations/database.ts +44 -0
- package/src/tools/integrations/email-send.ts +76 -0
- package/src/tools/integrations/git-tool.ts +42 -0
- package/src/tools/integrations/github-tool.ts +76 -0
- package/src/tools/integrations/image-gen.ts +56 -0
- package/src/tools/integrations/index.ts +92 -0
- package/src/tools/integrations/jira.ts +83 -0
- package/src/tools/integrations/notion.ts +71 -0
- package/src/tools/integrations/npm-tool.ts +48 -0
- package/src/tools/integrations/pdf-reader.ts +58 -0
- package/src/tools/integrations/slack.ts +65 -0
- package/src/tools/integrations/summarizer.ts +49 -0
- package/src/tools/integrations/translator.ts +48 -0
- package/src/tools/integrations/trello.ts +60 -0
- package/src/tools/integrations/vector-search.ts +42 -0
- package/src/tools/integrations/web-scraper.ts +47 -0
- package/src/tools/integrations/web-search.ts +58 -0
- package/src/tools/integrations/webhook.ts +38 -0
- package/src/ui/components.ts +127 -0
- package/tests/brain-seed-extended.test.ts +490 -0
- package/tests/brain-seed.test.ts +239 -0
- package/tests/collaboration.test.ts +319 -0
- package/tests/deploy-and-dag.test.ts +196 -0
- package/tests/guardrails.test.ts +177 -0
- package/tests/integrations.test.ts +249 -0
- package/tests/mcp-servers.test.ts +260 -0
- package/tests/voice-enhanced.test.ts +169 -0
- package/dist/dtv/data.d.ts +0 -18
- package/dist/dtv/data.js +0 -25
- package/dist/dtv/trust.d.ts +0 -19
- package/dist/dtv/trust.js +0 -40
- package/dist/dtv/value.d.ts +0 -23
- package/dist/dtv/value.js +0 -38
- package/dist/marketplace/index.d.ts +0 -34
- package/dist/marketplace/index.js +0 -202
package/dist/channels/voice.d.ts
CHANGED
|
@@ -22,15 +22,74 @@ export interface VoiceChannelConfig {
|
|
|
22
22
|
sampleRate?: number;
|
|
23
23
|
language?: string;
|
|
24
24
|
}
|
|
25
|
+
export interface VoiceConfig {
|
|
26
|
+
sttProvider: 'whisper' | 'web-speech' | 'deepgram';
|
|
27
|
+
ttsProvider: 'edge-tts' | 'openai-tts' | 'elevenlabs';
|
|
28
|
+
sttApiKey?: string;
|
|
29
|
+
ttsApiKey?: string;
|
|
30
|
+
voice?: string;
|
|
31
|
+
language?: string;
|
|
32
|
+
}
|
|
33
|
+
export declare class WhisperSTTProvider implements STTProvider {
|
|
34
|
+
name: string;
|
|
35
|
+
private apiKey;
|
|
36
|
+
constructor(apiKey: string);
|
|
37
|
+
transcribe(audio: Buffer, options?: STTOptions): Promise<string>;
|
|
38
|
+
}
|
|
39
|
+
export declare class DeepgramSTTProvider implements STTProvider {
|
|
40
|
+
name: string;
|
|
41
|
+
private apiKey;
|
|
42
|
+
constructor(apiKey: string);
|
|
43
|
+
transcribe(audio: Buffer, options?: STTOptions): Promise<string>;
|
|
44
|
+
}
|
|
45
|
+
export declare class EdgeTTSProvider implements TTSProvider {
|
|
46
|
+
name: string;
|
|
47
|
+
private defaultVoice;
|
|
48
|
+
constructor(voice?: string);
|
|
49
|
+
synthesize(text: string, options?: TTSOptions): Promise<Buffer>;
|
|
50
|
+
}
|
|
51
|
+
export declare class OpenAITTSProvider implements TTSProvider {
|
|
52
|
+
name: string;
|
|
53
|
+
private apiKey;
|
|
54
|
+
private defaultVoice;
|
|
55
|
+
constructor(apiKey: string, voice?: string);
|
|
56
|
+
synthesize(text: string, options?: TTSOptions): Promise<Buffer>;
|
|
57
|
+
}
|
|
58
|
+
export declare class ElevenLabsTTSProvider implements TTSProvider {
|
|
59
|
+
name: string;
|
|
60
|
+
private apiKey;
|
|
61
|
+
private defaultVoice;
|
|
62
|
+
constructor(apiKey: string, voice?: string);
|
|
63
|
+
synthesize(text: string, options?: TTSOptions): Promise<Buffer>;
|
|
64
|
+
}
|
|
65
|
+
export declare function createVoiceProviders(config: VoiceConfig): {
|
|
66
|
+
stt?: STTProvider;
|
|
67
|
+
tts?: TTSProvider;
|
|
68
|
+
};
|
|
25
69
|
export declare class VoiceChannel extends BaseChannel {
|
|
26
70
|
type: string;
|
|
27
71
|
private config;
|
|
28
72
|
private logger;
|
|
29
73
|
private running;
|
|
74
|
+
private conversationActive;
|
|
30
75
|
constructor(config?: VoiceChannelConfig);
|
|
31
76
|
start(): Promise<void>;
|
|
32
77
|
stop(): Promise<void>;
|
|
33
78
|
isRunning(): boolean;
|
|
79
|
+
/** Transcribe audio to text */
|
|
80
|
+
transcribe(audio: Buffer, format?: string): Promise<string>;
|
|
81
|
+
/** Synthesize text to audio */
|
|
82
|
+
synthesize(text: string, voice?: string): Promise<Buffer>;
|
|
83
|
+
/** Start real-time conversation mode */
|
|
84
|
+
startConversation(onMessage: (text: string) => Promise<string>): Promise<void>;
|
|
85
|
+
/** Process a single turn in conversation mode */
|
|
86
|
+
processConversationTurn(audio: Buffer, onMessage: (text: string) => Promise<string>): Promise<{
|
|
87
|
+
text: string;
|
|
88
|
+
response: string;
|
|
89
|
+
audioResponse?: Buffer;
|
|
90
|
+
}>;
|
|
91
|
+
stopConversation(): void;
|
|
92
|
+
isConversationActive(): boolean;
|
|
34
93
|
/** Process audio input: STT → Agent → TTS */
|
|
35
94
|
processAudio(audio: Buffer): Promise<{
|
|
36
95
|
text: string;
|
package/dist/channels/voice.js
CHANGED
|
@@ -1,14 +1,324 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
2
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.VoiceChannel = void 0;
|
|
36
|
+
exports.VoiceChannel = exports.ElevenLabsTTSProvider = exports.OpenAITTSProvider = exports.EdgeTTSProvider = exports.DeepgramSTTProvider = exports.WhisperSTTProvider = void 0;
|
|
37
|
+
exports.createVoiceProviders = createVoiceProviders;
|
|
4
38
|
const index_1 = require("./index");
|
|
5
39
|
const logger_1 = require("../core/logger");
|
|
40
|
+
const https = __importStar(require("https"));
|
|
41
|
+
// ── Whisper STT Provider ────────────────────────────────────
|
|
42
|
+
class WhisperSTTProvider {
|
|
43
|
+
name = 'whisper';
|
|
44
|
+
apiKey;
|
|
45
|
+
constructor(apiKey) {
|
|
46
|
+
this.apiKey = apiKey;
|
|
47
|
+
}
|
|
48
|
+
async transcribe(audio, options) {
|
|
49
|
+
const FormData = (await Promise.resolve(`${'form-data'}`).then(s => __importStar(require(s))).catch(() => null));
|
|
50
|
+
// Build multipart form data manually
|
|
51
|
+
const boundary = '----OPCBoundary' + Date.now().toString(36);
|
|
52
|
+
const parts = [];
|
|
53
|
+
// file field
|
|
54
|
+
parts.push(Buffer.from(`--${boundary}\r\nContent-Disposition: form-data; name="file"; filename="audio.wav"\r\nContent-Type: audio/wav\r\n\r\n`));
|
|
55
|
+
parts.push(audio);
|
|
56
|
+
parts.push(Buffer.from('\r\n'));
|
|
57
|
+
// model field
|
|
58
|
+
parts.push(Buffer.from(`--${boundary}\r\nContent-Disposition: form-data; name="model"\r\n\r\nwhisper-1\r\n`));
|
|
59
|
+
// language field
|
|
60
|
+
if (options?.language) {
|
|
61
|
+
parts.push(Buffer.from(`--${boundary}\r\nContent-Disposition: form-data; name="language"\r\n\r\n${options.language}\r\n`));
|
|
62
|
+
}
|
|
63
|
+
parts.push(Buffer.from(`--${boundary}--\r\n`));
|
|
64
|
+
const body = Buffer.concat(parts);
|
|
65
|
+
return new Promise((resolve, reject) => {
|
|
66
|
+
const req = https.request({
|
|
67
|
+
hostname: 'api.openai.com',
|
|
68
|
+
path: '/v1/audio/transcriptions',
|
|
69
|
+
method: 'POST',
|
|
70
|
+
headers: {
|
|
71
|
+
'Authorization': `Bearer ${this.apiKey}`,
|
|
72
|
+
'Content-Type': `multipart/form-data; boundary=${boundary}`,
|
|
73
|
+
'Content-Length': body.length,
|
|
74
|
+
},
|
|
75
|
+
}, (res) => {
|
|
76
|
+
const chunks = [];
|
|
77
|
+
res.on('data', (c) => chunks.push(c));
|
|
78
|
+
res.on('end', () => {
|
|
79
|
+
try {
|
|
80
|
+
const data = JSON.parse(Buffer.concat(chunks).toString());
|
|
81
|
+
resolve(data.text ?? '');
|
|
82
|
+
}
|
|
83
|
+
catch (e) {
|
|
84
|
+
reject(new Error('Failed to parse Whisper response'));
|
|
85
|
+
}
|
|
86
|
+
});
|
|
87
|
+
});
|
|
88
|
+
req.on('error', reject);
|
|
89
|
+
req.write(body);
|
|
90
|
+
req.end();
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
exports.WhisperSTTProvider = WhisperSTTProvider;
|
|
95
|
+
// ── Deepgram STT Provider ───────────────────────────────────
|
|
96
|
+
class DeepgramSTTProvider {
|
|
97
|
+
name = 'deepgram';
|
|
98
|
+
apiKey;
|
|
99
|
+
constructor(apiKey) {
|
|
100
|
+
this.apiKey = apiKey;
|
|
101
|
+
}
|
|
102
|
+
async transcribe(audio, options) {
|
|
103
|
+
const lang = options?.language ?? 'en';
|
|
104
|
+
return new Promise((resolve, reject) => {
|
|
105
|
+
const req = https.request({
|
|
106
|
+
hostname: 'api.deepgram.com',
|
|
107
|
+
path: `/v1/listen?language=${lang}&model=nova-2`,
|
|
108
|
+
method: 'POST',
|
|
109
|
+
headers: {
|
|
110
|
+
'Authorization': `Token ${this.apiKey}`,
|
|
111
|
+
'Content-Type': 'audio/wav',
|
|
112
|
+
'Content-Length': audio.length,
|
|
113
|
+
},
|
|
114
|
+
}, (res) => {
|
|
115
|
+
const chunks = [];
|
|
116
|
+
res.on('data', (c) => chunks.push(c));
|
|
117
|
+
res.on('end', () => {
|
|
118
|
+
try {
|
|
119
|
+
const data = JSON.parse(Buffer.concat(chunks).toString());
|
|
120
|
+
const transcript = data?.results?.channels?.[0]?.alternatives?.[0]?.transcript ?? '';
|
|
121
|
+
resolve(transcript);
|
|
122
|
+
}
|
|
123
|
+
catch {
|
|
124
|
+
reject(new Error('Failed to parse Deepgram response'));
|
|
125
|
+
}
|
|
126
|
+
});
|
|
127
|
+
});
|
|
128
|
+
req.on('error', reject);
|
|
129
|
+
req.write(audio);
|
|
130
|
+
req.end();
|
|
131
|
+
});
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
exports.DeepgramSTTProvider = DeepgramSTTProvider;
|
|
135
|
+
// ── Edge TTS Provider (free, no API key) ────────────────────
|
|
136
|
+
class EdgeTTSProvider {
|
|
137
|
+
name = 'edge-tts';
|
|
138
|
+
defaultVoice;
|
|
139
|
+
constructor(voice) {
|
|
140
|
+
this.defaultVoice = voice ?? 'en-US-AriaNeural';
|
|
141
|
+
}
|
|
142
|
+
async synthesize(text, options) {
|
|
143
|
+
const WebSocket = (await Promise.resolve(`${'ws'}`).then(s => __importStar(require(s))).catch(() => null))?.default;
|
|
144
|
+
if (!WebSocket) {
|
|
145
|
+
throw new Error('ws package required for Edge TTS. Install with: npm i ws');
|
|
146
|
+
}
|
|
147
|
+
const voice = options?.voice ?? this.defaultVoice;
|
|
148
|
+
const requestId = [...Array(32)].map(() => Math.random().toString(16)[2]).join('');
|
|
149
|
+
const timestamp = new Date().toISOString();
|
|
150
|
+
const endpoint = `wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4&ConnectionId=${requestId}`;
|
|
151
|
+
return new Promise((resolve, reject) => {
|
|
152
|
+
const ws = new WebSocket(endpoint, {
|
|
153
|
+
headers: {
|
|
154
|
+
'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold',
|
|
155
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
|
156
|
+
},
|
|
157
|
+
});
|
|
158
|
+
const audioChunks = [];
|
|
159
|
+
let headerSent = false;
|
|
160
|
+
ws.on('open', () => {
|
|
161
|
+
// Send config
|
|
162
|
+
ws.send(`Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"false","wordBoundaryEnabled":"false"},"outputFormat":"audio-24khz-48kbitrate-mono-mp3"}}}}`);
|
|
163
|
+
// Send SSML
|
|
164
|
+
const ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='${voice}'>${escapeXml(text)}</voice></speak>`;
|
|
165
|
+
ws.send(`X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nX-Timestamp:${timestamp}\r\nPath:ssml\r\n\r\n${ssml}`);
|
|
166
|
+
});
|
|
167
|
+
ws.on('message', (data) => {
|
|
168
|
+
if (typeof data === 'string' || (Buffer.isBuffer(data) && data.toString().includes('Path:turn.end'))) {
|
|
169
|
+
if (typeof data === 'string' && data.includes('Path:turn.end')) {
|
|
170
|
+
ws.close();
|
|
171
|
+
resolve(Buffer.concat(audioChunks));
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
else if (Buffer.isBuffer(data)) {
|
|
175
|
+
// Binary message — extract audio after header
|
|
176
|
+
const headerEnd = data.indexOf(Buffer.from('\r\n\r\n'));
|
|
177
|
+
if (headerEnd !== -1) {
|
|
178
|
+
audioChunks.push(data.slice(headerEnd + 4));
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
});
|
|
182
|
+
ws.on('error', (err) => {
|
|
183
|
+
reject(new Error(`Edge TTS WebSocket error: ${err.message}`));
|
|
184
|
+
});
|
|
185
|
+
ws.on('close', () => {
|
|
186
|
+
if (audioChunks.length > 0) {
|
|
187
|
+
resolve(Buffer.concat(audioChunks));
|
|
188
|
+
}
|
|
189
|
+
});
|
|
190
|
+
// Timeout
|
|
191
|
+
setTimeout(() => {
|
|
192
|
+
ws.close();
|
|
193
|
+
if (audioChunks.length > 0) {
|
|
194
|
+
resolve(Buffer.concat(audioChunks));
|
|
195
|
+
}
|
|
196
|
+
else {
|
|
197
|
+
reject(new Error('Edge TTS timeout'));
|
|
198
|
+
}
|
|
199
|
+
}, 30000);
|
|
200
|
+
});
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
exports.EdgeTTSProvider = EdgeTTSProvider;
|
|
204
|
+
function escapeXml(text) {
|
|
205
|
+
return text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;').replace(/'/g, '&apos;');
|
|
206
|
+
}
|
|
207
|
+
// ── OpenAI TTS Provider ─────────────────────────────────────
|
|
208
|
+
class OpenAITTSProvider {
|
|
209
|
+
name = 'openai-tts';
|
|
210
|
+
apiKey;
|
|
211
|
+
defaultVoice;
|
|
212
|
+
constructor(apiKey, voice) {
|
|
213
|
+
this.apiKey = apiKey;
|
|
214
|
+
this.defaultVoice = voice ?? 'alloy';
|
|
215
|
+
}
|
|
216
|
+
async synthesize(text, options) {
|
|
217
|
+
const voice = options?.voice ?? this.defaultVoice;
|
|
218
|
+
const body = JSON.stringify({
|
|
219
|
+
model: 'tts-1',
|
|
220
|
+
input: text,
|
|
221
|
+
voice,
|
|
222
|
+
speed: options?.speed ?? 1.0,
|
|
223
|
+
});
|
|
224
|
+
return new Promise((resolve, reject) => {
|
|
225
|
+
const req = https.request({
|
|
226
|
+
hostname: 'api.openai.com',
|
|
227
|
+
path: '/v1/audio/speech',
|
|
228
|
+
method: 'POST',
|
|
229
|
+
headers: {
|
|
230
|
+
'Authorization': `Bearer ${this.apiKey}`,
|
|
231
|
+
'Content-Type': 'application/json',
|
|
232
|
+
'Content-Length': Buffer.byteLength(body),
|
|
233
|
+
},
|
|
234
|
+
}, (res) => {
|
|
235
|
+
const chunks = [];
|
|
236
|
+
res.on('data', (c) => chunks.push(c));
|
|
237
|
+
res.on('end', () => resolve(Buffer.concat(chunks)));
|
|
238
|
+
});
|
|
239
|
+
req.on('error', reject);
|
|
240
|
+
req.write(body);
|
|
241
|
+
req.end();
|
|
242
|
+
});
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
exports.OpenAITTSProvider = OpenAITTSProvider;
|
|
246
|
+
// ── ElevenLabs TTS Provider ─────────────────────────────────
|
|
247
|
+
class ElevenLabsTTSProvider {
|
|
248
|
+
name = 'elevenlabs';
|
|
249
|
+
apiKey;
|
|
250
|
+
defaultVoice;
|
|
251
|
+
constructor(apiKey, voice) {
|
|
252
|
+
this.apiKey = apiKey;
|
|
253
|
+
this.defaultVoice = voice ?? '21m00Tcm4TlvDq8ikWAM'; // Rachel
|
|
254
|
+
}
|
|
255
|
+
async synthesize(text, options) {
|
|
256
|
+
const voiceId = options?.voice ?? this.defaultVoice;
|
|
257
|
+
const body = JSON.stringify({
|
|
258
|
+
text,
|
|
259
|
+
model_id: 'eleven_monolingual_v1',
|
|
260
|
+
});
|
|
261
|
+
return new Promise((resolve, reject) => {
|
|
262
|
+
const req = https.request({
|
|
263
|
+
hostname: 'api.elevenlabs.io',
|
|
264
|
+
path: `/v1/text-to-speech/${voiceId}`,
|
|
265
|
+
method: 'POST',
|
|
266
|
+
headers: {
|
|
267
|
+
'xi-api-key': this.apiKey,
|
|
268
|
+
'Content-Type': 'application/json',
|
|
269
|
+
'Content-Length': Buffer.byteLength(body),
|
|
270
|
+
},
|
|
271
|
+
}, (res) => {
|
|
272
|
+
const chunks = [];
|
|
273
|
+
res.on('data', (c) => chunks.push(c));
|
|
274
|
+
res.on('end', () => resolve(Buffer.concat(chunks)));
|
|
275
|
+
});
|
|
276
|
+
req.on('error', reject);
|
|
277
|
+
req.write(body);
|
|
278
|
+
req.end();
|
|
279
|
+
});
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
exports.ElevenLabsTTSProvider = ElevenLabsTTSProvider;
|
|
283
|
+
// ── Voice Config Factory ────────────────────────────────────
|
|
284
|
+
function createVoiceProviders(config) {
|
|
285
|
+
let stt;
|
|
286
|
+
let tts;
|
|
287
|
+
switch (config.sttProvider) {
|
|
288
|
+
case 'whisper':
|
|
289
|
+
if (config.sttApiKey)
|
|
290
|
+
stt = new WhisperSTTProvider(config.sttApiKey);
|
|
291
|
+
break;
|
|
292
|
+
case 'deepgram':
|
|
293
|
+
if (config.sttApiKey)
|
|
294
|
+
stt = new DeepgramSTTProvider(config.sttApiKey);
|
|
295
|
+
break;
|
|
296
|
+
case 'web-speech':
|
|
297
|
+
// Browser only — not available in Node.js
|
|
298
|
+
break;
|
|
299
|
+
}
|
|
300
|
+
switch (config.ttsProvider) {
|
|
301
|
+
case 'edge-tts':
|
|
302
|
+
tts = new EdgeTTSProvider(config.voice);
|
|
303
|
+
break;
|
|
304
|
+
case 'openai-tts':
|
|
305
|
+
if (config.ttsApiKey)
|
|
306
|
+
tts = new OpenAITTSProvider(config.ttsApiKey, config.voice);
|
|
307
|
+
break;
|
|
308
|
+
case 'elevenlabs':
|
|
309
|
+
if (config.ttsApiKey)
|
|
310
|
+
tts = new ElevenLabsTTSProvider(config.ttsApiKey, config.voice);
|
|
311
|
+
break;
|
|
312
|
+
}
|
|
313
|
+
return { stt, tts };
|
|
314
|
+
}
|
|
6
315
|
// ── Voice Channel ───────────────────────────────────────────
|
|
7
316
|
class VoiceChannel extends index_1.BaseChannel {
|
|
8
317
|
type = 'voice';
|
|
9
318
|
config;
|
|
10
319
|
logger = new logger_1.Logger('voice-channel');
|
|
11
320
|
running = false;
|
|
321
|
+
conversationActive = false;
|
|
12
322
|
constructor(config) {
|
|
13
323
|
super();
|
|
14
324
|
this.config = config ?? {};
|
|
@@ -22,11 +332,51 @@ class VoiceChannel extends index_1.BaseChannel {
|
|
|
22
332
|
}
|
|
23
333
|
async stop() {
|
|
24
334
|
this.running = false;
|
|
335
|
+
this.conversationActive = false;
|
|
25
336
|
this.logger.info('Voice channel stopped');
|
|
26
337
|
}
|
|
27
338
|
isRunning() {
|
|
28
339
|
return this.running;
|
|
29
340
|
}
|
|
341
|
+
/** Transcribe audio to text */
|
|
342
|
+
async transcribe(audio, format) {
|
|
343
|
+
if (!this.config.sttProvider) {
|
|
344
|
+
throw new Error('No STT provider configured');
|
|
345
|
+
}
|
|
346
|
+
return this.config.sttProvider.transcribe(audio, { language: this.config.language });
|
|
347
|
+
}
|
|
348
|
+
/** Synthesize text to audio */
|
|
349
|
+
async synthesize(text, voice) {
|
|
350
|
+
if (!this.config.ttsProvider) {
|
|
351
|
+
throw new Error('No TTS provider configured');
|
|
352
|
+
}
|
|
353
|
+
return this.config.ttsProvider.synthesize(text, { voice });
|
|
354
|
+
}
|
|
355
|
+
/** Start real-time conversation mode */
|
|
356
|
+
async startConversation(onMessage) {
|
|
357
|
+
if (!this.running)
|
|
358
|
+
await this.start();
|
|
359
|
+
this.conversationActive = true;
|
|
360
|
+
this.logger.info('Conversation mode started');
|
|
361
|
+
// In a real implementation, this would set up a microphone stream.
|
|
362
|
+
// For now, expose the conversation loop for programmatic use.
|
|
363
|
+
}
|
|
364
|
+
/** Process a single turn in conversation mode */
|
|
365
|
+
async processConversationTurn(audio, onMessage) {
|
|
366
|
+
const text = await this.transcribe(audio);
|
|
367
|
+
const response = await onMessage(text);
|
|
368
|
+
let audioResponse;
|
|
369
|
+
if (this.config.ttsProvider) {
|
|
370
|
+
audioResponse = await this.synthesize(response);
|
|
371
|
+
}
|
|
372
|
+
return { text, response, audioResponse };
|
|
373
|
+
}
|
|
374
|
+
stopConversation() {
|
|
375
|
+
this.conversationActive = false;
|
|
376
|
+
}
|
|
377
|
+
isConversationActive() {
|
|
378
|
+
return this.conversationActive;
|
|
379
|
+
}
|
|
30
380
|
/** Process audio input: STT → Agent → TTS */
|
|
31
381
|
async processAudio(audio) {
|
|
32
382
|
if (!this.handler)
|