modular-voice-agent-sdk 1.0.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. package/README.md +5 -2
  2. package/dist/backends/cloud/audio-llm.d.ts +72 -0
  3. package/dist/backends/cloud/audio-llm.d.ts.map +1 -0
  4. package/dist/backends/cloud/audio-llm.js +366 -0
  5. package/dist/backends/cloud/audio-llm.js.map +1 -0
  6. package/dist/backends/cloud/index.d.ts +2 -0
  7. package/dist/backends/cloud/index.d.ts.map +1 -1
  8. package/dist/backends/cloud/index.js +2 -0
  9. package/dist/backends/cloud/index.js.map +1 -1
  10. package/dist/backends/cloud/llm.d.ts.map +1 -1
  11. package/dist/backends/cloud/llm.js +31 -18
  12. package/dist/backends/cloud/llm.js.map +1 -1
  13. package/dist/backends/native/audio-llm.d.ts +126 -0
  14. package/dist/backends/native/audio-llm.d.ts.map +1 -0
  15. package/dist/backends/native/audio-llm.js +680 -0
  16. package/dist/backends/native/audio-llm.js.map +1 -0
  17. package/dist/backends/native/llm.d.ts.map +1 -1
  18. package/dist/backends/native/llm.js +5 -7
  19. package/dist/backends/native/llm.js.map +1 -1
  20. package/dist/backends/native/stt.d.ts +2 -2
  21. package/dist/backends/native/stt.d.ts.map +1 -1
  22. package/dist/backends/native/stt.js +1 -1
  23. package/dist/backends/native/stt.js.map +1 -1
  24. package/dist/backends/transformers/llm.d.ts.map +1 -1
  25. package/dist/backends/transformers/llm.js +13 -10
  26. package/dist/backends/transformers/llm.js.map +1 -1
  27. package/dist/backends/transformers/stt.d.ts +2 -2
  28. package/dist/backends/transformers/stt.d.ts.map +1 -1
  29. package/dist/backends/transformers/stt.js +12 -7
  30. package/dist/backends/transformers/stt.js.map +1 -1
  31. package/dist/backends/transformers/tts.d.ts.map +1 -1
  32. package/dist/backends/transformers/tts.js +11 -6
  33. package/dist/backends/transformers/tts.js.map +1 -1
  34. package/dist/cache-runtime.d.ts +29 -0
  35. package/dist/cache-runtime.d.ts.map +1 -0
  36. package/dist/cache-runtime.js +43 -0
  37. package/dist/cache-runtime.js.map +1 -0
  38. package/dist/cache.d.ts +4 -0
  39. package/dist/cache.d.ts.map +1 -1
  40. package/dist/cache.js +5 -0
  41. package/dist/cache.js.map +1 -1
  42. package/dist/cli.js +47 -7
  43. package/dist/cli.js.map +1 -1
  44. package/dist/client/voice-client.d.ts +4 -2
  45. package/dist/client/voice-client.d.ts.map +1 -1
  46. package/dist/client/voice-client.js +15 -13
  47. package/dist/client/voice-client.js.map +1 -1
  48. package/dist/client/web-speech-stt.d.ts +12 -1
  49. package/dist/client/web-speech-stt.d.ts.map +1 -1
  50. package/dist/client/web-speech-stt.js +49 -4
  51. package/dist/client/web-speech-stt.js.map +1 -1
  52. package/dist/server/handler.d.ts +12 -7
  53. package/dist/server/handler.d.ts.map +1 -1
  54. package/dist/server/handler.js +20 -20
  55. package/dist/server/handler.js.map +1 -1
  56. package/dist/services/llm-logger.d.ts +7 -18
  57. package/dist/services/llm-logger.d.ts.map +1 -1
  58. package/dist/services/llm-logger.js +22 -41
  59. package/dist/services/llm-logger.js.map +1 -1
  60. package/dist/types.d.ts +27 -5
  61. package/dist/types.d.ts.map +1 -1
  62. package/dist/voice-pipeline.d.ts +48 -10
  63. package/dist/voice-pipeline.d.ts.map +1 -1
  64. package/dist/voice-pipeline.js +138 -40
  65. package/dist/voice-pipeline.js.map +1 -1
  66. package/package.json +1 -1
  67. package/scripts/setup.sh +23 -0
  68. package/USAGE.md +0 -567
package/README.md CHANGED
@@ -48,6 +48,9 @@ Each component can run in the browser, on a server, or in the cloud. Pick any fr
48
48
  N/A ☁️ Cloud LLM N/A
49
49
  (OpenAI, Ollama, vLLM)
50
50
 
51
+ N/A ☁️ Cloud Audio LLM N/A
52
+ (GPT Audio, multimodal)
53
+
51
54
  ```
52
55
 
53
56
  Want browser speech recognition + a cloud LLM + browser speech synthesis? Done. Want everything running locally on your server with native binaries? Also done. Same code structure, same events, different backends.
@@ -59,11 +62,11 @@ Want browser speech recognition + a cloud LLM + browser speech synthesis? Done.
59
62
  - **Conversation history** — automatic context management
60
63
  - **Hybrid configs** — mix browser and server components freely
61
64
 
62
- See [`USAGE.md`](./USAGE.md) for full API documentation.
65
+ See [`docs/USAGE.md`](./docs/USAGE.md) for full API documentation.
63
66
 
64
67
  ## Examples
65
68
 
66
- See [`examples/`](./examples/) for 10 interactive demos covering all configurations.
69
+ See [`examples/`](./examples/) for interactive demos covering all configurations.
67
70
 
68
71
  ```bash
69
72
  cd examples
@@ -0,0 +1,72 @@
1
+ /**
2
+ * CloudAudioLLM - Multimodal Audio LLM Backend
3
+ *
4
+ * Implements BOTH STTPipeline and LLMPipeline interfaces.
5
+ * Register the same instance as both `stt` and `llm` in VoicePipeline.
6
+ *
7
+ * Uses internal caching to achieve single API call:
8
+ * 1. transcribe(audio) → calls multimodal API, caches response, returns transcript
9
+ * 2. generate(messages) → returns cached response (no second API call)
10
+ *
11
+ * The model is prompted to return both transcription and response in a structured format.
12
+ *
13
+ * Works with: OpenAI gpt-audio-mini, gpt-audio, and other audio-capable OpenAI-compatible endpoints.
14
+ */
15
+ import type { STTPipeline, LLMPipeline, CloudLLMConfig, ProgressCallback, Message, LLMGenerateOptions, LLMGenerateResult, TurnContext } from '../../types';
16
+ export interface CloudAudioLLMConfig extends CloudLLMConfig {
17
+ /**
18
+ * Format for audio encoding.
19
+ * @default 'wav'
20
+ */
21
+ audioFormat?: 'wav' | 'mp3' | 'pcm16';
22
+ /**
23
+ * Sample rate for audio input.
24
+ * @default 16000
25
+ */
26
+ sampleRate?: number;
27
+ }
28
+ export declare class CloudAudioLLM implements STTPipeline, LLMPipeline {
29
+ private config;
30
+ private ready;
31
+ private tracker;
32
+ private lastAudio;
33
+ constructor(config: CloudAudioLLMConfig);
34
+ initialize(_onProgress?: ProgressCallback): Promise<void>;
35
+ isReady(): boolean;
36
+ supportsTools(): boolean;
37
+ /**
38
+ * Transcribe audio by calling the multimodal API.
39
+ * Sets both sttResult and llmResult in TurnContext for the subsequent generate() call.
40
+ */
41
+ transcribe(audio: Float32Array, turn?: TurnContext): Promise<string>;
42
+ /**
43
+ * Generate response.
44
+ * - Tool followup: Makes fresh API call with tool results
45
+ * - User request: Returns cached result from transcribe()
46
+ */
47
+ generate(messages: Message[], options?: LLMGenerateOptions): Promise<LLMGenerateResult>;
48
+ /**
49
+ * Make API call to the audio model.
50
+ *
51
+ * @param messages - Conversation history
52
+ * @param options - Generation options (tools, etc.)
53
+ * @param freshAudio - If provided, this is an audio turn (transcribe + respond).
54
+ * If not provided, this is a text turn (use stored lastAudio).
55
+ */
56
+ private callAPI;
57
+ /**
58
+ * Build messages for the API call.
59
+ */
60
+ private buildMessages;
61
+ /**
62
+ * Parse API response. Handles both audio turn format {transcript, response}
63
+ * and text turn format {response}.
64
+ */
65
+ private parseResponse;
66
+ private encodeAudioToBase64;
67
+ private convertMessages;
68
+ private convertTools;
69
+ private extractToolCalls;
70
+ private getHeaders;
71
+ }
72
+ //# sourceMappingURL=audio-llm.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"audio-llm.d.ts","sourceRoot":"","sources":["../../../src/backends/cloud/audio-llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EACV,WAAW,EACX,WAAW,EACX,cAAc,EACd,gBAAgB,EAChB,OAAO,EACP,kBAAkB,EAClB,iBAAiB,EAKjB,WAAW,EACZ,MAAM,aAAa,CAAC;AAGrB,MAAM,WAAW,mBAAoB,SAAQ,cAAc;IACzD;;;OAGG;IACH,WAAW,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,OAAO,CAAC;IACtC;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,qBAAa,aAAc,YAAW,WAAW,EAAE,WAAW;IAC5D,OAAO,CAAC,MAAM,CAAsB;IACpC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,OAAO,CAAyB;IAGxC,OAAO,CAAC,SAAS,CAAuB;gBAE5B,MAAM,EAAE,mBAAmB;IASjC,UAAU,CAAC,WAAW,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAQ/D,OAAO,IAAI,OAAO;IAIlB,aAAa,IAAI,OAAO;IAQxB;;;OAGG;IACG,UAAU,CAAC,KAAK,EAAE,YAAY,EAAE,IAAI,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IAqB1E;;;;OAIG;IACG,QAAQ,CACZ,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,CAAC,EAAE,kBAAkB,GAC3B,OAAO,CAAC,iBAAiB,CAAC;IA+C7B;;;;;;;OAOG;YACW,OAAO;IAsErB;;OAEG;IACH,OAAO,CAAC,aAAa;IAoErB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAqCrB,OAAO,CAAC,mBAAmB;IAiD3B,OAAO,CAAC,eAAe;IAiCvB,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,gBAAgB;IA4BxB,OAAO,CAAC,UAAU;CAOnB"}
@@ -0,0 +1,366 @@
1
+ /**
2
+ * CloudAudioLLM - Multimodal Audio LLM Backend
3
+ *
4
+ * Implements BOTH STTPipeline and LLMPipeline interfaces.
5
+ * Register the same instance as both `stt` and `llm` in VoicePipeline.
6
+ *
7
+ * Uses internal caching to achieve single API call:
8
+ * 1. transcribe(audio) → calls multimodal API, caches response, returns transcript
9
+ * 2. generate(messages) → returns cached response (no second API call)
10
+ *
11
+ * The model is prompted to return both transcription and response in a structured format.
12
+ *
13
+ * Works with: OpenAI gpt-audio-mini, gpt-audio, and other audio-capable OpenAI-compatible endpoints.
14
+ */
15
+ import { LLMLogger, LLMConversationTracker } from '../../services';
16
+ export class CloudAudioLLM {
17
+ config;
18
+ ready = false;
19
+ tracker;
20
+ // Last audio from this session - used for tool follow-ups (audio model requires audio in every request)
21
+ lastAudio = null;
22
+ constructor(config) {
23
+ this.config = {
24
+ audioFormat: 'wav',
25
+ sampleRate: 16000,
26
+ ...config,
27
+ };
28
+ this.tracker = new LLMConversationTracker(new LLMLogger());
29
+ }
30
+ async initialize(_onProgress) {
31
+ console.log(`Initializing CloudAudioLLM (${this.config.baseUrl})...`);
32
+ console.log(` Model: ${this.config.model}`);
33
+ console.log(` Audio format: ${this.config.audioFormat}`);
34
+ this.ready = true;
35
+ console.log('CloudAudioLLM ready.');
36
+ }
37
+ isReady() {
38
+ return this.ready;
39
+ }
40
+ supportsTools() {
41
+ return true;
42
+ }
43
+ // ============================================================
44
+ // STTPipeline Implementation
45
+ // ============================================================
46
+ /**
47
+ * Transcribe audio by calling the multimodal API.
48
+ * Sets both sttResult and llmResult in TurnContext for the subsequent generate() call.
49
+ */
50
+ async transcribe(audio, turn) {
51
+ if (!turn) {
52
+ throw new Error('CloudAudioLLM.transcribe() requires TurnContext. Pass turn parameter.');
53
+ }
54
+ if (turn.sttResult) {
55
+ return turn.sttResult.transcript;
56
+ }
57
+ const { transcript, result } = await this.callAPI(turn.history, { tools: turn.tools }, audio);
58
+ turn.sttResult = { transcript };
59
+ turn.llmResult = result;
60
+ return transcript;
61
+ }
62
+ // ============================================================
63
+ // LLMPipeline Implementation
64
+ // ============================================================
65
+ /**
66
+ * Generate response.
67
+ * - Tool followup: Makes fresh API call with tool results
68
+ * - User request: Returns cached result from transcribe()
69
+ */
70
+ async generate(messages, options) {
71
+ const turn = options?.turn;
72
+ // Primary branch: Is this a tool followup?
73
+ const lastMessage = messages[messages.length - 1];
74
+ const isToolFollowup = lastMessage?.role === 'tool';
75
+ if (isToolFollowup) {
76
+ // Tool followup: fresh API call with tool results
77
+ const { result } = await this.callAPI(messages, options);
78
+ if (options?.onToken && result.content) {
79
+ for (const char of result.content) {
80
+ options.onToken(char);
81
+ }
82
+ }
83
+ return result;
84
+ }
85
+ // User request: validate cached result exists
86
+ if (!turn?.llmResult) {
87
+ throw new Error('CloudAudioLLM.generate() called for user request without cached result. ' +
88
+ 'Call transcribe() first, or check if this should be a tool followup.');
89
+ }
90
+ // Use cached result from transcribe()
91
+ const result = turn.llmResult;
92
+ this.tracker.logInput(messages);
93
+ if (options?.onToken && result.content) {
94
+ for (const char of result.content) {
95
+ options.onToken(char);
96
+ }
97
+ }
98
+ this.tracker.logOutput(result.content, result.toolCalls);
99
+ return result;
100
+ }
101
+ // ============================================================
102
+ // Internal: Unified API Call
103
+ // ============================================================
104
+ /**
105
+ * Make API call to the audio model.
106
+ *
107
+ * @param messages - Conversation history
108
+ * @param options - Generation options (tools, etc.)
109
+ * @param freshAudio - If provided, this is an audio turn (transcribe + respond).
110
+ * If not provided, this is a text turn (use stored lastAudio).
111
+ */
112
+ async callAPI(messages, options, freshAudio) {
113
+ const isAudioTurn = !!freshAudio;
114
+ const hasTools = !!(options?.tools && options.tools.length > 0);
115
+ const openaiMessages = this.buildMessages(messages, hasTools, freshAudio);
116
+ const body = {
117
+ model: this.config.model,
118
+ messages: openaiMessages,
119
+ modalities: ['text'],
120
+ ...this.config.modelParams,
121
+ };
122
+ if (hasTools) {
123
+ body.tools = this.convertTools(options.tools);
124
+ body.parallel_tool_calls = false;
125
+ }
126
+ if (!isAudioTurn) {
127
+ this.tracker.logInput(messages);
128
+ }
129
+ const response = await fetch(`${this.config.baseUrl}/chat/completions`, {
130
+ method: 'POST',
131
+ headers: {
132
+ ...this.getHeaders(),
133
+ 'Content-Type': 'application/json',
134
+ },
135
+ body: JSON.stringify(body),
136
+ });
137
+ if (!response.ok) {
138
+ const errorText = await response.text();
139
+ throw new Error(`CloudAudioLLM API error (${response.status}): ${errorText}`);
140
+ }
141
+ const data = await response.json();
142
+ const rawContent = data.choices?.[0]?.message?.content || '';
143
+ const toolCalls = this.extractToolCalls(data);
144
+ const finishReason = data.choices?.[0]?.finish_reason;
145
+ if (!rawContent && toolCalls.length === 0) {
146
+ const reason = finishReason || 'unknown';
147
+ throw new Error(`CloudAudioLLM returned empty response (finish_reason: ${reason}). ` +
148
+ (reason === 'length'
149
+ ? 'The model hit the token limit before producing output. Try increasing max_completion_tokens.'
150
+ : 'The model did not produce any content.'));
151
+ }
152
+ const { transcript, responseText } = this.parseResponse(rawContent, isAudioTurn, toolCalls.length > 0);
153
+ if (!isAudioTurn) {
154
+ this.tracker.logOutput(responseText, toolCalls.length > 0 ? toolCalls : undefined);
155
+ }
156
+ const result = {
157
+ content: responseText,
158
+ toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
159
+ finishReason: toolCalls.length > 0 ? 'tool_calls' : 'stop',
160
+ };
161
+ return { transcript, result };
162
+ }
163
+ /**
164
+ * Build messages for the API call.
165
+ */
166
+ buildMessages(messages, hasTools, freshAudio) {
167
+ const converted = this.convertMessages(messages);
168
+ // Add tool instruction to system prompt if tools are available
169
+ if (hasTools && converted.length > 0) {
170
+ const first = converted[0];
171
+ if (first.role === 'system') {
172
+ first.content += '\n\nOnly call ONE tool at a time. Wait for the result before deciding if another tool is needed.';
173
+ }
174
+ }
175
+ if (freshAudio) {
176
+ // Audio turn: encode and store audio, add multimodal user message
177
+ const base64Audio = this.encodeAudioToBase64(freshAudio);
178
+ this.lastAudio = base64Audio;
179
+ converted.push({
180
+ role: 'user',
181
+ content: [
182
+ {
183
+ type: 'input_audio',
184
+ input_audio: {
185
+ data: base64Audio,
186
+ format: this.config.audioFormat,
187
+ },
188
+ },
189
+ {
190
+ type: 'text',
191
+ text: 'Respond with JSON: {"transcript":"<what user said>","response":"<your response>"}',
192
+ },
193
+ ],
194
+ });
195
+ }
196
+ else {
197
+ // Text turn: inject stored audio into last user message, add JSON instruction
198
+ if (this.lastAudio) {
199
+ for (let i = converted.length - 1; i >= 0; i--) {
200
+ const msg = converted[i];
201
+ if (msg.role === 'user' && typeof msg.content === 'string') {
202
+ converted[i] = {
203
+ role: 'user',
204
+ content: [
205
+ {
206
+ type: 'input_audio',
207
+ input_audio: {
208
+ data: this.lastAudio,
209
+ format: this.config.audioFormat,
210
+ },
211
+ },
212
+ {
213
+ type: 'text',
214
+ text: msg.content,
215
+ },
216
+ ],
217
+ };
218
+ break;
219
+ }
220
+ }
221
+ }
222
+ converted.push({
223
+ role: 'user',
224
+ content: 'Respond to the user\'s request with JSON: {"response":"<your response>"}',
225
+ });
226
+ }
227
+ return converted;
228
+ }
229
+ /**
230
+ * Parse API response. Handles both audio turn format {transcript, response}
231
+ * and text turn format {response}.
232
+ */
233
+ parseResponse(content, isAudioTurn, hasToolCalls) {
234
+ if (content) {
235
+ try {
236
+ const parsed = JSON.parse(content);
237
+ if (isAudioTurn && parsed.transcript !== undefined && parsed.response !== undefined) {
238
+ return { transcript: parsed.transcript, responseText: parsed.response };
239
+ }
240
+ if (!isAudioTurn && parsed.response !== undefined) {
241
+ return { transcript: '', responseText: parsed.response };
242
+ }
243
+ }
244
+ catch {
245
+ // Not valid JSON, continue to fallback
246
+ }
247
+ }
248
+ // Fallback: model went straight to tool call
249
+ if (hasToolCalls) {
250
+ return {
251
+ transcript: isAudioTurn ? '[audio processed]' : '',
252
+ responseText: content,
253
+ };
254
+ }
255
+ const expectedFormat = isAudioTurn ? '{"transcript":"...","response":"..."}' : '{"response":"..."}';
256
+ throw new Error(`CloudAudioLLM: Expected JSON ${expectedFormat} but got: ${content.substring(0, 100)}`);
257
+ }
258
+ // ============================================================
259
+ // Internal: Helpers
260
+ // ============================================================
261
+ encodeAudioToBase64(audio) {
262
+ const numChannels = 1;
263
+ const sampleRate = this.config.sampleRate;
264
+ const bitsPerSample = 16;
265
+ const byteRate = (sampleRate * numChannels * bitsPerSample) / 8;
266
+ const blockAlign = (numChannels * bitsPerSample) / 8;
267
+ const dataSize = audio.length * 2;
268
+ const headerSize = 44;
269
+ const totalSize = headerSize + dataSize;
270
+ const buffer = new ArrayBuffer(totalSize);
271
+ const view = new DataView(buffer);
272
+ const writeString = (offset, str) => {
273
+ for (let i = 0; i < str.length; i++) {
274
+ view.setUint8(offset + i, str.charCodeAt(i));
275
+ }
276
+ };
277
+ writeString(0, 'RIFF');
278
+ view.setUint32(4, totalSize - 8, true);
279
+ writeString(8, 'WAVE');
280
+ writeString(12, 'fmt ');
281
+ view.setUint32(16, 16, true);
282
+ view.setUint16(20, 1, true);
283
+ view.setUint16(22, numChannels, true);
284
+ view.setUint32(24, sampleRate, true);
285
+ view.setUint32(28, byteRate, true);
286
+ view.setUint16(32, blockAlign, true);
287
+ view.setUint16(34, bitsPerSample, true);
288
+ writeString(36, 'data');
289
+ view.setUint32(40, dataSize, true);
290
+ let offset = 44;
291
+ for (let i = 0; i < audio.length; i++) {
292
+ const sample = Math.max(-1, Math.min(1, audio[i]));
293
+ const int16 = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
294
+ view.setInt16(offset, int16, true);
295
+ offset += 2;
296
+ }
297
+ const bytes = new Uint8Array(buffer);
298
+ let binary = '';
299
+ for (let i = 0; i < bytes.length; i++) {
300
+ binary += String.fromCharCode(bytes[i]);
301
+ }
302
+ return btoa(binary);
303
+ }
304
+ convertMessages(messages) {
305
+ return messages.map((m) => {
306
+ if (m.role === 'tool') {
307
+ const toolMsg = m;
308
+ return {
309
+ role: 'tool',
310
+ content: toolMsg.content,
311
+ tool_call_id: toolMsg.toolCallId,
312
+ };
313
+ }
314
+ if (m.role === 'assistant') {
315
+ const assistantMsg = m;
316
+ if (assistantMsg.toolCalls && assistantMsg.toolCalls.length > 0) {
317
+ return {
318
+ role: 'assistant',
319
+ content: assistantMsg.content || null,
320
+ tool_calls: assistantMsg.toolCalls.map((tc) => ({
321
+ id: tc.id,
322
+ type: 'function',
323
+ function: {
324
+ name: tc.name,
325
+ arguments: JSON.stringify(tc.arguments),
326
+ },
327
+ })),
328
+ };
329
+ }
330
+ }
331
+ return { role: m.role, content: m.content };
332
+ });
333
+ }
334
+ convertTools(tools) {
335
+ return tools.map((tool) => ({
336
+ type: 'function',
337
+ function: {
338
+ name: tool.name,
339
+ description: tool.description,
340
+ parameters: tool.parameters,
341
+ },
342
+ }));
343
+ }
344
+ extractToolCalls(data) {
345
+ const toolCalls = [];
346
+ const message = data?.choices?.[0]?.message;
347
+ if (message?.tool_calls) {
348
+ for (const tc of message.tool_calls) {
349
+ toolCalls.push({
350
+ id: tc.id,
351
+ name: tc.function.name,
352
+ arguments: JSON.parse(tc.function.arguments || '{}'),
353
+ });
354
+ }
355
+ }
356
+ return toolCalls;
357
+ }
358
+ getHeaders() {
359
+ const headers = {};
360
+ if (this.config.apiKey) {
361
+ headers['Authorization'] = `Bearer ${this.config.apiKey}`;
362
+ }
363
+ return headers;
364
+ }
365
+ }
366
+ //# sourceMappingURL=audio-llm.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"audio-llm.js","sourceRoot":"","sources":["../../../src/backends/cloud/audio-llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAgBH,OAAO,EAAE,SAAS,EAAE,sBAAsB,EAAuB,MAAM,gBAAgB,CAAC;AAexF,MAAM,OAAO,aAAa;IAChB,MAAM,CAAsB;IAC5B,KAAK,GAAG,KAAK,CAAC;IACd,OAAO,CAAyB;IAExC,wGAAwG;IAChG,SAAS,GAAkB,IAAI,CAAC;IAExC,YAAY,MAA2B;QACrC,IAAI,CAAC,MAAM,GAAG;YACZ,WAAW,EAAE,KAAK;YAClB,UAAU,EAAE,KAAK;YACjB,GAAG,MAAM;SACV,CAAC;QACF,IAAI,CAAC,OAAO,GAAG,IAAI,sBAAsB,CAAC,IAAI,SAAS,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,WAA8B;QAC7C,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,CAAC,MAAM,CAAC,OAAO,MAAM,CAAC,CAAC;QACtE,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;QAC7C,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC;QAC1D,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;IACtC,CAAC;IAED,OAAO;QACL,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,aAAa;QACX,OAAO,IAAI,CAAC;IACd,CAAC;IAED,+DAA+D;IAC/D,6BAA6B;IAC7B,+DAA+D;IAE/D;;;OAGG;IACH,KAAK,CAAC,UAAU,CAAC,KAAmB,EAAE,IAAkB;QACtD,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CAAC,uEAAuE,CAAC,CAAC;QAC3F,CAAC;QAED,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,OAAO,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC;QACnC,CAAC;QAED,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,EAAE,KAAK,CAAC,CAAC;QAE9F,IAAI,CAAC,SAAS,GAAG,EAAE,UAAU,EAAE,CAAC;QAChC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC;QAExB,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,+DAA+D;IAC/D,6BAA6B;IAC7B,+DAA+D;IAE/D;;;;OAIG;IACH,KAAK,CAAC,QAAQ,CACZ,QAAmB,EACnB,OAA4B;QAE5B,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,CAAC;QAE3B,2CAA2C;QAC3C,MAAM,WAAW,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAClD,MAAM,cAAc,GAAG,WAAW,EAAE,IAAI,KAAK,MAAM,CAAC;QAEpD,IAAI,cAAc,EAAE,CAAC;YACnB,kDAAkD;YAClD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEzD,IAAI,OAAO,EAAE,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBACvC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;oBAClC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;
gBACxB,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,8CAA8C;QAC9C,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;YACrB,MAAM,IAAI,KAAK,CACb,0EAA0E;gBAC1E,sEAAsE,CACvE,CAAC;QACJ,CAAC;QAED,sCAAsC;QACtC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC;QAE9B,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,QAA4B,CAAC,CAAC;QAEpD,IAAI,OAAO,EAAE,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACvC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBAClC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;QAED,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;QACzD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,+DAA+D;IAC/D,6BAA6B;IAC7B,+DAA+D;IAE/D;;;;;;;OAOG;IACK,KAAK,CAAC,OAAO,CACnB,QAAmB,EACnB,OAA4B,EAC5B,UAAyB;QAEzB,MAAM,WAAW,GAAG,CAAC,CAAC,UAAU,CAAC;QACjC,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,KAAK,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEhE,MAAM,cAAc,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;QAE1E,MAAM,IAAI,GAA4B;YACpC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;YACxB,QAAQ,EAAE,cAAc;YACxB,UAAU,EAAE,CAAC,MAAM,CAAC;YACpB,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW;SAC3B,CAAC;QAEF,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAQ,CAAC,KAAM,CAAC,CAAC;YAChD,IAAI,CAAC,mBAAmB,GAAG,KAAK,CAAC;QACnC,CAAC;QAED,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,QAA4B,CAAC,CAAC;QACtD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,mBAAmB,EAAE;YACtE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,GAAG,IAAI,CAAC,UAAU,EAAE;gBACpB,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,MAAM,SAAS,EAAE,CAAC,CAAC;QAChF,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;QAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;QAC9C,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC;QAEtD,IAAI,CAAC,UAAU,IAAI,SAAS,
CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1C,MAAM,MAAM,GAAG,YAAY,IAAI,SAAS,CAAC;YACzC,MAAM,IAAI,KAAK,CACb,yDAAyD,MAAM,KAAK;gBAClE,CAAC,MAAM,KAAK,QAAQ;oBAClB,CAAC,CAAC,8FAA8F;oBAChG,CAAC,CAAC,wCAAwC,CAAC,CAChD,CAAC;QACJ,CAAC;QAED,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,WAAW,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEvG,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,YAAY,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACrF,CAAC;QAED,MAAM,MAAM,GAAsB;YAChC,OAAO,EAAE,YAAY;YACrB,SAAS,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;YACvD,YAAY,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM;SAC3D,CAAC;QAEF,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC;IAChC,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,QAAmB,EAAE,QAAiB,EAAE,UAAyB;QACrF,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;QAEjD,+DAA+D;QAC/D,IAAI,QAAQ,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrC,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAsC,CAAC;YAChE,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC5B,KAAK,CAAC,OAAO,IAAI,kGAAkG,CAAC;YACtH,CAAC;QACH,CAAC;QAED,IAAI,UAAU,EAAE,CAAC;YACf,kEAAkE;YAClE,MAAM,WAAW,GAAG,IAAI,CAAC,mBAAmB,CAAC,UAAU,CAAC,CAAC;YACzD,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC;YAE7B,SAAS,CAAC,IAAI,CAAC;gBACb,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,aAAa;wBACnB,WAAW,EAAE;4BACX,IAAI,EAAE,WAAW;4BACjB,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;yBAChC;qBACF;oBACD;wBACE,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,mFAAmF;qBAC1F;iBACF;aACF,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,8EAA8E;YAC9E,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACnB,KAAK,IAAI,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC/C,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,CAAuC,CAAC;oBAC/D,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;wBAC3D,SAAS,CAAC,CAAC,CAAC,GAAG;4BACb,IAAI,EAAE,MAAM;4BACZ,OAAO,EAAE;gCACP;oCACE,IAAI,EAAE,aAAa;oCACnB,WAAW,EAAE;wCACX,IAAI,EAAE,IAAI,CAAC,SAAS;wCACpB,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;qCAChC;iCACF;gCACD;oCACE,IAAI,EAAE,M
AAM;oCACZ,IAAI,EAAE,GAAG,CAAC,OAAO;iCAClB;6BACF;yBACF,CAAC;wBACF,MAAM;oBACR,CAAC;gBACH,CAAC;YACH,CAAC;YAED,SAAS,CAAC,IAAI,CAAC;gBACb,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,0EAA0E;aACpF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;OAGG;IACK,aAAa,CACnB,OAAe,EACf,WAAoB,EACpB,YAAqB;QAErB,IAAI,OAAO,EAAE,CAAC;YACZ,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;gBAEnC,IAAI,WAAW,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;oBACpF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,YAAY,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAC1E,CAAC;gBAED,IAAI,CAAC,WAAW,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;oBAClD,OAAO,EAAE,UAAU,EAAE,EAAE,EAAE,YAAY,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAC3D,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,uCAAuC;YACzC,CAAC;QACH,CAAC;QAED,6CAA6C;QAC7C,IAAI,YAAY,EAAE,CAAC;YACjB,OAAO;gBACL,UAAU,EAAE,WAAW,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,EAAE;gBAClD,YAAY,EAAE,OAAO;aACtB,CAAC;QACJ,CAAC;QAED,MAAM,cAAc,GAAG,WAAW,CAAC,CAAC,CAAC,uCAAuC,CAAC,CAAC,CAAC,oBAAoB,CAAC;QACpG,MAAM,IAAI,KAAK,CAAC,gCAAgC,cAAc,aAAa,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,+DAA+D;IAC/D,oBAAoB;IACpB,+DAA+D;IAEvD,mBAAmB,CAAC,KAAmB;QAC7C,MAAM,WAAW,GAAG,CAAC,CAAC;QACtB,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,UAAW,CAAC;QAC3C,MAAM,aAAa,GAAG,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,CAAC,UAAU,GAAG,WAAW,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;QAChE,MAAM,UAAU,GAAG,CAAC,WAAW,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;QACrD,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;QAClC,MAAM,UAAU,GAAG,EAAE,CAAC;QACtB,MAAM,SAAS,GAAG,UAAU,GAAG,QAAQ,CAAC;QAExC,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,SAAS,CAAC,CAAC;QAC1C,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;QAElC,MAAM,WAAW,GAAG,CAAC,MAAc,EAAE,GAAW,EAAE,EAAE;YAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YAC/C,CAAC;QACH,CAAC,CAAC;QAEF,WAAW,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QACvB,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;QAC
vC,WAAW,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QACvB,WAAW,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QACxB,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,CAAC;QAC7B,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC5B,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,WAAW,EAAE,IAAI,CAAC,CAAC;QACtC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;QACrC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;QACnC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;QACrC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,aAAa,EAAE,IAAI,CAAC,CAAC;QACxC,WAAW,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QACxB,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;QAEnC,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACnD,MAAM,KAAK,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC;YAC7D,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;YACnC,MAAM,IAAI,CAAC,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;QACrC,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,MAAM,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1C,CAAC;QACD,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC;IACtB,CAAC;IAEO,eAAe,CAAC,QAAmB;QACzC,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YACxB,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,CAAgB,CAAC;gBACjC,OAAO;oBACL,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,YAAY,EAAE,OAAO,CAAC,UAAU;iBACjC,CAAC;YACJ,CAAC;YAED,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC3B,MAAM,YAAY,GAAG,CAAqB,CAAC;gBAC3C,IAAI,YAAY,CAAC,SAAS,IAAI,YAAY,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAChE,OAAO;wBACL,IAAI,EAAE,WAAW;wBACjB,OAAO,EAAE,YAAY,CAAC,OAAO,IAAI,IAAI;wBACrC,UAAU,EAAE,YAAY,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;4BAC9C,EAAE,EAAE,EAAE,CAAC,EAAE;4BACT,IAAI,EAAE,UAAU;4BAChB,QAAQ,EAAE;gCACR,IAAI,EAAE,EAAE
,CAAC,IAAI;gCACb,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,SAAS,CAAC;6BACxC;yBACF,CAAC,CAAC;qBACJ,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;QAC9C,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,YAAY,CAAC,KAAuB;QAC1C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YAC1B,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE;gBACR,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;aAC5B;SACF,CAAC,CAAC,CAAC;IACN,CAAC;IAEO,gBAAgB,CAAC,IAAa;QACpC,MAAM,SAAS,GAAe,EAAE,CAAC;QACjC,MAAM,OAAO,GACX,IAUD,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC;QAEzB,IAAI,OAAO,EAAE,UAAU,EAAE,CAAC;YACxB,KAAK,MAAM,EAAE,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACpC,SAAS,CAAC,IAAI,CAAC;oBACb,EAAE,EAAE,EAAE,CAAC,EAAE;oBACT,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI;oBACtB,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,SAAS,IAAI,IAAI,CAAC;iBACrD,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,UAAU;QAChB,MAAM,OAAO,GAA2B,EAAE,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACvB,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QAC5D,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;CACF"}
@@ -4,4 +4,6 @@
4
4
  */
5
5
  export { CloudLLM } from './llm';
6
6
  export type { CloudLLMConfig } from '../../types';
7
+ export { CloudAudioLLM } from './audio-llm';
8
+ export type { CloudAudioLLMConfig } from './audio-llm';
7
9
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/backends/cloud/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,OAAO,CAAC;AACjC,YAAY,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/backends/cloud/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,OAAO,CAAC;AACjC,YAAY,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAGlD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,YAAY,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC"}
@@ -3,4 +3,6 @@
3
3
  * Works with: OpenAI, Ollama, vLLM, LMStudio, and any OpenAI-compatible endpoint
4
4
  */
5
5
  export { CloudLLM } from './llm';
6
+ // Audio LLM (multimodal - implements both STTPipeline and LLMPipeline)
7
+ export { CloudAudioLLM } from './audio-llm';
6
8
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/backends/cloud/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,OAAO,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/backends/cloud/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,OAAO,CAAC;AAGjC,uEAAuE;AACvE,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/backends/cloud/llm.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EACV,WAAW,EACX,cAAc,EACd,gBAAgB,EAChB,OAAO,EACP,kBAAkB,EAClB,iBAAiB,EAKlB,MAAM,aAAa,CAAC;AAuBrB,qBAAa,QAAS,YAAW,WAAW;IAC1C,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,OAAO,CAAyB;gBAE5B,MAAM,EAAE,cAAc;IAS5B,UAAU,CAAC,WAAW,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA2B/D,aAAa,IAAI,OAAO;IAIlB,QAAQ,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IA+J7F,OAAO,CAAC,eAAe;IAuCvB,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,UAAU;IAUlB,OAAO,IAAI,OAAO;CAGnB"}
1
+ {"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/backends/cloud/llm.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EACV,WAAW,EACX,cAAc,EACd,gBAAgB,EAChB,OAAO,EACP,kBAAkB,EAClB,iBAAiB,EAKlB,MAAM,aAAa,CAAC;AAuBrB,qBAAa,QAAS,YAAW,WAAW;IAC1C,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,OAAO,CAAyB;gBAE5B,MAAM,EAAE,cAAc;IAK5B,UAAU,CAAC,WAAW,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA2B/D,aAAa,IAAI,OAAO;IAIlB,QAAQ,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAmL7F,OAAO,CAAC,eAAe;IAuCvB,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,UAAU;IAUlB,OAAO,IAAI,OAAO;CAGnB"}
@@ -11,11 +11,7 @@ export class CloudLLM {
11
11
  ready = false;
12
12
  tracker;
13
13
  constructor(config) {
14
- this.config = {
15
- maxTokens: 256,
16
- temperature: 0.7,
17
- ...config,
18
- };
14
+ this.config = config;
19
15
  this.tracker = new LLMConversationTracker(new LLMLogger());
20
16
  }
21
17
  async initialize(_onProgress) {
@@ -50,10 +46,8 @@ export class CloudLLM {
50
46
  if (!this.ready) {
51
47
  throw new Error('LLM pipeline not initialized');
52
48
  }
53
- // Use conversation ID if provided, else default
54
- const conversationId = options?.conversationId ?? 'default';
55
49
  // Log the input messages
56
- this.tracker.logInput(conversationId, messages);
50
+ this.tracker.logInput(messages);
57
51
  const url = `${this.config.baseUrl}/chat/completions`;
58
52
  // Convert messages to OpenAI format
59
53
  const openaiMessages = this.convertMessages(messages);
@@ -61,9 +55,8 @@ export class CloudLLM {
61
55
  const body = {
62
56
  model: this.config.model,
63
57
  messages: openaiMessages,
64
- max_tokens: this.config.maxTokens,
65
- temperature: this.config.temperature,
66
58
  stream: true,
59
+ ...this.config.modelParams,
67
60
  };
68
61
  // Add tools if provided
69
62
  if (options?.tools && options.tools.length > 0) {
@@ -90,7 +83,7 @@ export class CloudLLM {
90
83
  let fullContent = '';
91
84
  let buffer = '';
92
85
  const toolCalls = new Map();
93
- let finishReason = 'stop';
86
+ let finishReason = null;
94
87
  while (true) {
95
88
  const { done, value } = await reader.read();
96
89
  if (done) {
@@ -109,6 +102,10 @@ export class CloudLLM {
109
102
  const jsonStr = trimmed.slice(6);
110
103
  try {
111
104
  const parsed = JSON.parse(jsonStr);
105
+ // Check for API error in stream
106
+ if (parsed.error) {
107
+ throw new Error(`Cloud LLM stream error: ${JSON.stringify(parsed.error)}`);
108
+ }
112
109
  const choice = parsed.choices?.[0];
113
110
  const delta = choice?.delta;
114
111
  // Handle text content
@@ -135,13 +132,19 @@ export class CloudLLM {
135
132
  }
136
133
  }
137
134
  }
138
- // Check finish reason
139
- if (choice?.finish_reason === 'tool_calls') {
140
- finishReason = 'tool_calls';
135
+ // Track finish reason from API
136
+ if (choice?.finish_reason) {
137
+ finishReason = choice.finish_reason;
141
138
  }
142
139
  }
143
- catch {
144
- // Skip malformed JSON lines (can happen with some providers)
140
+ catch (e) {
141
+ // Re-throw actual errors, only skip JSON parse errors
142
+ if (e instanceof SyntaxError) {
143
+ console.warn('CloudLLM: Skipping malformed JSON line:', jsonStr.substring(0, 100));
144
+ }
145
+ else {
146
+ throw e;
147
+ }
145
148
  }
146
149
  }
147
150
  }
@@ -166,12 +169,22 @@ export class CloudLLM {
166
169
  options?.onToolCall?.(toolCall);
167
170
  }
168
171
  }
172
+ // Check for empty response (no content and no tool calls)
173
+ if (!fullContent && resultToolCalls.length === 0) {
174
+ const reason = finishReason || 'unknown';
175
+ throw new Error(`Cloud LLM returned empty response (finish_reason: ${reason}). ` +
176
+ (reason === 'length'
177
+ ? 'The model hit the token limit before producing output. Try increasing max_completion_tokens.'
178
+ : 'The model did not produce any content.'));
179
+ }
169
180
  // Log the response
170
- this.tracker.logOutput(conversationId, fullContent, resultToolCalls.length > 0 ? resultToolCalls : undefined);
181
+ this.tracker.logOutput(fullContent, resultToolCalls.length > 0 ? resultToolCalls : undefined);
182
+ // Normalize finish reason for our interface
183
+ const normalizedFinishReason = resultToolCalls.length > 0 ? 'tool_calls' : 'stop';
171
184
  return {
172
185
  content: fullContent,
173
186
  toolCalls: resultToolCalls.length > 0 ? resultToolCalls : undefined,
174
- finishReason: resultToolCalls.length > 0 ? 'tool_calls' : finishReason,
187
+ finishReason: normalizedFinishReason,
175
188
  };
176
189
  }
177
190
  convertMessages(messages) {