modular-voice-agent-sdk 1.0.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/dist/backends/cloud/audio-llm.d.ts +72 -0
- package/dist/backends/cloud/audio-llm.d.ts.map +1 -0
- package/dist/backends/cloud/audio-llm.js +366 -0
- package/dist/backends/cloud/audio-llm.js.map +1 -0
- package/dist/backends/cloud/index.d.ts +2 -0
- package/dist/backends/cloud/index.d.ts.map +1 -1
- package/dist/backends/cloud/index.js +2 -0
- package/dist/backends/cloud/index.js.map +1 -1
- package/dist/backends/cloud/llm.d.ts.map +1 -1
- package/dist/backends/cloud/llm.js +31 -18
- package/dist/backends/cloud/llm.js.map +1 -1
- package/dist/backends/native/audio-llm.d.ts +126 -0
- package/dist/backends/native/audio-llm.d.ts.map +1 -0
- package/dist/backends/native/audio-llm.js +680 -0
- package/dist/backends/native/audio-llm.js.map +1 -0
- package/dist/backends/native/llm.d.ts.map +1 -1
- package/dist/backends/native/llm.js +5 -7
- package/dist/backends/native/llm.js.map +1 -1
- package/dist/backends/native/stt.d.ts +2 -2
- package/dist/backends/native/stt.d.ts.map +1 -1
- package/dist/backends/native/stt.js +1 -1
- package/dist/backends/native/stt.js.map +1 -1
- package/dist/backends/transformers/llm.d.ts.map +1 -1
- package/dist/backends/transformers/llm.js +13 -10
- package/dist/backends/transformers/llm.js.map +1 -1
- package/dist/backends/transformers/stt.d.ts +2 -2
- package/dist/backends/transformers/stt.d.ts.map +1 -1
- package/dist/backends/transformers/stt.js +12 -7
- package/dist/backends/transformers/stt.js.map +1 -1
- package/dist/backends/transformers/tts.d.ts.map +1 -1
- package/dist/backends/transformers/tts.js +11 -6
- package/dist/backends/transformers/tts.js.map +1 -1
- package/dist/cache-runtime.d.ts +29 -0
- package/dist/cache-runtime.d.ts.map +1 -0
- package/dist/cache-runtime.js +43 -0
- package/dist/cache-runtime.js.map +1 -0
- package/dist/cache.d.ts +4 -0
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +5 -0
- package/dist/cache.js.map +1 -1
- package/dist/cli.js +47 -7
- package/dist/cli.js.map +1 -1
- package/dist/client/voice-client.d.ts +4 -2
- package/dist/client/voice-client.d.ts.map +1 -1
- package/dist/client/voice-client.js +15 -13
- package/dist/client/voice-client.js.map +1 -1
- package/dist/client/web-speech-stt.d.ts +12 -1
- package/dist/client/web-speech-stt.d.ts.map +1 -1
- package/dist/client/web-speech-stt.js +49 -4
- package/dist/client/web-speech-stt.js.map +1 -1
- package/dist/server/handler.d.ts +12 -7
- package/dist/server/handler.d.ts.map +1 -1
- package/dist/server/handler.js +20 -20
- package/dist/server/handler.js.map +1 -1
- package/dist/services/llm-logger.d.ts +7 -18
- package/dist/services/llm-logger.d.ts.map +1 -1
- package/dist/services/llm-logger.js +22 -41
- package/dist/services/llm-logger.js.map +1 -1
- package/dist/types.d.ts +27 -5
- package/dist/types.d.ts.map +1 -1
- package/dist/voice-pipeline.d.ts +48 -10
- package/dist/voice-pipeline.d.ts.map +1 -1
- package/dist/voice-pipeline.js +138 -40
- package/dist/voice-pipeline.js.map +1 -1
- package/package.json +1 -1
- package/scripts/setup.sh +23 -0
- package/USAGE.md +0 -567
package/README.md
CHANGED
|
@@ -48,6 +48,9 @@ Each component can run in the browser, on a server, or in the cloud. Pick any fr
|
|
|
48
48
|
N/A ☁️ Cloud LLM N/A
|
|
49
49
|
(OpenAI, Ollama, vLLM)
|
|
50
50
|
|
|
51
|
+
N/A ☁️ Cloud Audio LLM N/A
|
|
52
|
+
(GPT Audio, multimodal)
|
|
53
|
+
|
|
51
54
|
```
|
|
52
55
|
|
|
53
56
|
Want browser speech recognition + a cloud LLM + browser speech synthesis? Done. Want everything running locally on your server with native binaries? Also done. Same code structure, same events, different backends.
|
|
@@ -59,11 +62,11 @@ Want browser speech recognition + a cloud LLM + browser speech synthesis? Done.
|
|
|
59
62
|
- **Conversation history** — automatic context management
|
|
60
63
|
- **Hybrid configs** — mix browser and server components freely
|
|
61
64
|
|
|
62
|
-
See [`USAGE.md`](./USAGE.md) for full API documentation.
|
|
65
|
+
See [`docs/USAGE.md`](./docs/USAGE.md) for full API documentation.
|
|
63
66
|
|
|
64
67
|
## Examples
|
|
65
68
|
|
|
66
|
-
See [`examples/`](./examples/) for
|
|
69
|
+
See [`examples/`](./examples/) for interactive demos covering all configurations.
|
|
67
70
|
|
|
68
71
|
```bash
|
|
69
72
|
cd examples
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CloudAudioLLM - Multimodal Audio LLM Backend
|
|
3
|
+
*
|
|
4
|
+
* Implements BOTH STTPipeline and LLMPipeline interfaces.
|
|
5
|
+
* Register the same instance as both `stt` and `llm` in VoicePipeline.
|
|
6
|
+
*
|
|
7
|
+
* Uses internal caching to achieve single API call:
|
|
8
|
+
* 1. transcribe(audio) → calls multimodal API, caches response, returns transcript
|
|
9
|
+
* 2. generate(messages) → returns cached response (no second API call)
|
|
10
|
+
*
|
|
11
|
+
* The model is prompted to return both transcription and response in a structured format.
|
|
12
|
+
*
|
|
13
|
+
* Works with: OpenAI gpt-audio-mini, gpt-audio, and other audio-capable OpenAI-compatible endpoints.
|
|
14
|
+
*/
|
|
15
|
+
import type { STTPipeline, LLMPipeline, CloudLLMConfig, ProgressCallback, Message, LLMGenerateOptions, LLMGenerateResult, TurnContext } from '../../types';
|
|
16
|
+
/** Configuration for CloudAudioLLM: everything in CloudLLMConfig plus audio-encoding options. */
export interface CloudAudioLLMConfig extends CloudLLMConfig {
|
|
17
|
+
/**
|
|
18
|
+
* Format label sent to the API as `input_audio.format`.
* NOTE: the bundled encoder (encodeAudioToBase64 in audio-llm.js) always writes
* a 16-bit mono PCM WAV container and does not branch on this value.
|
|
19
|
+
* @default 'wav'
|
|
20
|
+
*/
|
|
21
|
+
audioFormat?: 'wav' | 'mp3' | 'pcm16';
|
|
22
|
+
/**
|
|
23
|
+
* Sample rate written into the WAV header of the encoded audio input.
* The samples themselves are not resampled by the encoder.
|
|
24
|
+
* @default 16000
|
|
25
|
+
*/
|
|
26
|
+
sampleRate?: number;
|
|
27
|
+
}
|
|
28
|
+
/**
 * Cloud multimodal audio backend. A single instance implements both the
 * STTPipeline and the LLMPipeline contract (see the file header above).
 */
export declare class CloudAudioLLM implements STTPipeline, LLMPipeline {
|
|
29
|
+
/** Constructor config merged over the defaults audioFormat 'wav' and sampleRate 16000. */
private config;
|
|
30
|
+
/** Set to true by initialize(); reported by isReady(). */
private ready;
|
|
31
|
+
/** Conversation tracker used to log LLM input and output. */
private tracker;
|
|
32
|
+
/** Base64 audio of the most recent audio turn; re-attached to the last plain-text user message on text turns (tool follow-ups). */
private lastAudio;
|
|
33
|
+
constructor(config: CloudAudioLLMConfig);
|
|
34
|
+
/** Logs the configured base URL, model and audio format, then marks the backend ready. The progress callback is unused. */
initialize(_onProgress?: ProgressCallback): Promise<void>;
|
|
35
|
+
/** Returns true once initialize() has run. */
isReady(): boolean;
|
|
36
|
+
/** Always returns true. */
supportsTools(): boolean;
|
|
37
|
+
/**
|
|
38
|
+
* Transcribe audio by calling the multimodal API.
|
|
39
|
+
* Sets both sttResult and llmResult in TurnContext for the subsequent generate() call.
* Throws when no TurnContext is passed. When turn.sttResult is already set, its
* transcript is returned without calling the API again.
|
|
40
|
+
*/
|
|
41
|
+
transcribe(audio: Float32Array, turn?: TurnContext): Promise<string>;
|
|
42
|
+
/**
|
|
43
|
+
* Generate response.
|
|
44
|
+
* - Tool followup: Makes fresh API call with tool results
|
|
45
|
+
* - User request: Returns cached result from transcribe()
* A tool followup is detected by the last message having role 'tool'.
* For a user request, throws when options.turn.llmResult is missing.
* In both cases the response content is replayed to options.onToken one character at a time.
|
|
46
|
+
*/
|
|
47
|
+
generate(messages: Message[], options?: LLMGenerateOptions): Promise<LLMGenerateResult>;
|
|
48
|
+
/**
|
|
49
|
+
* Make API call to the audio model.
|
|
50
|
+
*
|
|
51
|
+
* @param messages - Conversation history
|
|
52
|
+
* @param options - Generation options (tools, etc.)
|
|
53
|
+
* @param freshAudio - If provided, this is an audio turn (transcribe + respond).
|
|
54
|
+
* If not provided, this is a text turn (use stored lastAudio).
* Resolves to { transcript, result }. Throws on a non-OK HTTP status and when
* the reply has neither content nor tool calls.
|
|
55
|
+
*/
|
|
56
|
+
private callAPI;
|
|
57
|
+
/**
|
|
58
|
+
* Build messages for the API call.
* Appends the JSON-format instruction as a user message, and the audio part when available.
|
|
59
|
+
*/
|
|
60
|
+
private buildMessages;
|
|
61
|
+
/**
|
|
62
|
+
* Parse API response. Handles both audio turn format {transcript, response}
|
|
63
|
+
* and text turn format {response}.
* Throws when the content is not in the expected format and there are no tool calls.
|
|
64
|
+
*/
|
|
65
|
+
private parseResponse;
|
|
66
|
+
/** Encodes Float32 samples as a 16-bit mono PCM WAV file and returns it base64-encoded. */
private encodeAudioToBase64;
|
|
67
|
+
/** Maps pipeline messages to the chat-completions message shape (tool_call_id, tool_calls). */
private convertMessages;
|
|
68
|
+
/** Wraps each tool definition as { type: 'function', function: { name, description, parameters } }. */
private convertTools;
|
|
69
|
+
/** Reads choices[0].message.tool_calls and JSON-parses each call's arguments. */
private extractToolCalls;
|
|
70
|
+
/** Returns an Authorization: Bearer header when config.apiKey is set, otherwise no headers. */
private getHeaders;
|
|
71
|
+
}
|
|
72
|
+
//# sourceMappingURL=audio-llm.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-llm.d.ts","sourceRoot":"","sources":["../../../src/backends/cloud/audio-llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EACV,WAAW,EACX,WAAW,EACX,cAAc,EACd,gBAAgB,EAChB,OAAO,EACP,kBAAkB,EAClB,iBAAiB,EAKjB,WAAW,EACZ,MAAM,aAAa,CAAC;AAGrB,MAAM,WAAW,mBAAoB,SAAQ,cAAc;IACzD;;;OAGG;IACH,WAAW,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,OAAO,CAAC;IACtC;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,qBAAa,aAAc,YAAW,WAAW,EAAE,WAAW;IAC5D,OAAO,CAAC,MAAM,CAAsB;IACpC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,OAAO,CAAyB;IAGxC,OAAO,CAAC,SAAS,CAAuB;gBAE5B,MAAM,EAAE,mBAAmB;IASjC,UAAU,CAAC,WAAW,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAQ/D,OAAO,IAAI,OAAO;IAIlB,aAAa,IAAI,OAAO;IAQxB;;;OAGG;IACG,UAAU,CAAC,KAAK,EAAE,YAAY,EAAE,IAAI,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IAqB1E;;;;OAIG;IACG,QAAQ,CACZ,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,CAAC,EAAE,kBAAkB,GAC3B,OAAO,CAAC,iBAAiB,CAAC;IA+C7B;;;;;;;OAOG;YACW,OAAO;IAsErB;;OAEG;IACH,OAAO,CAAC,aAAa;IAoErB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAqCrB,OAAO,CAAC,mBAAmB;IAiD3B,OAAO,CAAC,eAAe;IAiCvB,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,gBAAgB;IA4BxB,OAAO,CAAC,UAAU;CAOnB"}
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CloudAudioLLM - Multimodal Audio LLM Backend
|
|
3
|
+
*
|
|
4
|
+
* Implements BOTH STTPipeline and LLMPipeline interfaces.
|
|
5
|
+
* Register the same instance as both `stt` and `llm` in VoicePipeline.
|
|
6
|
+
*
|
|
7
|
+
* Uses internal caching to achieve single API call:
|
|
8
|
+
* 1. transcribe(audio) → calls multimodal API, caches response, returns transcript
|
|
9
|
+
* 2. generate(messages) → returns cached response (no second API call)
|
|
10
|
+
*
|
|
11
|
+
* The model is prompted to return both transcription and response in a structured format.
|
|
12
|
+
*
|
|
13
|
+
* Works with: OpenAI gpt-audio-mini, gpt-audio, and other audio-capable OpenAI-compatible endpoints.
|
|
14
|
+
*/
|
|
15
|
+
import { LLMLogger, LLMConversationTracker } from '../../services';
|
|
16
|
+
/**
 * Multimodal audio backend that serves as both the STT and the LLM stage of a
 * voice pipeline.
 *
 * transcribe() sends the audio to an OpenAI-compatible `/chat/completions`
 * endpoint, asks for a JSON `{transcript, response}` reply and stores both
 * parts on the TurnContext. generate() then returns that stored result instead
 * of issuing a second request. Tool follow-ups always issue a fresh request.
 */
export class CloudAudioLLM {
    config;
    ready = false;
    tracker;
    // Last audio from this session - used for tool follow-ups (audio model requires audio in every request)
    lastAudio = null;

    /**
     * @param {object} config - CloudLLMConfig fields plus optional `audioFormat` and `sampleRate`.
     */
    constructor(config) {
        this.config = {
            ...config,
            // Applied after the spread so an explicit `undefined` cannot erase a default.
            audioFormat: config?.audioFormat ?? 'wav',
            sampleRate: config?.sampleRate ?? 16000,
        };
        this.tracker = new LLMConversationTracker(new LLMLogger());
    }

    /**
     * Logs the configuration and marks the backend ready. No network call is made.
     *
     * @param {Function} [_onProgress] - Unused; accepted for pipeline compatibility.
     * @returns {Promise<void>}
     */
    async initialize(_onProgress) {
        console.log(`Initializing CloudAudioLLM (${this.config.baseUrl})...`);
        console.log(`  Model: ${this.config.model}`);
        console.log(`  Audio format: ${this.config.audioFormat}`);
        this.ready = true;
        console.log('CloudAudioLLM ready.');
    }

    /** @returns {boolean} True once initialize() has run. */
    isReady() {
        return this.ready;
    }

    /** @returns {boolean} Always true. */
    supportsTools() {
        return true;
    }

    // ============================================================
    // STTPipeline Implementation
    // ============================================================

    /**
     * Transcribe audio by calling the multimodal API.
     * Sets both sttResult and llmResult on the TurnContext for the subsequent generate() call.
     *
     * @param {Float32Array} audio - Audio samples for this turn.
     * @param {object} turn - TurnContext; its `history` and `tools` are sent with the request.
     * @returns {Promise<string>} The transcript.
     * @throws {Error} When `turn` is missing, or when the API call fails.
     */
    async transcribe(audio, turn) {
        if (!turn) {
            throw new Error('CloudAudioLLM.transcribe() requires TurnContext. Pass turn parameter.');
        }
        // A transcript already stored on this turn means the API was called; reuse it.
        if (turn.sttResult) {
            return turn.sttResult.transcript;
        }
        const { transcript, result } = await this.callAPI(turn.history, { tools: turn.tools }, audio);
        turn.sttResult = { transcript };
        turn.llmResult = result;
        return transcript;
    }

    // ============================================================
    // LLMPipeline Implementation
    // ============================================================

    /**
     * Generate response.
     * - Tool followup (last message has role 'tool'): makes a fresh API call with tool results.
     * - User request: returns the result stored on the turn by transcribe().
     *
     * @param {Array<object>} messages - Conversation so far.
     * @param {object} [options] - May carry `turn`, `tools` and an `onToken` callback.
     * @returns {Promise<object>} `{ content, toolCalls, finishReason }`.
     * @throws {Error} For a user request when `options.turn.llmResult` is missing.
     */
    async generate(messages, options) {
        const lastMessage = messages[messages.length - 1];
        if (lastMessage?.role === 'tool') {
            // callAPI logs input/output itself on non-audio turns.
            const { result } = await this.callAPI(messages, options);
            this.emitTokens(result, options);
            return result;
        }
        const cached = options?.turn?.llmResult;
        if (!cached) {
            throw new Error('CloudAudioLLM.generate() called for user request without cached result. ' +
                'Call transcribe() first, or check if this should be a tool followup.');
        }
        this.tracker.logInput(messages);
        this.emitTokens(cached, options);
        this.tracker.logOutput(cached.content, cached.toolCalls);
        return cached;
    }

    /**
     * Replays a finished response to `options.onToken`, one character at a time.
     * Does nothing when there is no callback or no content.
     */
    emitTokens(result, options) {
        if (!options?.onToken || !result.content) {
            return;
        }
        for (const char of result.content) {
            options.onToken(char);
        }
    }

    // ============================================================
    // Internal: Unified API Call
    // ============================================================

    /**
     * Make API call to the audio model.
     *
     * @param messages - Conversation history
     * @param options - Generation options (tools, etc.)
     * @param freshAudio - If provided, this is an audio turn (transcribe + respond).
     *                     If not provided, this is a text turn (use stored lastAudio).
     * @returns {Promise<{transcript: string, result: object}>}
     * @throws {Error} On a non-OK HTTP status, or when the reply has neither content nor tool calls.
     */
    async callAPI(messages, options, freshAudio) {
        const isAudioTurn = !!freshAudio;
        const hasTools = !!(options?.tools && options.tools.length > 0);
        const body = {
            model: this.config.model,
            messages: this.buildMessages(messages, hasTools, freshAudio),
            modalities: ['text'],
            ...this.config.modelParams,
        };
        if (hasTools) {
            body.tools = this.convertTools(options.tools);
            body.parallel_tool_calls = false;
        }
        // Audio turns are logged later by generate(), once the transcript is in the history.
        if (!isAudioTurn) {
            this.tracker.logInput(messages);
        }
        // Tolerate a trailing slash on baseUrl so the path never becomes "//chat/completions".
        const endpoint = `${String(this.config.baseUrl).replace(/\/+$/, '')}/chat/completions`;
        const response = await fetch(endpoint, {
            method: 'POST',
            headers: {
                ...this.getHeaders(),
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(body),
        });
        if (!response.ok) {
            const errorText = await response.text();
            throw new Error(`CloudAudioLLM API error (${response.status}): ${errorText}`);
        }
        const data = await response.json();
        const rawContent = data.choices?.[0]?.message?.content || '';
        const toolCalls = this.extractToolCalls(data);
        const finishReason = data.choices?.[0]?.finish_reason;
        if (!rawContent && toolCalls.length === 0) {
            const reason = finishReason || 'unknown';
            throw new Error(`CloudAudioLLM returned empty response (finish_reason: ${reason}). ` +
                (reason === 'length'
                    ? 'The model hit the token limit before producing output. Try increasing max_completion_tokens.'
                    : 'The model did not produce any content.'));
        }
        const hasToolCalls = toolCalls.length > 0;
        const { transcript, responseText } = this.parseResponse(rawContent, isAudioTurn, hasToolCalls);
        if (!isAudioTurn) {
            this.tracker.logOutput(responseText, hasToolCalls ? toolCalls : undefined);
        }
        const result = {
            content: responseText,
            toolCalls: hasToolCalls ? toolCalls : undefined,
            finishReason: hasToolCalls ? 'tool_calls' : 'stop',
        };
        return { transcript, result };
    }

    /**
     * Build messages for the API call.
     *
     * Audio turn: encodes `freshAudio`, remembers it in `lastAudio`, and appends a
     * user message holding the audio plus the JSON-format instruction.
     * Text turn: re-attaches `lastAudio` (if any) to the most recent plain-text
     * user message and appends the JSON-format instruction as a user message.
     *
     * @returns {Array<object>} Messages in chat-completions shape.
     */
    buildMessages(messages, hasTools, freshAudio) {
        const converted = this.convertMessages(messages);
        // Add tool instruction to system prompt if tools are available
        if (hasTools && converted.length > 0 && converted[0].role === 'system') {
            converted[0].content += '\n\nOnly call ONE tool at a time. Wait for the result before deciding if another tool is needed.';
        }
        if (freshAudio) {
            const base64Audio = this.encodeAudioToBase64(freshAudio);
            this.lastAudio = base64Audio;
            converted.push({
                role: 'user',
                content: [
                    {
                        type: 'input_audio',
                        input_audio: {
                            data: base64Audio,
                            format: this.config.audioFormat,
                        },
                    },
                    {
                        type: 'text',
                        text: 'Respond with JSON: {"transcript":"<what user said>","response":"<your response>"}',
                    },
                ],
            });
            return converted;
        }
        if (this.lastAudio) {
            // Walk backwards: only the most recent plain-text user message gets the audio.
            for (let i = converted.length - 1; i >= 0; i--) {
                const msg = converted[i];
                if (msg.role === 'user' && typeof msg.content === 'string') {
                    converted[i] = {
                        role: 'user',
                        content: [
                            {
                                type: 'input_audio',
                                input_audio: {
                                    data: this.lastAudio,
                                    format: this.config.audioFormat,
                                },
                            },
                            {
                                type: 'text',
                                text: msg.content,
                            },
                        ],
                    };
                    break;
                }
            }
        }
        converted.push({
            role: 'user',
            content: 'Respond to the user\'s request with JSON: {"response":"<your response>"}',
        });
        return converted;
    }

    /**
     * Parse API response. Handles both audio turn format {transcript, response}
     * and text turn format {response}. JSON wrapped in a Markdown code fence is accepted.
     *
     * When the model answered with tool calls instead of the expected JSON, a
     * transcript it did supply is kept, and raw JSON is never returned as the
     * response text.
     *
     * @param {string} content - Raw message content ('' when absent).
     * @param {boolean} isAudioTurn - Whether a transcript is expected.
     * @param {boolean} hasToolCalls - Whether the reply carried tool calls.
     * @returns {{transcript: string, responseText: string}}
     * @throws {Error} When the content is not in the expected format and there are no tool calls.
     */
    parseResponse(content, isAudioTurn, hasToolCalls) {
        let parsed;
        if (content) {
            const trimmed = content.trim();
            // Models frequently wrap JSON in ```json ... ``` despite instructions.
            const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
            try {
                parsed = JSON.parse(fenced ? fenced[1] : trimmed);
            }
            catch {
                // Not valid JSON, continue to fallback
                parsed = undefined;
            }
        }
        const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
        if (isRecord) {
            if (isAudioTurn && parsed.transcript !== undefined && parsed.response !== undefined) {
                return { transcript: parsed.transcript, responseText: parsed.response };
            }
            if (!isAudioTurn && parsed.response !== undefined) {
                return { transcript: '', responseText: parsed.response };
            }
        }
        // Fallback: model went straight to tool call
        if (hasToolCalls) {
            const placeholder = isAudioTurn ? '[audio processed]' : '';
            if (isRecord) {
                return {
                    transcript: isAudioTurn && typeof parsed.transcript === 'string' ? parsed.transcript : placeholder,
                    responseText: typeof parsed.response === 'string' ? parsed.response : '',
                };
            }
            return { transcript: placeholder, responseText: content };
        }
        const expectedFormat = isAudioTurn ? '{"transcript":"...","response":"..."}' : '{"response":"..."}';
        throw new Error(`CloudAudioLLM: Expected JSON ${expectedFormat} but got: ${content.substring(0, 100)}`);
    }

    // ============================================================
    // Internal: Helpers
    // ============================================================

    /**
     * Encodes samples as a 16-bit mono PCM WAV file (44-byte RIFF header followed
     * by little-endian samples) and returns it as base64.
     *
     * NOTE(review): the output is always WAV; `config.audioFormat` only labels the
     * request. Confirm whether 'mp3' / 'pcm16' are meant to be supported here.
     *
     * @param {Float32Array} audio - Samples; values are clamped to [-1, 1].
     * @returns {string} Base64-encoded WAV bytes.
     */
    encodeAudioToBase64(audio) {
        const numChannels = 1;
        const bitsPerSample = 16;
        const headerSize = 44;
        const sampleRate = this.config.sampleRate ?? 16000;
        const blockAlign = (numChannels * bitsPerSample) / 8;
        const byteRate = sampleRate * blockAlign;
        const dataSize = audio.length * 2;
        const view = new DataView(new ArrayBuffer(headerSize + dataSize));
        const writeString = (offset, str) => {
            for (let i = 0; i < str.length; i++) {
                view.setUint8(offset + i, str.charCodeAt(i));
            }
        };
        writeString(0, 'RIFF');
        view.setUint32(4, headerSize + dataSize - 8, true);
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true); // fmt chunk size
        view.setUint16(20, 1, true); // format tag 1 = PCM
        view.setUint16(22, numChannels, true);
        view.setUint32(24, sampleRate, true);
        view.setUint32(28, byteRate, true);
        view.setUint16(32, blockAlign, true);
        view.setUint16(34, bitsPerSample, true);
        writeString(36, 'data');
        view.setUint32(40, dataSize, true);
        for (let i = 0; i < audio.length; i++) {
            const sample = Math.max(-1, Math.min(1, audio[i]));
            // Asymmetric scale: int16 spans -32768..32767.
            view.setInt16(headerSize + i * 2, sample < 0 ? sample * 0x8000 : sample * 0x7fff, true);
        }
        // Build the binary string in chunks rather than one character per byte.
        const bytes = new Uint8Array(view.buffer);
        const chunkSize = 0x8000;
        const parts = [];
        for (let start = 0; start < bytes.length; start += chunkSize) {
            parts.push(String.fromCharCode.apply(null, bytes.subarray(start, start + chunkSize)));
        }
        return btoa(parts.join(''));
    }

    /**
     * Maps pipeline messages to the chat-completions shape: tool messages gain
     * `tool_call_id`, assistant messages with tool calls gain `tool_calls`, and
     * everything else is copied as `{ role, content }`.
     */
    convertMessages(messages) {
        return messages.map((m) => {
            if (m.role === 'tool') {
                return {
                    role: 'tool',
                    content: m.content,
                    tool_call_id: m.toolCallId,
                };
            }
            if (m.role === 'assistant' && m.toolCalls && m.toolCalls.length > 0) {
                return {
                    role: 'assistant',
                    content: m.content || null,
                    tool_calls: m.toolCalls.map((tc) => ({
                        id: tc.id,
                        type: 'function',
                        function: {
                            name: tc.name,
                            arguments: JSON.stringify(tc.arguments),
                        },
                    })),
                };
            }
            return { role: m.role, content: m.content };
        });
    }

    /** Wraps each tool definition in the `{ type: 'function', function: {...} }` envelope. */
    convertTools(tools) {
        return tools.map(({ name, description, parameters }) => ({
            type: 'function',
            function: { name, description, parameters },
        }));
    }

    /**
     * Reads `choices[0].message.tool_calls` from an API response.
     * Arguments may arrive as a JSON string (parsed here), as an already-decoded
     * object (used as is), or be absent (treated as `{}`).
     *
     * @returns {Array<{id: string, name: string, arguments: object}>} Empty when there are no tool calls.
     * @throws {Error} When a call's argument string is not valid JSON (original error in `cause`).
     */
    extractToolCalls(data) {
        const rawCalls = data?.choices?.[0]?.message?.tool_calls;
        if (!rawCalls) {
            return [];
        }
        const toolCalls = [];
        for (const tc of rawCalls) {
            const rawArgs = tc.function.arguments;
            let args;
            if (rawArgs !== null && typeof rawArgs === 'object') {
                args = rawArgs;
            }
            else {
                try {
                    args = JSON.parse(rawArgs || '{}');
                }
                catch (err) {
                    throw new Error(`CloudAudioLLM: tool call "${tc.function.name}" has invalid JSON arguments: ` +
                        String(rawArgs).substring(0, 100), { cause: err });
                }
            }
            toolCalls.push({ id: tc.id, name: tc.function.name, arguments: args });
        }
        return toolCalls;
    }

    /** Returns an `Authorization: Bearer` header when an API key is configured, otherwise `{}`. */
    getHeaders() {
        const headers = {};
        if (this.config.apiKey) {
            headers['Authorization'] = `Bearer ${this.config.apiKey}`;
        }
        return headers;
    }
}
|
|
366
|
+
//# sourceMappingURL=audio-llm.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-llm.js","sourceRoot":"","sources":["../../../src/backends/cloud/audio-llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAgBH,OAAO,EAAE,SAAS,EAAE,sBAAsB,EAAuB,MAAM,gBAAgB,CAAC;AAexF,MAAM,OAAO,aAAa;IAChB,MAAM,CAAsB;IAC5B,KAAK,GAAG,KAAK,CAAC;IACd,OAAO,CAAyB;IAExC,wGAAwG;IAChG,SAAS,GAAkB,IAAI,CAAC;IAExC,YAAY,MAA2B;QACrC,IAAI,CAAC,MAAM,GAAG;YACZ,WAAW,EAAE,KAAK;YAClB,UAAU,EAAE,KAAK;YACjB,GAAG,MAAM;SACV,CAAC;QACF,IAAI,CAAC,OAAO,GAAG,IAAI,sBAAsB,CAAC,IAAI,SAAS,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,WAA8B;QAC7C,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,CAAC,MAAM,CAAC,OAAO,MAAM,CAAC,CAAC;QACtE,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;QAC7C,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC;QAC1D,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;IACtC,CAAC;IAED,OAAO;QACL,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,aAAa;QACX,OAAO,IAAI,CAAC;IACd,CAAC;IAED,+DAA+D;IAC/D,6BAA6B;IAC7B,+DAA+D;IAE/D;;;OAGG;IACH,KAAK,CAAC,UAAU,CAAC,KAAmB,EAAE,IAAkB;QACtD,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CAAC,uEAAuE,CAAC,CAAC;QAC3F,CAAC;QAED,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,OAAO,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC;QACnC,CAAC;QAED,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,EAAE,KAAK,CAAC,CAAC;QAE9F,IAAI,CAAC,SAAS,GAAG,EAAE,UAAU,EAAE,CAAC;QAChC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC;QAExB,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,+DAA+D;IAC/D,6BAA6B;IAC7B,+DAA+D;IAE/D;;;;OAIG;IACH,KAAK,CAAC,QAAQ,CACZ,QAAmB,EACnB,OAA4B;QAE5B,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,CAAC;QAE3B,2CAA2C;QAC3C,MAAM,WAAW,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAClD,MAAM,cAAc,GAAG,WAAW,EAAE,IAAI,KAAK,MAAM,CAAC;QAEpD,IAAI,cAAc,EAAE,CAAC;YACnB,kDAAkD;YAClD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEzD,IAAI,OAAO,EAAE,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBACvC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;oBAClC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;gB
ACxB,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,8CAA8C;QAC9C,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;YACrB,MAAM,IAAI,KAAK,CACb,0EAA0E;gBAC1E,sEAAsE,CACvE,CAAC;QACJ,CAAC;QAED,sCAAsC;QACtC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC;QAE9B,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,QAA4B,CAAC,CAAC;QAEpD,IAAI,OAAO,EAAE,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACvC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBAClC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;QAED,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;QACzD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,+DAA+D;IAC/D,6BAA6B;IAC7B,+DAA+D;IAE/D;;;;;;;OAOG;IACK,KAAK,CAAC,OAAO,CACnB,QAAmB,EACnB,OAA4B,EAC5B,UAAyB;QAEzB,MAAM,WAAW,GAAG,CAAC,CAAC,UAAU,CAAC;QACjC,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,KAAK,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEhE,MAAM,cAAc,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;QAE1E,MAAM,IAAI,GAA4B;YACpC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;YACxB,QAAQ,EAAE,cAAc;YACxB,UAAU,EAAE,CAAC,MAAM,CAAC;YACpB,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW;SAC3B,CAAC;QAEF,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAQ,CAAC,KAAM,CAAC,CAAC;YAChD,IAAI,CAAC,mBAAmB,GAAG,KAAK,CAAC;QACnC,CAAC;QAED,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,QAA4B,CAAC,CAAC;QACtD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,mBAAmB,EAAE;YACtE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,GAAG,IAAI,CAAC,UAAU,EAAE;gBACpB,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,MAAM,SAAS,EAAE,CAAC,CAAC;QAChF,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;QAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;QAC9C,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC;QAEtD,IAAI,CAAC,UAAU,IAAI,SAAS,CA
AC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1C,MAAM,MAAM,GAAG,YAAY,IAAI,SAAS,CAAC;YACzC,MAAM,IAAI,KAAK,CACb,yDAAyD,MAAM,KAAK;gBAClE,CAAC,MAAM,KAAK,QAAQ;oBAClB,CAAC,CAAC,8FAA8F;oBAChG,CAAC,CAAC,wCAAwC,CAAC,CAChD,CAAC;QACJ,CAAC;QAED,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,WAAW,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEvG,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,YAAY,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACrF,CAAC;QAED,MAAM,MAAM,GAAsB;YAChC,OAAO,EAAE,YAAY;YACrB,SAAS,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;YACvD,YAAY,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM;SAC3D,CAAC;QAEF,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC;IAChC,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,QAAmB,EAAE,QAAiB,EAAE,UAAyB;QACrF,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;QAEjD,+DAA+D;QAC/D,IAAI,QAAQ,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrC,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAsC,CAAC;YAChE,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC5B,KAAK,CAAC,OAAO,IAAI,kGAAkG,CAAC;YACtH,CAAC;QACH,CAAC;QAED,IAAI,UAAU,EAAE,CAAC;YACf,kEAAkE;YAClE,MAAM,WAAW,GAAG,IAAI,CAAC,mBAAmB,CAAC,UAAU,CAAC,CAAC;YACzD,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC;YAE7B,SAAS,CAAC,IAAI,CAAC;gBACb,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,aAAa;wBACnB,WAAW,EAAE;4BACX,IAAI,EAAE,WAAW;4BACjB,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;yBAChC;qBACF;oBACD;wBACE,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,mFAAmF;qBAC1F;iBACF;aACF,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,8EAA8E;YAC9E,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACnB,KAAK,IAAI,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC/C,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,CAAuC,CAAC;oBAC/D,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;wBAC3D,SAAS,CAAC,CAAC,CAAC,GAAG;4BACb,IAAI,EAAE,MAAM;4BACZ,OAAO,EAAE;gCACP;oCACE,IAAI,EAAE,aAAa;oCACnB,WAAW,EAAE;wCACX,IAAI,EAAE,IAAI,CAAC,SAAS;wCACpB,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;qCAChC;iCACF;gCACD;oCACE,IAAI,EAAE,MAA
M;oCACZ,IAAI,EAAE,GAAG,CAAC,OAAO;iCAClB;6BACF;yBACF,CAAC;wBACF,MAAM;oBACR,CAAC;gBACH,CAAC;YACH,CAAC;YAED,SAAS,CAAC,IAAI,CAAC;gBACb,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,0EAA0E;aACpF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;OAGG;IACK,aAAa,CACnB,OAAe,EACf,WAAoB,EACpB,YAAqB;QAErB,IAAI,OAAO,EAAE,CAAC;YACZ,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;gBAEnC,IAAI,WAAW,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;oBACpF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,YAAY,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAC1E,CAAC;gBAED,IAAI,CAAC,WAAW,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;oBAClD,OAAO,EAAE,UAAU,EAAE,EAAE,EAAE,YAAY,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAC3D,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,uCAAuC;YACzC,CAAC;QACH,CAAC;QAED,6CAA6C;QAC7C,IAAI,YAAY,EAAE,CAAC;YACjB,OAAO;gBACL,UAAU,EAAE,WAAW,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,EAAE;gBAClD,YAAY,EAAE,OAAO;aACtB,CAAC;QACJ,CAAC;QAED,MAAM,cAAc,GAAG,WAAW,CAAC,CAAC,CAAC,uCAAuC,CAAC,CAAC,CAAC,oBAAoB,CAAC;QACpG,MAAM,IAAI,KAAK,CAAC,gCAAgC,cAAc,aAAa,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,+DAA+D;IAC/D,oBAAoB;IACpB,+DAA+D;IAEvD,mBAAmB,CAAC,KAAmB;QAC7C,MAAM,WAAW,GAAG,CAAC,CAAC;QACtB,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,UAAW,CAAC;QAC3C,MAAM,aAAa,GAAG,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,CAAC,UAAU,GAAG,WAAW,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;QAChE,MAAM,UAAU,GAAG,CAAC,WAAW,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;QACrD,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;QAClC,MAAM,UAAU,GAAG,EAAE,CAAC;QACtB,MAAM,SAAS,GAAG,UAAU,GAAG,QAAQ,CAAC;QAExC,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,SAAS,CAAC,CAAC;QAC1C,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;QAElC,MAAM,WAAW,GAAG,CAAC,MAAc,EAAE,GAAW,EAAE,EAAE;YAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YAC/C,CAAC;QACH,CAAC,CAAC;QAEF,WAAW,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QACvB,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;QACvC
,WAAW,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QACvB,WAAW,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QACxB,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,CAAC;QAC7B,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC5B,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,WAAW,EAAE,IAAI,CAAC,CAAC;QACtC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;QACrC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;QACnC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;QACrC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,aAAa,EAAE,IAAI,CAAC,CAAC;QACxC,WAAW,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QACxB,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;QAEnC,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACnD,MAAM,KAAK,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC;YAC7D,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;YACnC,MAAM,IAAI,CAAC,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;QACrC,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,MAAM,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1C,CAAC;QACD,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC;IACtB,CAAC;IAEO,eAAe,CAAC,QAAmB;QACzC,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YACxB,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,CAAgB,CAAC;gBACjC,OAAO;oBACL,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,YAAY,EAAE,OAAO,CAAC,UAAU;iBACjC,CAAC;YACJ,CAAC;YAED,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC3B,MAAM,YAAY,GAAG,CAAqB,CAAC;gBAC3C,IAAI,YAAY,CAAC,SAAS,IAAI,YAAY,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAChE,OAAO;wBACL,IAAI,EAAE,WAAW;wBACjB,OAAO,EAAE,YAAY,CAAC,OAAO,IAAI,IAAI;wBACrC,UAAU,EAAE,YAAY,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;4BAC9C,EAAE,EAAE,EAAE,CAAC,EAAE;4BACT,IAAI,EAAE,UAAU;4BAChB,QAAQ,EAAE;gCACR,IAAI,EAAE,EAAE,C
AAC,IAAI;gCACb,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,SAAS,CAAC;6BACxC;yBACF,CAAC,CAAC;qBACJ,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;QAC9C,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,YAAY,CAAC,KAAuB;QAC1C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YAC1B,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE;gBACR,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;aAC5B;SACF,CAAC,CAAC,CAAC;IACN,CAAC;IAEO,gBAAgB,CAAC,IAAa;QACpC,MAAM,SAAS,GAAe,EAAE,CAAC;QACjC,MAAM,OAAO,GACX,IAUD,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC;QAEzB,IAAI,OAAO,EAAE,UAAU,EAAE,CAAC;YACxB,KAAK,MAAM,EAAE,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACpC,SAAS,CAAC,IAAI,CAAC;oBACb,EAAE,EAAE,EAAE,CAAC,EAAE;oBACT,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI;oBACtB,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,SAAS,IAAI,IAAI,CAAC;iBACrD,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,UAAU;QAChB,MAAM,OAAO,GAA2B,EAAE,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACvB,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QAC5D,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/backends/cloud/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,OAAO,CAAC;AACjC,YAAY,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/backends/cloud/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,OAAO,CAAC;AACjC,YAAY,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAGlD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,YAAY,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -3,4 +3,6 @@
|
|
|
3
3
|
* Works with: OpenAI, Ollama, vLLM, LMStudio, and any OpenAI-compatible endpoint
|
|
4
4
|
*/
|
|
5
5
|
export { CloudLLM } from './llm';
|
|
6
|
+
// Audio LLM (multimodal - implements both STTPipeline and LLMPipeline)
|
|
7
|
+
export { CloudAudioLLM } from './audio-llm';
|
|
6
8
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/backends/cloud/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,OAAO,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/backends/cloud/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,OAAO,CAAC;AAGjC,uEAAuE;AACvE,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/backends/cloud/llm.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EACV,WAAW,EACX,cAAc,EACd,gBAAgB,EAChB,OAAO,EACP,kBAAkB,EAClB,iBAAiB,EAKlB,MAAM,aAAa,CAAC;AAuBrB,qBAAa,QAAS,YAAW,WAAW;IAC1C,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,OAAO,CAAyB;gBAE5B,MAAM,EAAE,cAAc;
|
|
1
|
+
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/backends/cloud/llm.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EACV,WAAW,EACX,cAAc,EACd,gBAAgB,EAChB,OAAO,EACP,kBAAkB,EAClB,iBAAiB,EAKlB,MAAM,aAAa,CAAC;AAuBrB,qBAAa,QAAS,YAAW,WAAW;IAC1C,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,OAAO,CAAyB;gBAE5B,MAAM,EAAE,cAAc;IAK5B,UAAU,CAAC,WAAW,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA2B/D,aAAa,IAAI,OAAO;IAIlB,QAAQ,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAmL7F,OAAO,CAAC,eAAe;IAuCvB,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,UAAU;IAUlB,OAAO,IAAI,OAAO;CAGnB"}
|
|
@@ -11,11 +11,7 @@ export class CloudLLM {
|
|
|
11
11
|
ready = false;
|
|
12
12
|
tracker;
|
|
13
13
|
constructor(config) {
|
|
14
|
-
this.config =
|
|
15
|
-
maxTokens: 256,
|
|
16
|
-
temperature: 0.7,
|
|
17
|
-
...config,
|
|
18
|
-
};
|
|
14
|
+
this.config = config;
|
|
19
15
|
this.tracker = new LLMConversationTracker(new LLMLogger());
|
|
20
16
|
}
|
|
21
17
|
async initialize(_onProgress) {
|
|
@@ -50,10 +46,8 @@ export class CloudLLM {
|
|
|
50
46
|
if (!this.ready) {
|
|
51
47
|
throw new Error('LLM pipeline not initialized');
|
|
52
48
|
}
|
|
53
|
-
// Use conversation ID if provided, else default
|
|
54
|
-
const conversationId = options?.conversationId ?? 'default';
|
|
55
49
|
// Log the input messages
|
|
56
|
-
this.tracker.logInput(
|
|
50
|
+
this.tracker.logInput(messages);
|
|
57
51
|
const url = `${this.config.baseUrl}/chat/completions`;
|
|
58
52
|
// Convert messages to OpenAI format
|
|
59
53
|
const openaiMessages = this.convertMessages(messages);
|
|
@@ -61,9 +55,8 @@ export class CloudLLM {
|
|
|
61
55
|
const body = {
|
|
62
56
|
model: this.config.model,
|
|
63
57
|
messages: openaiMessages,
|
|
64
|
-
max_tokens: this.config.maxTokens,
|
|
65
|
-
temperature: this.config.temperature,
|
|
66
58
|
stream: true,
|
|
59
|
+
...this.config.modelParams,
|
|
67
60
|
};
|
|
68
61
|
// Add tools if provided
|
|
69
62
|
if (options?.tools && options.tools.length > 0) {
|
|
@@ -90,7 +83,7 @@ export class CloudLLM {
|
|
|
90
83
|
let fullContent = '';
|
|
91
84
|
let buffer = '';
|
|
92
85
|
const toolCalls = new Map();
|
|
93
|
-
let finishReason =
|
|
86
|
+
let finishReason = null;
|
|
94
87
|
while (true) {
|
|
95
88
|
const { done, value } = await reader.read();
|
|
96
89
|
if (done) {
|
|
@@ -109,6 +102,10 @@ export class CloudLLM {
|
|
|
109
102
|
const jsonStr = trimmed.slice(6);
|
|
110
103
|
try {
|
|
111
104
|
const parsed = JSON.parse(jsonStr);
|
|
105
|
+
// Check for API error in stream
|
|
106
|
+
if (parsed.error) {
|
|
107
|
+
throw new Error(`Cloud LLM stream error: ${JSON.stringify(parsed.error)}`);
|
|
108
|
+
}
|
|
112
109
|
const choice = parsed.choices?.[0];
|
|
113
110
|
const delta = choice?.delta;
|
|
114
111
|
// Handle text content
|
|
@@ -135,13 +132,19 @@ export class CloudLLM {
|
|
|
135
132
|
}
|
|
136
133
|
}
|
|
137
134
|
}
|
|
138
|
-
//
|
|
139
|
-
if (choice?.finish_reason
|
|
140
|
-
finishReason =
|
|
135
|
+
// Track finish reason from API
|
|
136
|
+
if (choice?.finish_reason) {
|
|
137
|
+
finishReason = choice.finish_reason;
|
|
141
138
|
}
|
|
142
139
|
}
|
|
143
|
-
catch {
|
|
144
|
-
//
|
|
140
|
+
catch (e) {
|
|
141
|
+
// Re-throw actual errors, only skip JSON parse errors
|
|
142
|
+
if (e instanceof SyntaxError) {
|
|
143
|
+
console.warn('CloudLLM: Skipping malformed JSON line:', jsonStr.substring(0, 100));
|
|
144
|
+
}
|
|
145
|
+
else {
|
|
146
|
+
throw e;
|
|
147
|
+
}
|
|
145
148
|
}
|
|
146
149
|
}
|
|
147
150
|
}
|
|
@@ -166,12 +169,22 @@ export class CloudLLM {
|
|
|
166
169
|
options?.onToolCall?.(toolCall);
|
|
167
170
|
}
|
|
168
171
|
}
|
|
172
|
+
// Check for empty response (no content and no tool calls)
|
|
173
|
+
if (!fullContent && resultToolCalls.length === 0) {
|
|
174
|
+
const reason = finishReason || 'unknown';
|
|
175
|
+
throw new Error(`Cloud LLM returned empty response (finish_reason: ${reason}). ` +
|
|
176
|
+
(reason === 'length'
|
|
177
|
+
? 'The model hit the token limit before producing output. Try increasing max_completion_tokens.'
|
|
178
|
+
: 'The model did not produce any content.'));
|
|
179
|
+
}
|
|
169
180
|
// Log the response
|
|
170
|
-
this.tracker.logOutput(
|
|
181
|
+
this.tracker.logOutput(fullContent, resultToolCalls.length > 0 ? resultToolCalls : undefined);
|
|
182
|
+
// Normalize finish reason for our interface
|
|
183
|
+
const normalizedFinishReason = resultToolCalls.length > 0 ? 'tool_calls' : 'stop';
|
|
171
184
|
return {
|
|
172
185
|
content: fullContent,
|
|
173
186
|
toolCalls: resultToolCalls.length > 0 ? resultToolCalls : undefined,
|
|
174
|
-
finishReason:
|
|
187
|
+
finishReason: normalizedFinishReason,
|
|
175
188
|
};
|
|
176
189
|
}
|
|
177
190
|
convertMessages(messages) {
|