@livekit/agents 1.0.33 → 1.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +4 -4
- package/dist/inference/api_protos.d.ts +4 -4
- package/dist/inference/llm.cjs +30 -5
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +3 -1
- package/dist/inference/llm.d.ts +3 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +30 -5
- package/dist/inference/llm.js.map +1 -1
- package/dist/ipc/inference_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +1 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/llm/chat_context.cjs +20 -2
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +9 -0
- package/dist/llm/chat_context.d.ts +9 -0
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +20 -2
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/fallback_adapter.cjs +278 -0
- package/dist/llm/fallback_adapter.cjs.map +1 -0
- package/dist/llm/fallback_adapter.d.cts +73 -0
- package/dist/llm/fallback_adapter.d.ts +73 -0
- package/dist/llm/fallback_adapter.d.ts.map +1 -0
- package/dist/llm/fallback_adapter.js +254 -0
- package/dist/llm/fallback_adapter.js.map +1 -0
- package/dist/llm/fallback_adapter.test.cjs +176 -0
- package/dist/llm/fallback_adapter.test.cjs.map +1 -0
- package/dist/llm/fallback_adapter.test.js +175 -0
- package/dist/llm/fallback_adapter.test.js.map +1 -0
- package/dist/llm/index.cjs +3 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -0
- package/dist/llm/index.d.ts +1 -0
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +4 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +1 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -0
- package/dist/llm/llm.d.ts +1 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +1 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/openai.cjs +43 -20
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.d.ts.map +1 -1
- package/dist/llm/provider_format/openai.js +43 -20
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +35 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +35 -0
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/llm/provider_format/utils.cjs +1 -1
- package/dist/llm/provider_format/utils.cjs.map +1 -1
- package/dist/llm/provider_format/utils.d.ts.map +1 -1
- package/dist/llm/provider_format/utils.js +1 -1
- package/dist/llm/provider_format/utils.js.map +1 -1
- package/dist/stt/stt.cjs +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.js +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/tts.cjs +2 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.js +2 -2
- package/dist/tts/tts.js.map +1 -1
- package/dist/voice/background_audio.cjs.map +1 -1
- package/dist/voice/generation.cjs +2 -1
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +2 -1
- package/dist/voice/generation.js.map +1 -1
- package/package.json +1 -1
- package/src/inference/llm.ts +42 -5
- package/src/ipc/job_proc_lazy_main.ts +1 -1
- package/src/llm/chat_context.ts +32 -2
- package/src/llm/fallback_adapter.test.ts +238 -0
- package/src/llm/fallback_adapter.ts +391 -0
- package/src/llm/index.ts +6 -0
- package/src/llm/llm.ts +2 -1
- package/src/llm/provider_format/openai.test.ts +40 -0
- package/src/llm/provider_format/openai.ts +46 -19
- package/src/llm/provider_format/utils.ts +5 -1
- package/src/stt/stt.ts +1 -1
- package/src/tts/tts.ts +2 -2
- package/src/voice/generation.ts +1 -0
package/src/llm/fallback_adapter.ts
ADDED

@@ -0,0 +1,391 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { APIConnectionError, APIError } from '../_exceptions.js';
+import { log } from '../log.js';
+import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
+import type { ChatContext } from './chat_context.js';
+import type { ChatChunk } from './llm.js';
+import { LLM, LLMStream } from './llm.js';
+import type { ToolChoice, ToolContext } from './tool_context.js';
+
+/**
+ * Default connection options for FallbackAdapter.
+ * Uses max_retry=0 since fallback handles retries at a higher level.
+ */
+const DEFAULT_FALLBACK_API_CONNECT_OPTIONS: APIConnectOptions = {
+  maxRetry: 0,
+  timeoutMs: DEFAULT_API_CONNECT_OPTIONS.timeoutMs,
+  retryIntervalMs: DEFAULT_API_CONNECT_OPTIONS.retryIntervalMs,
+};
+
+/**
+ * Internal status tracking for each LLM instance.
+ */
+interface LLMStatus {
+  available: boolean;
+  recoveringTask: Promise<void> | null;
+}
+
+/**
+ * Event emitted when an LLM's availability changes.
+ */
+export interface AvailabilityChangedEvent {
+  llm: LLM;
+  available: boolean;
+}
+
+/**
+ * Options for creating a FallbackAdapter.
+ */
+export interface FallbackAdapterOptions {
+  /** List of LLM instances to fallback to (in order). */
+  llms: LLM[];
+  /** Timeout for each LLM attempt in seconds. Defaults to 5.0. */
+  attemptTimeout?: number;
+  /** Internal retries per LLM before moving to next. Defaults to 0. */
+  maxRetryPerLLM?: number;
+  /** Interval between retries in seconds. Defaults to 0.5. */
+  retryInterval?: number;
+  /** Whether to retry when LLM fails after chunks are sent. Defaults to false. */
+  retryOnChunkSent?: boolean;
+}
+
+/**
+ * FallbackAdapter is an LLM that can fallback to a different LLM if the current LLM fails.
+ *
+ * @example
+ * ```typescript
+ * const fallbackLLM = new FallbackAdapter({
+ *   llms: [primaryLLM, secondaryLLM, tertiaryLLM],
+ *   attemptTimeout: 5.0,
+ *   maxRetryPerLLM: 1,
+ * });
+ * ```
+ */
+export class FallbackAdapter extends LLM {
+  readonly llms: LLM[];
+  readonly attemptTimeout: number;
+  readonly maxRetryPerLLM: number;
+  readonly retryInterval: number;
+  readonly retryOnChunkSent: boolean;
+
+  /** @internal */
+  _status: LLMStatus[];
+
+  private logger = log();
+
+  constructor(options: FallbackAdapterOptions) {
+    super();
+
+    if (!options.llms || options.llms.length < 1) {
+      throw new Error('at least one LLM instance must be provided.');
+    }
+
+    this.llms = options.llms;
+    this.attemptTimeout = options.attemptTimeout ?? 5.0;
+    this.maxRetryPerLLM = options.maxRetryPerLLM ?? 0;
+    this.retryInterval = options.retryInterval ?? 0.5;
+    this.retryOnChunkSent = options.retryOnChunkSent ?? false;
+
+    // Initialize status for each LLM
+    this._status = this.llms.map(() => ({
+      available: true,
+      recoveringTask: null,
+    }));
+
+    // Forward metrics_collected events from child LLMs
+    for (const llm of this.llms) {
+      llm.on('metrics_collected', (metrics) => {
+        this.emit('metrics_collected', metrics);
+      });
+    }
+  }
+
+  get model(): string {
+    return 'FallbackAdapter';
+  }
+
+  label(): string {
+    return 'FallbackAdapter';
+  }
+
+  chat(opts: {
+    chatCtx: ChatContext;
+    toolCtx?: ToolContext;
+    connOptions?: APIConnectOptions;
+    parallelToolCalls?: boolean;
+    toolChoice?: ToolChoice;
+    extraKwargs?: Record<string, unknown>;
+  }): LLMStream {
+    return new FallbackLLMStream(this, {
+      chatCtx: opts.chatCtx,
+      toolCtx: opts.toolCtx,
+      connOptions: opts.connOptions || DEFAULT_FALLBACK_API_CONNECT_OPTIONS,
+      parallelToolCalls: opts.parallelToolCalls,
+      toolChoice: opts.toolChoice,
+      extraKwargs: opts.extraKwargs,
+    });
+  }
+
+  /**
+   * Emit availability changed event.
+   * @internal
+   */
+  _emitAvailabilityChanged(llm: LLM, available: boolean): void {
+    const event: AvailabilityChangedEvent = { llm, available };
+    // Use type assertion for custom event
+    (this as unknown as { emit: (event: string, data: AvailabilityChangedEvent) => void }).emit(
+      'llm_availability_changed',
+      event,
+    );
+  }
+}
+
+/**
+ * LLMStream implementation for FallbackAdapter.
+ * Handles fallback logic between multiple LLM providers.
+ */
+class FallbackLLMStream extends LLMStream {
+  private adapter: FallbackAdapter;
+  private parallelToolCalls?: boolean;
+  private toolChoice?: ToolChoice;
+  private extraKwargs?: Record<string, unknown>;
+  private _currentStream?: LLMStream;
+  private _log = log();
+
+  constructor(
+    adapter: FallbackAdapter,
+    opts: {
+      chatCtx: ChatContext;
+      toolCtx?: ToolContext;
+      connOptions: APIConnectOptions;
+      parallelToolCalls?: boolean;
+      toolChoice?: ToolChoice;
+      extraKwargs?: Record<string, unknown>;
+    },
+  ) {
+    super(adapter, {
+      chatCtx: opts.chatCtx,
+      toolCtx: opts.toolCtx,
+      connOptions: opts.connOptions,
+    });
+    this.adapter = adapter;
+    this.parallelToolCalls = opts.parallelToolCalls;
+    this.toolChoice = opts.toolChoice;
+    this.extraKwargs = opts.extraKwargs;
+  }
+
+  /**
+   * Override chatCtx to return current stream's context if available.
+   */
+  override get chatCtx(): ChatContext {
+    return this._currentStream?.chatCtx ?? super.chatCtx;
+  }
+
+  /**
+   * Try to generate with a single LLM.
+   * Returns an async generator that yields chunks.
+   */
+  private async *tryGenerate(
+    llm: LLM,
+    checkRecovery: boolean = false,
+  ): AsyncGenerator<ChatChunk, void, unknown> {
+    const connOptions: APIConnectOptions = {
+      ...this.connOptions,
+      maxRetry: this.adapter.maxRetryPerLLM,
+      timeoutMs: this.adapter.attemptTimeout * 1000,
+      retryIntervalMs: this.adapter.retryInterval * 1000,
+    };
+
+    const stream = llm.chat({
+      chatCtx: super.chatCtx,
+      toolCtx: this.toolCtx,
+      connOptions,
+      parallelToolCalls: this.parallelToolCalls,
+      toolChoice: this.toolChoice,
+      extraKwargs: this.extraKwargs,
+    });
+
+    // Listen for error events - child LLMs emit errors via their LLM instance, not the stream
+    let streamError: Error | undefined;
+    const errorHandler = (ev: { error: Error }) => {
+      streamError = ev.error;
+    };
+    llm.on('error', errorHandler);
+
+    try {
+      let shouldSetCurrent = !checkRecovery;
+      for await (const chunk of stream) {
+        if (shouldSetCurrent) {
+          shouldSetCurrent = false;
+          this._currentStream = stream;
+        }
+        yield chunk;
+      }
+
+      // If an error was emitted but not thrown through iteration, throw it now
+      if (streamError) {
+        throw streamError;
+      }
+    } catch (error) {
+      if (error instanceof APIError) {
+        if (checkRecovery) {
+          this._log.warn({ llm: llm.label(), error }, 'recovery failed');
+        } else {
+          this._log.warn({ llm: llm.label(), error }, 'failed, switching to next LLM');
+        }
+        throw error;
+      }
+
+      // Handle timeout errors
+      if (error instanceof Error && error.name === 'AbortError') {
+        if (checkRecovery) {
+          this._log.warn({ llm: llm.label() }, 'recovery timed out');
+        } else {
+          this._log.warn({ llm: llm.label() }, 'timed out, switching to next LLM');
+        }
+        throw error;
+      }
+
+      // Unexpected error
+      if (checkRecovery) {
+        this._log.error({ llm: llm.label(), error }, 'recovery unexpected error');
+      } else {
+        this._log.error({ llm: llm.label(), error }, 'unexpected error, switching to next LLM');
+      }
+      throw error;
+    } finally {
+      llm.off('error', errorHandler);
+    }
+  }
+
+  /**
+   * Start background recovery task for an LLM.
+   */
+  private tryRecovery(llm: LLM, index: number): void {
+    const status = this.adapter._status[index]!;
+
+    // Skip if already recovering
+    if (status.recoveringTask !== null) {
+      return;
+    }
+
+    const recoverTask = async (): Promise<void> => {
+      try {
+        // Try to generate (just iterate to check if it works)
+        // eslint-disable-next-line @typescript-eslint/no-unused-vars
+        for await (const _chunk of this.tryGenerate(llm, true)) {
+          // Just consume the stream to verify it works
+        }
+
+        // Recovery successful
+        status.available = true;
+        this._log.info({ llm: llm.label() }, 'LLM recovered');
+        this.adapter._emitAvailabilityChanged(llm, true);
+      } catch {
+        // Recovery failed, stay unavailable
+      } finally {
+        status.recoveringTask = null;
+      }
+    };
+
+    // Fire and forget
+    status.recoveringTask = recoverTask();
+  }
+
+  /**
+   * Main run method - iterates through LLMs with fallback logic.
+   */
+  protected async run(): Promise<void> {
+    const startTime = Date.now();
+
+    // Check if all LLMs are unavailable
+    const allFailed = this.adapter._status.every((s) => !s.available);
+    if (allFailed) {
+      this._log.error('all LLMs are unavailable, retrying...');
+    }
+
+    for (let i = 0; i < this.adapter.llms.length; i++) {
+      const llm = this.adapter.llms[i]!;
+      const status = this.adapter._status[i]!;
+
+      this._log.debug(
+        { llm: llm.label(), index: i, available: status.available, allFailed },
+        'checking LLM',
+      );
+
+      if (status.available || allFailed) {
+        let textSent = '';
+        const toolCallsSent: string[] = [];
+
+        try {
+          this._log.info({ llm: llm.label() }, 'FallbackAdapter: Attempting provider');
+
+          let chunkCount = 0;
+          for await (const chunk of this.tryGenerate(llm, false)) {
+            chunkCount++;
+            // Track what's been sent
+            if (chunk.delta) {
+              if (chunk.delta.content) {
+                textSent += chunk.delta.content;
+              }
+              if (chunk.delta.toolCalls) {
+                for (const tc of chunk.delta.toolCalls) {
+                  if (tc.name) {
+                    toolCallsSent.push(tc.name);
+                  }
+                }
+              }
+            }
+
+            // Forward chunk to queue
+            this._log.debug({ llm: llm.label(), chunkCount }, 'run: forwarding chunk to queue');
+            this.queue.put(chunk);
+          }
+
+          // Success!
+          this._log.info(
+            { llm: llm.label(), totalChunks: chunkCount, textLength: textSent.length },
+            'FallbackAdapter: Provider succeeded',
+          );
+          return;
+        } catch (error) {
+          // Mark as unavailable if it was available before
+          if (status.available) {
+            status.available = false;
+            this.adapter._emitAvailabilityChanged(llm, false);
+          }
+
+          // Check if we sent data before failing
+          if (textSent || toolCallsSent.length > 0) {
+            const extra = { textSent, toolCallsSent };
+
+            if (!this.adapter.retryOnChunkSent) {
+              this._log.error(
+                { llm: llm.label(), ...extra },
+                'failed after sending chunk, skip retrying. Set `retryOnChunkSent` to `true` to enable.',
+              );
+              throw error;
+            }
+
+            this._log.warn(
+              { llm: llm.label(), ...extra },
+              'failed after sending chunk, retrying...',
+            );
+          }
+        }
+      }
+
+      // Trigger background recovery for this LLM
+      this.tryRecovery(llm, i);
+    }
+
+    // All LLMs failed
+    const duration = (Date.now() - startTime) / 1000;
+    const labels = this.adapter.llms.map((l) => l.label()).join(', ');
+    throw new APIConnectionError({
+      message: `all LLMs failed (${labels}) after ${duration.toFixed(2)}s`,
+    });
+  }
+}
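
Taken together, the new file wires fallback ordering, availability tracking, and background recovery into a single `LLM` implementation. A minimal usage sketch, assuming two already-configured `LLM` instances and an existing `ChatContext`; the placeholder names and the relative import path are illustrative, while the constructor options, the `llm_availability_changed` event, and the `chat()` signature come from the code above:

```typescript
import type { ChatContext } from './chat_context.js';
import { FallbackAdapter, type AvailabilityChangedEvent } from './fallback_adapter.js';
import type { LLM } from './llm.js';

// Hypothetical placeholders: any two configured LLM instances and a chat context.
declare const primaryLLM: LLM;
declare const backupLLM: LLM;
declare const chatCtx: ChatContext;

const fallbackLLM = new FallbackAdapter({
  llms: [primaryLLM, backupLLM],
  attemptTimeout: 5.0, // seconds per attempt before moving on
  maxRetryPerLLM: 1, // internal retries before falling through to the next LLM
});

// The adapter emits this event through a type assertion internally,
// so a matching cast is used here to register the listener.
(fallbackLLM as unknown as {
  on(event: 'llm_availability_changed', cb: (ev: AvailabilityChangedEvent) => void): void;
}).on('llm_availability_changed', (ev) => {
  console.log(`${ev.llm.label()} is now ${ev.available ? 'available' : 'unavailable'}`);
});

// Streaming works like any other LLM; chunks come from whichever provider succeeds.
async function main() {
  for await (const chunk of fallbackLLM.chat({ chatCtx })) {
    process.stdout.write(chunk.delta?.content ?? '');
  }
}
```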
package/src/llm/index.ts
CHANGED
package/src/llm/llm.ts
CHANGED

@@ -17,6 +17,7 @@ export interface ChoiceDelta {
   role: ChatRole;
   content?: string;
   toolCalls?: FunctionCall[];
+  extra?: Record<string, unknown>;
 }
 
 export interface CompletionUsage {
@@ -135,7 +136,7 @@ export abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    startSoon(() => this.mainTask().
+    startSoon(() => this.mainTask().finally(() => this.queue.close()));
   }
 
   private _mainTaskImpl = async (span: Span) => {
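
The new `extra` field on `ChoiceDelta` carries provider-specific chunk payloads, and the `startSoon` fix, repeated in stt.ts and tts.ts below, chains the queue close through `finally` so it also runs when `mainTask()` rejects. A minimal stand-in queue, not the actual @livekit/agents implementation, sketching why that matters:

```typescript
// Minimal stand-in for the stream's chunk queue, for illustration only.
class ClosableQueue<T> {
  private items: T[] = [];
  private waiters: ((v: T | null) => void)[] = [];
  private closed = false;

  put(item: T) {
    const w = this.waiters.shift();
    if (w) w(item);
    else this.items.push(item);
  }

  close() {
    this.closed = true;
    for (const w of this.waiters.splice(0)) w(null); // wake blocked readers
  }

  async get(): Promise<T | null> {
    if (this.items.length > 0) return this.items.shift()!;
    if (this.closed) return null;
    return new Promise((resolve) => this.waiters.push(resolve));
  }
}

async function demo() {
  const queue = new ClosableQueue<string>();
  const producer = async () => {
    queue.put('hello');
    throw new Error('provider failed mid-stream');
  };

  // Chaining the close through `finally` guarantees it runs whether the task
  // fulfills or rejects; if it only ran on fulfillment, the get() loop below
  // would hang forever after the failure.
  producer().finally(() => queue.close()).catch(() => {});

  let item: string | null;
  while ((item = await queue.get()) !== null) {
    console.log(item); // prints "hello", then the loop exits because the queue closed
  }
}

demo();
```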
package/src/llm/provider_format/openai.test.ts
CHANGED

@@ -258,6 +258,46 @@ describe('toChatCtx', () => {
     ]);
   });
 
+  it('should include provider-specific extra content on tool calls', async () => {
+    const ctx = ChatContext.empty();
+    const msg = ctx.addMessage({ role: 'assistant', content: 'Running tool' });
+
+    const toolCall = FunctionCall.create({
+      id: `${msg.id}/tool_1`,
+      callId: 'call_789',
+      name: 'google_call',
+      args: '{}',
+      extra: { google: { thoughtSignature: 'sig-123' } },
+    });
+    const toolOutput = FunctionCallOutput.create({
+      callId: 'call_789',
+      output: '{"result": "ok"}',
+      isError: false,
+    });
+
+    ctx.insert([toolCall, toolOutput]);
+
+    const result = await toChatCtx(ctx);
+
+    expect(result[0]).toEqual({
+      role: 'assistant',
+      content: 'Running tool',
+      tool_calls: [
+        {
+          type: 'function',
+          id: 'call_789',
+          function: { name: 'google_call', arguments: '{}' },
+          extra_content: { google: { thoughtSignature: 'sig-123' } },
+        },
+      ],
+    });
+    expect(result[1]).toEqual({
+      role: 'tool',
+      tool_call_id: 'call_789',
+      content: '{"result": "ok"}',
+    });
+  });
+
   it('should handle multiple tool calls in one message', async () => {
     const ctx = ChatContext.empty();
 
package/src/llm/provider_format/openai.ts
CHANGED

@@ -17,11 +17,20 @@ export async function toChatCtx(chatCtx: ChatContext, injectDummyUserMessage: bo
       ? await toChatItem(group.message)
       : { role: 'assistant' };
 
-    const toolCalls = group.toolCalls.map((toolCall) =>
-
-
-
-
+    const toolCalls = group.toolCalls.map((toolCall) => {
+      const tc: Record<string, any> = {
+        type: 'function',
+        id: toolCall.callId,
+        function: { name: toolCall.name, arguments: toolCall.args },
+      };
+
+      // Include provider-specific extra content (e.g., Google thought signatures)
+      const googleExtra = getGoogleExtra(toolCall);
+      if (googleExtra) {
+        tc.extra_content = { google: googleExtra };
+      }
+      return tc;
+    });
 
     if (toolCalls.length > 0) {
       message['tool_calls'] = toolCalls;
@@ -53,24 +62,33 @@ async function toChatItem(item: ChatItem) {
       }
     }
 
-    const
-
-
-
-
-
+    const result: Record<string, any> = { role: item.role };
+    if (listContent.length === 0) {
+      result.content = textContent;
+    } else {
+      if (textContent.length > 0) {
+        listContent.push({ type: 'text', text: textContent });
+      }
+      result.content = listContent;
+    }
 
-    return
+    return result;
   } else if (item.type === 'function_call') {
+    const tc: Record<string, any> = {
+      id: item.callId,
+      type: 'function',
+      function: { name: item.name, arguments: item.args },
+    };
+
+    // Include provider-specific extra content (e.g., Google thought signatures)
+    const googleExtra = getGoogleExtra(item);
+    if (googleExtra) {
+      tc.extra_content = { google: googleExtra };
+    }
+
     return {
       role: 'assistant',
-      tool_calls: [
-        {
-          id: item.callId,
-          type: 'function',
-          function: { name: item.name, arguments: item.args },
-        },
-      ],
+      tool_calls: [tc],
     };
   } else if (item.type === 'function_call_output') {
     return {
@@ -84,6 +102,15 @@ async function toChatItem(item: ChatItem) {
   throw new Error(`Unsupported item type: ${item['type']}`);
 }
 
+function getGoogleExtra(
+  item: Partial<{ extra?: Record<string, unknown>; thoughtSignature?: string }>,
+): Record<string, unknown> | undefined {
+  const googleExtra =
+    (item.extra?.google as Record<string, unknown> | undefined) ||
+    (item.thoughtSignature ? { thoughtSignature: item.thoughtSignature } : undefined);
+  return googleExtra;
+}
+
 async function toImageContent(content: ImageContent) {
   const cacheKey = 'serialized_image'; // TODO: use hash of encoding options if available
   let serialized: SerializedImage;
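
The new `getGoogleExtra` helper (module-private; called directly here only for illustration) normalizes two input shapes: the `extra.google` bag on a `FunctionCall` and the legacy top-level `thoughtSignature`. A quick sketch with hypothetical values:

```typescript
// Either shape yields the same google extra object:
getGoogleExtra({ extra: { google: { thoughtSignature: 'sig-123' } } });
// => { thoughtSignature: 'sig-123' }

getGoogleExtra({ thoughtSignature: 'sig-123' });
// => { thoughtSignature: 'sig-123' }

// With neither present it returns undefined, so no extra_content
// field is added to the serialized tool call:
getGoogleExtra({});
// => undefined
```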
package/src/llm/provider_format/utils.ts
CHANGED

@@ -133,7 +133,11 @@ export function groupToolCalls(chatCtx: ChatContext) {
 
     if (isAssistantMessage || isFunctionCall) {
       // only assistant messages and function calls can be grouped
-
+      // For function calls, use group_id if available (for parallel function calls),
+      // otherwise fall back to id-based grouping for backwards compatibility
+      const groupId =
+        item.type === 'function_call' && item.groupId ? item.groupId : item.id.split('/')[0]!;
+
       if (itemGroups[groupId] === undefined) {
         itemGroups[groupId] = ChatItemGroup.create();
 
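
The practical effect of keying groups on `groupId`: parallel tool calls issued in one assistant turn collapse into a single assistant message with multiple `tool_calls`. A sketch with hypothetical ids:

```typescript
// Two function_call items from the same assistant turn (values hypothetical):
//   { type: 'function_call', id: 'item_A/tool_1', groupId: 'turn_1', ... }
//   { type: 'function_call', id: 'item_B/tool_1', groupId: 'turn_1', ... }
//
// New key:  groupId → 'turn_1' for both → one group → one assistant message
//           with tool_calls: [callA, callB]
// Fallback: id.split('/')[0] → 'item_A' vs 'item_B' → two separate groups →
//           two assistant messages, each carrying a single tool call
```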
package/src/stt/stt.ts
CHANGED

@@ -195,7 +195,7 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    startSoon(() => this.mainTask().
+    startSoon(() => this.mainTask().finally(() => this.queue.close()));
   }
 
   private async mainTask() {
package/src/tts/tts.ts
CHANGED

@@ -169,7 +169,7 @@ export abstract class SynthesizeStream
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    startSoon(() => this.mainTask().
+    startSoon(() => this.mainTask().finally(() => this.queue.close()));
   }
 
   private _mainTaskImpl = async (span: Span) => {
@@ -448,7 +448,7 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    Promise.resolve().then(() => this.mainTask().
+    Promise.resolve().then(() => this.mainTask().finally(() => this.queue.close()));
   }
 
   private _mainTaskImpl = async (span: Span) => {
package/src/voice/generation.ts
CHANGED