@livekit/agents 1.0.32 → 1.0.34
- package/dist/inference/llm.cjs +0 -2
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +0 -2
- package/dist/inference/llm.js.map +1 -1
- package/dist/llm/fallback_adapter.cjs +278 -0
- package/dist/llm/fallback_adapter.cjs.map +1 -0
- package/dist/llm/fallback_adapter.d.cts +73 -0
- package/dist/llm/fallback_adapter.d.ts +73 -0
- package/dist/llm/fallback_adapter.d.ts.map +1 -0
- package/dist/llm/fallback_adapter.js +254 -0
- package/dist/llm/fallback_adapter.js.map +1 -0
- package/dist/llm/fallback_adapter.test.cjs +176 -0
- package/dist/llm/fallback_adapter.test.cjs.map +1 -0
- package/dist/llm/fallback_adapter.test.js +175 -0
- package/dist/llm/fallback_adapter.test.js.map +1 -0
- package/dist/llm/index.cjs +3 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -0
- package/dist/llm/index.d.ts +1 -0
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +4 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +1 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.js +1 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/log.cjs +3 -3
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.cts +0 -5
- package/dist/log.d.ts +0 -5
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +3 -3
- package/dist/log.js.map +1 -1
- package/dist/stt/stt.cjs +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.js +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/tts.cjs +2 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.js +2 -2
- package/dist/tts/tts.js.map +1 -1
- package/package.json +1 -1
- package/src/inference/llm.ts +0 -2
- package/src/llm/fallback_adapter.test.ts +238 -0
- package/src/llm/fallback_adapter.ts +391 -0
- package/src/llm/index.ts +6 -0
- package/src/llm/llm.ts +1 -1
- package/src/log.ts +3 -9
- package/src/stt/stt.ts +1 -1
- package/src/tts/tts.ts +2 -2
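The headline addition in 1.0.34 is an LLM FallbackAdapter (package/src/llm/fallback_adapter.ts, re-exported via package/src/llm/index.ts), plus a fix in llm.ts, stt.ts, and tts.ts that closes the stream chunk queue when the background main task settles. A minimal usage sketch of the new adapter follows; the `llm` namespace import and the placeholder LLM instances are assumptions for illustration, not part of this diff:

```typescript
import { llm } from '@livekit/agents';

// Placeholder LLM instances (e.g. constructed from provider plugins); not part of this diff.
declare const primaryLLM: llm.LLM;
declare const backupLLM: llm.LLM;

const fallbackLLM = new llm.FallbackAdapter({
  llms: [primaryLLM, backupLLM], // tried in order
  attemptTimeout: 5.0,           // seconds per attempt (default 5.0)
  maxRetryPerLLM: 1,             // internal retries per LLM before moving on (default 0)
});

// fallbackLLM can then be used anywhere a regular LLM instance is accepted.
```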
package/src/llm/fallback_adapter.test.ts ADDED

@@ -0,0 +1,238 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { beforeAll, describe, expect, it, vi } from 'vitest';
+import { APIConnectionError, APIError } from '../_exceptions.js';
+import { initializeLogger } from '../log.js';
+import type { APIConnectOptions } from '../types.js';
+import { delay } from '../utils.js';
+import type { ChatContext } from './chat_context.js';
+import { FallbackAdapter } from './fallback_adapter.js';
+import { type ChatChunk, LLM, LLMStream } from './llm.js';
+import type { ToolChoice, ToolContext } from './tool_context.js';
+
+class MockLLMStream extends LLMStream {
+  public myLLM: LLM;
+
+  constructor(
+    llm: LLM,
+    opts: {
+      chatCtx: ChatContext;
+      toolCtx?: ToolContext;
+      connOptions: APIConnectOptions;
+    },
+    private shouldFail: boolean = false,
+    private failAfterChunks: number = 0,
+  ) {
+    super(llm, opts);
+    this.myLLM = llm;
+  }
+
+  protected async run(): Promise<void> {
+    if (this.shouldFail && this.failAfterChunks === 0) {
+      throw new APIError('Mock LLM failed immediately');
+    }
+
+    const chunk: ChatChunk = {
+      id: 'test-id',
+      delta: { role: 'assistant', content: 'chunk' },
+    };
+
+    for (let i = 0; i < 3; i++) {
+      if (this.shouldFail && i === this.failAfterChunks) {
+        throw new APIError('Mock LLM failed after chunks');
+      }
+      this.queue.put(chunk);
+      await delay(10);
+    }
+  }
+}
+
+class MockLLM extends LLM {
+  shouldFail: boolean = false;
+  failAfterChunks: number = 0;
+  private _label: string;
+
+  constructor(label: string) {
+    super();
+    this._label = label;
+  }
+
+  label(): string {
+    return this._label;
+  }
+
+  chat(opts: {
+    chatCtx: ChatContext;
+    toolCtx?: ToolContext;
+    connOptions?: APIConnectOptions;
+    parallelToolCalls?: boolean;
+    toolChoice?: ToolChoice;
+    extraKwargs?: Record<string, unknown>;
+  }): LLMStream {
+    return new MockLLMStream(
+      this,
+      {
+        chatCtx: opts.chatCtx,
+        toolCtx: opts.toolCtx,
+        connOptions: opts.connOptions!,
+      },
+      this.shouldFail,
+      this.failAfterChunks,
+    );
+  }
+}
+
+describe('FallbackAdapter', () => {
+  beforeAll(() => {
+    initializeLogger({ pretty: false });
+    // Suppress unhandled rejections from LLMStream background tasks
+    process.on('unhandledRejection', () => {});
+  });
+
+  it('should initialize correctly', () => {
+    const llm1 = new MockLLM('llm1');
+    const adapter = new FallbackAdapter({ llms: [llm1] });
+    expect(adapter.llms).toHaveLength(1);
+    expect(adapter.llms[0]).toBe(llm1);
+  });
+
+  it('should throw if no LLMs provided', () => {
+    expect(() => new FallbackAdapter({ llms: [] })).toThrow();
+  });
+
+  it('should use primary LLM if successful', async () => {
+    const llm1 = new MockLLM('llm1');
+    const llm2 = new MockLLM('llm2');
+    const adapter = new FallbackAdapter({ llms: [llm1, llm2] });
+
+    const stream = adapter.chat({
+      chatCtx: {} as ChatContext,
+    });
+
+    const chunks: ChatChunk[] = [];
+    for await (const chunk of stream) {
+      chunks.push(chunk);
+    }
+
+    expect(chunks).toHaveLength(3);
+    // Should verify it used llm1 (we can check logs or spy, but simple success is good first step)
+  });
+
+  it('should fallback to second LLM if first fails immediately', async () => {
+    const llm1 = new MockLLM('llm1');
+    llm1.shouldFail = true;
+    const llm2 = new MockLLM('llm2');
+    const adapter = new FallbackAdapter({ llms: [llm1, llm2] });
+
+    const stream = adapter.chat({
+      chatCtx: {} as ChatContext,
+    });
+
+    const chunks: ChatChunk[] = [];
+    for await (const chunk of stream) {
+      chunks.push(chunk);
+    }
+
+    expect(chunks).toHaveLength(3);
+    expect(adapter._status[0]!.available).toBe(false);
+    expect(adapter._status[1]!.available).toBe(true);
+  });
+
+  it('should fail if all LLMs fail', async () => {
+    const llm1 = new MockLLM('llm1');
+    llm1.shouldFail = true;
+    const llm2 = new MockLLM('llm2');
+    llm2.shouldFail = true;
+    const adapter = new FallbackAdapter({ llms: [llm1, llm2] });
+
+    const stream = adapter.chat({
+      chatCtx: {} as ChatContext,
+    });
+
+    const errorPromise = new Promise<Error>((resolve) => {
+      adapter.on('error', (e) => resolve(e.error));
+    });
+
+    for await (const _ of stream) {
+      // consume
+    }
+
+    const error = await errorPromise;
+    expect(error).toBeInstanceOf(APIConnectionError);
+  });
+
+  it('should fail if chunks sent and retryOnChunkSent is false', async () => {
+    const llm1 = new MockLLM('llm1');
+    llm1.shouldFail = true;
+    llm1.failAfterChunks = 1; // Fail after 1 chunk
+    const llm2 = new MockLLM('llm2');
+    const adapter = new FallbackAdapter({
+      llms: [llm1, llm2],
+      retryOnChunkSent: false,
+    });
+
+    const stream = adapter.chat({
+      chatCtx: {} as ChatContext,
+    });
+
+    const errorPromise = new Promise<Error>((resolve) => {
+      adapter.on('error', (e) => resolve(e.error));
+    });
+
+    for await (const _ of stream) {
+      // consume
+    }
+
+    const error = await errorPromise;
+    expect(error).toBeInstanceOf(APIError);
+  });
+
+  it('should fallback if chunks sent and retryOnChunkSent is true', async () => {
+    const llm1 = new MockLLM('llm1');
+    llm1.shouldFail = true;
+    llm1.failAfterChunks = 1;
+    const llm2 = new MockLLM('llm2');
+    const adapter = new FallbackAdapter({
+      llms: [llm1, llm2],
+      retryOnChunkSent: true,
+    });
+
+    const stream = adapter.chat({
+      chatCtx: {} as ChatContext,
+    });
+
+    const chunks: ChatChunk[] = [];
+    for await (const chunk of stream) {
+      chunks.push(chunk);
+    }
+
+    // 1 chunk from failed llm1 + 3 chunks from llm2
+    expect(chunks).toHaveLength(4);
+  });
+
+  it('should emit availability changed events', async () => {
+    const llm1 = new MockLLM('llm1');
+    llm1.shouldFail = true;
+    const llm2 = new MockLLM('llm2');
+    const adapter = new FallbackAdapter({ llms: [llm1, llm2] });
+
+    const eventSpy = vi.fn();
+    (adapter as any).on('llm_availability_changed', eventSpy);
+
+    const stream = adapter.chat({
+      chatCtx: {} as ChatContext,
+    });
+
+    for await (const _ of stream) {
+      // consume
+    }
+
+    expect(eventSpy).toHaveBeenCalledWith(
+      expect.objectContaining({
+        llm: llm1,
+        available: false,
+      }),
+    );
+  });
+});
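The last test above drives the `llm_availability_changed` event. A consumer could subscribe the same way; a sketch, assuming FallbackAdapter is re-exported on the `llm` namespace and using the same `as any` cast as the test (the event is emitted through a type assertion rather than the typed LLM event map):

```typescript
import { llm } from '@livekit/agents';

declare const primaryLLM: llm.LLM; // placeholder LLM instances
declare const backupLLM: llm.LLM;

const adapter = new llm.FallbackAdapter({ llms: [primaryLLM, backupLLM] });

// Payload is an AvailabilityChangedEvent: { llm, available }
(adapter as any).on('llm_availability_changed', (ev: { llm: llm.LLM; available: boolean }) => {
  console.warn(`${ev.llm.label()} is now ${ev.available ? 'available' : 'unavailable'}`);
});
```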
package/src/llm/fallback_adapter.ts ADDED

@@ -0,0 +1,391 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { APIConnectionError, APIError } from '../_exceptions.js';
+import { log } from '../log.js';
+import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
+import type { ChatContext } from './chat_context.js';
+import type { ChatChunk } from './llm.js';
+import { LLM, LLMStream } from './llm.js';
+import type { ToolChoice, ToolContext } from './tool_context.js';
+
+/**
+ * Default connection options for FallbackAdapter.
+ * Uses max_retry=0 since fallback handles retries at a higher level.
+ */
+const DEFAULT_FALLBACK_API_CONNECT_OPTIONS: APIConnectOptions = {
+  maxRetry: 0,
+  timeoutMs: DEFAULT_API_CONNECT_OPTIONS.timeoutMs,
+  retryIntervalMs: DEFAULT_API_CONNECT_OPTIONS.retryIntervalMs,
+};
+
+/**
+ * Internal status tracking for each LLM instance.
+ */
+interface LLMStatus {
+  available: boolean;
+  recoveringTask: Promise<void> | null;
+}
+
+/**
+ * Event emitted when an LLM's availability changes.
+ */
+export interface AvailabilityChangedEvent {
+  llm: LLM;
+  available: boolean;
+}
+
+/**
+ * Options for creating a FallbackAdapter.
+ */
+export interface FallbackAdapterOptions {
+  /** List of LLM instances to fallback to (in order). */
+  llms: LLM[];
+  /** Timeout for each LLM attempt in seconds. Defaults to 5.0. */
+  attemptTimeout?: number;
+  /** Internal retries per LLM before moving to next. Defaults to 0. */
+  maxRetryPerLLM?: number;
+  /** Interval between retries in seconds. Defaults to 0.5. */
+  retryInterval?: number;
+  /** Whether to retry when LLM fails after chunks are sent. Defaults to false. */
+  retryOnChunkSent?: boolean;
+}
+
+/**
+ * FallbackAdapter is an LLM that can fallback to a different LLM if the current LLM fails.
+ *
+ * @example
+ * ```typescript
+ * const fallbackLLM = new FallbackAdapter({
+ *   llms: [primaryLLM, secondaryLLM, tertiaryLLM],
+ *   attemptTimeout: 5.0,
+ *   maxRetryPerLLM: 1,
+ * });
+ * ```
+ */
+export class FallbackAdapter extends LLM {
+  readonly llms: LLM[];
+  readonly attemptTimeout: number;
+  readonly maxRetryPerLLM: number;
+  readonly retryInterval: number;
+  readonly retryOnChunkSent: boolean;
+
+  /** @internal */
+  _status: LLMStatus[];
+
+  private logger = log();
+
+  constructor(options: FallbackAdapterOptions) {
+    super();
+
+    if (!options.llms || options.llms.length < 1) {
+      throw new Error('at least one LLM instance must be provided.');
+    }
+
+    this.llms = options.llms;
+    this.attemptTimeout = options.attemptTimeout ?? 5.0;
+    this.maxRetryPerLLM = options.maxRetryPerLLM ?? 0;
+    this.retryInterval = options.retryInterval ?? 0.5;
+    this.retryOnChunkSent = options.retryOnChunkSent ?? false;
+
+    // Initialize status for each LLM
+    this._status = this.llms.map(() => ({
+      available: true,
+      recoveringTask: null,
+    }));
+
+    // Forward metrics_collected events from child LLMs
+    for (const llm of this.llms) {
+      llm.on('metrics_collected', (metrics) => {
+        this.emit('metrics_collected', metrics);
+      });
+    }
+  }
+
+  get model(): string {
+    return 'FallbackAdapter';
+  }
+
+  label(): string {
+    return 'FallbackAdapter';
+  }
+
+  chat(opts: {
+    chatCtx: ChatContext;
+    toolCtx?: ToolContext;
+    connOptions?: APIConnectOptions;
+    parallelToolCalls?: boolean;
+    toolChoice?: ToolChoice;
+    extraKwargs?: Record<string, unknown>;
+  }): LLMStream {
+    return new FallbackLLMStream(this, {
+      chatCtx: opts.chatCtx,
+      toolCtx: opts.toolCtx,
+      connOptions: opts.connOptions || DEFAULT_FALLBACK_API_CONNECT_OPTIONS,
+      parallelToolCalls: opts.parallelToolCalls,
+      toolChoice: opts.toolChoice,
+      extraKwargs: opts.extraKwargs,
+    });
+  }
+
+  /**
+   * Emit availability changed event.
+   * @internal
+   */
+  _emitAvailabilityChanged(llm: LLM, available: boolean): void {
+    const event: AvailabilityChangedEvent = { llm, available };
+    // Use type assertion for custom event
+    (this as unknown as { emit: (event: string, data: AvailabilityChangedEvent) => void }).emit(
+      'llm_availability_changed',
+      event,
+    );
+  }
+}
+
+/**
+ * LLMStream implementation for FallbackAdapter.
+ * Handles fallback logic between multiple LLM providers.
+ */
+class FallbackLLMStream extends LLMStream {
+  private adapter: FallbackAdapter;
+  private parallelToolCalls?: boolean;
+  private toolChoice?: ToolChoice;
+  private extraKwargs?: Record<string, unknown>;
+  private _currentStream?: LLMStream;
+  private _log = log();
+
+  constructor(
+    adapter: FallbackAdapter,
+    opts: {
+      chatCtx: ChatContext;
+      toolCtx?: ToolContext;
+      connOptions: APIConnectOptions;
+      parallelToolCalls?: boolean;
+      toolChoice?: ToolChoice;
+      extraKwargs?: Record<string, unknown>;
+    },
+  ) {
+    super(adapter, {
+      chatCtx: opts.chatCtx,
+      toolCtx: opts.toolCtx,
+      connOptions: opts.connOptions,
+    });
+    this.adapter = adapter;
+    this.parallelToolCalls = opts.parallelToolCalls;
+    this.toolChoice = opts.toolChoice;
+    this.extraKwargs = opts.extraKwargs;
+  }
+
+  /**
+   * Override chatCtx to return current stream's context if available.
+   */
+  override get chatCtx(): ChatContext {
+    return this._currentStream?.chatCtx ?? super.chatCtx;
+  }
+
+  /**
+   * Try to generate with a single LLM.
+   * Returns an async generator that yields chunks.
+   */
+  private async *tryGenerate(
+    llm: LLM,
+    checkRecovery: boolean = false,
+  ): AsyncGenerator<ChatChunk, void, unknown> {
+    const connOptions: APIConnectOptions = {
+      ...this.connOptions,
+      maxRetry: this.adapter.maxRetryPerLLM,
+      timeoutMs: this.adapter.attemptTimeout * 1000,
+      retryIntervalMs: this.adapter.retryInterval * 1000,
+    };
+
+    const stream = llm.chat({
+      chatCtx: super.chatCtx,
+      toolCtx: this.toolCtx,
+      connOptions,
+      parallelToolCalls: this.parallelToolCalls,
+      toolChoice: this.toolChoice,
+      extraKwargs: this.extraKwargs,
+    });
+
+    // Listen for error events - child LLMs emit errors via their LLM instance, not the stream
+    let streamError: Error | undefined;
+    const errorHandler = (ev: { error: Error }) => {
+      streamError = ev.error;
+    };
+    llm.on('error', errorHandler);
+
+    try {
+      let shouldSetCurrent = !checkRecovery;
+      for await (const chunk of stream) {
+        if (shouldSetCurrent) {
+          shouldSetCurrent = false;
+          this._currentStream = stream;
+        }
+        yield chunk;
+      }
+
+      // If an error was emitted but not thrown through iteration, throw it now
+      if (streamError) {
+        throw streamError;
+      }
+    } catch (error) {
+      if (error instanceof APIError) {
+        if (checkRecovery) {
+          this._log.warn({ llm: llm.label(), error }, 'recovery failed');
+        } else {
+          this._log.warn({ llm: llm.label(), error }, 'failed, switching to next LLM');
+        }
+        throw error;
+      }
+
+      // Handle timeout errors
+      if (error instanceof Error && error.name === 'AbortError') {
+        if (checkRecovery) {
+          this._log.warn({ llm: llm.label() }, 'recovery timed out');
+        } else {
+          this._log.warn({ llm: llm.label() }, 'timed out, switching to next LLM');
+        }
+        throw error;
+      }
+
+      // Unexpected error
+      if (checkRecovery) {
+        this._log.error({ llm: llm.label(), error }, 'recovery unexpected error');
+      } else {
+        this._log.error({ llm: llm.label(), error }, 'unexpected error, switching to next LLM');
+      }
+      throw error;
+    } finally {
+      llm.off('error', errorHandler);
+    }
+  }
+
+  /**
+   * Start background recovery task for an LLM.
+   */
+  private tryRecovery(llm: LLM, index: number): void {
+    const status = this.adapter._status[index]!;
+
+    // Skip if already recovering
+    if (status.recoveringTask !== null) {
+      return;
+    }
+
+    const recoverTask = async (): Promise<void> => {
+      try {
+        // Try to generate (just iterate to check if it works)
+        // eslint-disable-next-line @typescript-eslint/no-unused-vars
+        for await (const _chunk of this.tryGenerate(llm, true)) {
+          // Just consume the stream to verify it works
+        }
+
+        // Recovery successful
+        status.available = true;
+        this._log.info({ llm: llm.label() }, 'LLM recovered');
+        this.adapter._emitAvailabilityChanged(llm, true);
+      } catch {
+        // Recovery failed, stay unavailable
+      } finally {
+        status.recoveringTask = null;
+      }
+    };
+
+    // Fire and forget
+    status.recoveringTask = recoverTask();
+  }
+
+  /**
+   * Main run method - iterates through LLMs with fallback logic.
+   */
+  protected async run(): Promise<void> {
+    const startTime = Date.now();
+
+    // Check if all LLMs are unavailable
+    const allFailed = this.adapter._status.every((s) => !s.available);
+    if (allFailed) {
+      this._log.error('all LLMs are unavailable, retrying...');
+    }
+
+    for (let i = 0; i < this.adapter.llms.length; i++) {
+      const llm = this.adapter.llms[i]!;
+      const status = this.adapter._status[i]!;
+
+      this._log.debug(
+        { llm: llm.label(), index: i, available: status.available, allFailed },
+        'checking LLM',
+      );
+
+      if (status.available || allFailed) {
+        let textSent = '';
+        const toolCallsSent: string[] = [];
+
+        try {
+          this._log.info({ llm: llm.label() }, 'FallbackAdapter: Attempting provider');
+
+          let chunkCount = 0;
+          for await (const chunk of this.tryGenerate(llm, false)) {
+            chunkCount++;
+            // Track what's been sent
+            if (chunk.delta) {
+              if (chunk.delta.content) {
+                textSent += chunk.delta.content;
+              }
+              if (chunk.delta.toolCalls) {
+                for (const tc of chunk.delta.toolCalls) {
+                  if (tc.name) {
+                    toolCallsSent.push(tc.name);
+                  }
+                }
+              }
+            }
+
+            // Forward chunk to queue
+            this._log.debug({ llm: llm.label(), chunkCount }, 'run: forwarding chunk to queue');
+            this.queue.put(chunk);
+          }
+
+          // Success!
+          this._log.info(
+            { llm: llm.label(), totalChunks: chunkCount, textLength: textSent.length },
+            'FallbackAdapter: Provider succeeded',
+          );
+          return;
+        } catch (error) {
+          // Mark as unavailable if it was available before
+          if (status.available) {
+            status.available = false;
+            this.adapter._emitAvailabilityChanged(llm, false);
+          }
+
+          // Check if we sent data before failing
+          if (textSent || toolCallsSent.length > 0) {
+            const extra = { textSent, toolCallsSent };
+
+            if (!this.adapter.retryOnChunkSent) {
+              this._log.error(
+                { llm: llm.label(), ...extra },
+                'failed after sending chunk, skip retrying. Set `retryOnChunkSent` to `true` to enable.',
+              );
+              throw error;
+            }
+
+            this._log.warn(
+              { llm: llm.label(), ...extra },
+              'failed after sending chunk, retrying...',
+            );
+          }
+        }
+      }
+
+      // Trigger background recovery for this LLM
+      this.tryRecovery(llm, i);
+    }
+
+    // All LLMs failed
+    const duration = (Date.now() - startTime) / 1000;
+    const labels = this.adapter.llms.map((l) => l.label()).join(', ');
+    throw new APIConnectionError({
+      message: `all LLMs failed (${labels}) after ${duration.toFixed(2)}s`,
+    });
+  }
+}
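For reference, the adapter options are expressed in seconds while APIConnectOptions uses milliseconds; tryGenerate above does the conversion and clamps maxRetry to the per-LLM setting. A configuration sketch with illustrative values (the `llm` namespace import and placeholder LLM instances are assumptions):

```typescript
import { llm } from '@livekit/agents';

declare const primaryLLM: llm.LLM; // placeholders for real provider LLMs
declare const backupLLM: llm.LLM;

const adapter = new llm.FallbackAdapter({
  llms: [primaryLLM, backupLLM],
  attemptTimeout: 3,       // seconds per attempt
  maxRetryPerLLM: 1,       // internal retries before falling through to the next LLM
  retryInterval: 0.25,     // seconds between those retries
  retryOnChunkSent: false, // default: don't switch providers mid-response
});

// Each child llm.chat() call is then issued with roughly:
//   { maxRetry: 1, timeoutMs: 3000, retryIntervalMs: 250 }
```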
package/src/llm/index.ts CHANGED

package/src/llm/llm.ts CHANGED

@@ -135,7 +135,7 @@ export abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    startSoon(() => this.mainTask().
+    startSoon(() => this.mainTask().finally(() => this.queue.close()));
   }

   private _mainTaskImpl = async (span: Span) => {
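The one-line change above chains `.finally(() => this.queue.close())` onto the background main task, so the chunk queue is closed whenever the task settles, success or failure, and iteration over the stream ends. A consumer-side sketch (the `someLLM` and `chatCtx` names are placeholders, not part of this diff):

```typescript
import { llm } from '@livekit/agents';

declare const someLLM: llm.LLM;         // placeholder
declare const chatCtx: llm.ChatContext; // placeholder

const stream = someLLM.chat({ chatCtx });
for await (const chunk of stream) {
  // iteration terminates once the queue is closed, i.e. when mainTask() settles
  process.stdout.write(chunk.delta?.content ?? '');
}
```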
package/src/log.ts CHANGED

@@ -62,11 +62,6 @@ class OtelDestination extends Writable {
  * Enable OTEL logging by reconfiguring the logger with multistream.
  * Uses a custom destination that receives full JSON logs (with msg, level, time).
  *
- * The base logger level is set to 'debug' so all logs are generated,
- * while each stream filters to its own level:
- * - Terminal: user-specified level (default: 'info')
- * - OTEL/Cloud: always 'debug' to capture all logs for observability
- *
  * @internal
  */
 export const enableOtelLogging = () => {

@@ -78,12 +73,11 @@ export const enableOtelLogging = () => {

   const { pretty, level } = loggerOptions;

-  const
+  const logLevel = level || 'info';
   const streams: { stream: DestinationStream; level: string }[] = [
-    { stream: pretty ? pinoPretty({ colorize: true }) : process.stdout, level:
+    { stream: pretty ? pinoPretty({ colorize: true }) : process.stdout, level: logLevel },
     { stream: new OtelDestination(), level: 'debug' },
   ];

-
-  logger = pino({ level: 'debug' }, multistream(streams));
+  logger = pino({ level: logLevel }, multistream(streams));
 };
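Functionally, the base pino level now follows the configured level instead of being pinned to 'debug': the OTEL stream still subscribes at 'debug', but records below the base level are no longer generated at all. A sketch of the effect, assuming initializeLogger accepts a level option as the `loggerOptions` destructuring suggests (the import path is also an assumption):

```typescript
import { initializeLogger } from '@livekit/agents';

// With the default 'info' level, debug records are not produced, so the
// OTEL destination only receives info-and-above after this change.
initializeLogger({ pretty: false });

// To ship debug records to OTEL again, raise the configured level explicitly.
initializeLogger({ pretty: false, level: 'debug' });
```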
package/src/stt/stt.ts CHANGED

@@ -195,7 +195,7 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    startSoon(() => this.mainTask().
+    startSoon(() => this.mainTask().finally(() => this.queue.close()));
   }

   private async mainTask() {
package/src/tts/tts.ts CHANGED

@@ -169,7 +169,7 @@ export abstract class SynthesizeStream
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    startSoon(() => this.mainTask().
+    startSoon(() => this.mainTask().finally(() => this.queue.close()));
   }

   private _mainTaskImpl = async (span: Span) => {

@@ -448,7 +448,7 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    Promise.resolve().then(() => this.mainTask().
+    Promise.resolve().then(() => this.mainTask().finally(() => this.queue.close()));
   }

   private _mainTaskImpl = async (span: Span) => {