@livekit/agents-plugin-openai 0.3.5 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +22 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/llm.d.ts +195 -0
- package/dist/llm.d.ts.map +1 -0
- package/dist/llm.js +453 -0
- package/dist/llm.js.map +1 -0
- package/dist/models.d.ts +10 -0
- package/dist/models.d.ts.map +1 -0
- package/dist/models.js +5 -0
- package/dist/models.js.map +1 -0
- package/dist/realtime/api_proto.d.ts +1 -1
- package/dist/realtime/api_proto.d.ts.map +1 -1
- package/dist/realtime/realtime_model.d.ts +8 -3
- package/dist/realtime/realtime_model.d.ts.map +1 -1
- package/dist/realtime/realtime_model.js +601 -459
- package/dist/realtime/realtime_model.js.map +1 -1
- package/package.json +5 -3
- package/src/index.ts +2 -0
- package/src/llm.ts +670 -0
- package/src/models.ts +107 -0
- package/src/realtime/api_proto.ts +1 -1
- package/src/realtime/realtime_model.ts +152 -15
- package/tsconfig.tsbuildinfo +1 -1
package/src/models.ts
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
/**
 * String-literal union of the OpenAI chat model names known to this plugin.
 *
 * Members are grouped by model family below; the grouping is purely for
 * readability and has no effect on the type.
 */
export type ChatModels =
  // GPT-4o
  | 'gpt-4o'
  | 'gpt-4o-2024-05-13'
  | 'gpt-4o-mini'
  | 'gpt-4o-mini-2024-07-18'
  // GPT-4 Turbo and preview snapshots
  | 'gpt-4-turbo'
  | 'gpt-4-turbo-2024-04-09'
  | 'gpt-4-turbo-preview'
  | 'gpt-4-0125-preview'
  | 'gpt-4-1106-preview'
  | 'gpt-4-vision-preview'
  | 'gpt-4-1106-vision-preview'
  // GPT-4
  | 'gpt-4'
  | 'gpt-4-0314'
  | 'gpt-4-0613'
  | 'gpt-4-32k'
  | 'gpt-4-32k-0314'
  | 'gpt-4-32k-0613'
  // GPT-3.5 Turbo
  | 'gpt-3.5-turbo'
  | 'gpt-3.5-turbo-16k'
  | 'gpt-3.5-turbo-0301'
  | 'gpt-3.5-turbo-0613'
  | 'gpt-3.5-turbo-1106'
  | 'gpt-3.5-turbo-16k-0613';
|
|
29
|
+
|
|
30
|
+
// adapters for OpenAI-compatible LLMs
|
|
31
|
+
|
|
32
|
+
/**
 * Model names listed for Telnyx, one of the OpenAI-compatible providers
 * covered by this module.
 */
export type TelnyxChatModels =
  | 'meta-llama/Meta-Llama-3.1-8B-Instruct'
  | 'meta-llama/Meta-Llama-3.1-70B-Instruct';
|
|
35
|
+
|
|
36
|
+
/** Model names listed for Cerebras (OpenAI-compatible provider). */
export type CerebrasChatModels =
  | 'llama3.1-8b'
  | 'llama3.1-70b';
|
|
37
|
+
|
|
38
|
+
/** Model names listed for Perplexity (OpenAI-compatible provider). */
export type PerplexityChatModels =
  // "sonar" variants
  | 'llama-3.1-sonar-small-128k-online'
  | 'llama-3.1-sonar-small-128k-chat'
  | 'llama-3.1-sonar-large-128k-online'
  | 'llama-3.1-sonar-large-128k-chat'
  // plain instruct variants
  | 'llama-3.1-8b-instruct'
  | 'llama-3.1-70b-instruct';
|
|
45
|
+
|
|
46
|
+
/** Model names listed for Groq (OpenAI-compatible provider). */
export type GroqChatModels =
  // Llama 3.1
  | 'llama-3.1-405b-reasoning'
  | 'llama-3.1-70b-versatile'
  | 'llama-3.1-8b-instant'
  // Llama 3 (including tool-use previews and guard)
  | 'llama3-groq-70b-8192-tool-use-preview'
  | 'llama3-groq-8b-8192-tool-use-preview'
  | 'llama-guard-3-8b'
  | 'llama3-70b-8192'
  | 'llama3-8b-8192'
  // Mixtral / Gemma
  | 'mixtral-8x7b-32768'
  | 'gemma-7b-it'
  | 'gemma2-9b-it';
|
|
58
|
+
|
|
59
|
+
/** Model names listed for DeepSeek (OpenAI-compatible provider). */
export type DeepSeekChatModels =
  | 'deepseek-coder'
  | 'deepseek-chat';
|
|
60
|
+
|
|
61
|
+
/**
 * Model names listed for Together (OpenAI-compatible provider).
 *
 * Names follow an `<organization>/<model>` pattern; members are grouped by
 * organization prefix for readability only.
 */
export type TogetherChatModels =
  // garage-bAInd
  | 'garage-bAInd/Platypus2-70B-instruct'
  // google
  | 'google/gemma-2-27b-it'
  | 'google/gemma-2-9b-it'
  | 'google/gemma-2b-it'
  | 'google/gemma-7b-it'
  // lmsys
  | 'lmsys/vicuna-13b-v1.5'
  | 'lmsys/vicuna-7b-v1.5'
  // meta-llama
  | 'meta-llama/Llama-2-13b-chat-hf'
  | 'meta-llama/Llama-2-70b-chat-hf'
  | 'meta-llama/Llama-2-7b-chat-hf'
  | 'meta-llama/Llama-3-70b-chat-hf'
  | 'meta-llama/Llama-3-8b-chat-hf'
  | 'meta-llama/Meta-Llama-3-70B-Instruct-Lite'
  | 'meta-llama/Meta-Llama-3-70B-Instruct-Turbo'
  | 'meta-llama/Meta-Llama-3-8B-Instruct-Lite'
  | 'meta-llama/Meta-Llama-3-8B-Instruct-Turbo'
  | 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo'
  | 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo'
  | 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
  // mistralai
  | 'mistralai/Mistral-7B-Instruct-v0.1'
  | 'mistralai/Mistral-7B-Instruct-v0.2'
  | 'mistralai/Mistral-7B-Instruct-v0.3'
  | 'mistralai/Mixtral-8x22B-Instruct-v0.1'
  | 'mistralai/Mixtral-8x7B-Instruct-v0.1'
  // openchat / snorkelai / teknium
  | 'openchat/openchat-3.5-1210'
  | 'snorkelai/Snorkel-Mistral-PairRM-DPO'
  | 'teknium/OpenHermes-2-Mistral-7B'
  | 'teknium/OpenHermes-2p5-Mistral-7B'
  // togethercomputer
  | 'togethercomputer/Llama-2-7B-32K-Instruct'
  | 'togethercomputer/RedPajama-INCITE-7B-Chat'
  | 'togethercomputer/RedPajama-INCITE-Chat-3B-v1'
  | 'togethercomputer/StripedHyena-Nous-7B'
  | 'togethercomputer/alpaca-7b'
  // upstage / zero-one-ai
  | 'upstage/SOLAR-10.7B-Instruct-v1.0'
  | 'zero-one-ai/Yi-34B-Chat';
|
|
97
|
+
|
|
98
|
+
/**
 * Model names listed for Octo (OpenAI-compatible provider).
 *
 * `'wizardlm-2-8x22b'` and `'llamaguard-2-7b'` were previously fused into a
 * single literal because the separator between them was missing; both are now
 * listed individually.
 */
export type OctoChatModels =
  | 'meta-llama-3-70b-instruct'
  | 'meta-llama-3.1-405b-instruct'
  | 'meta-llama-3.1-70b-instruct'
  | 'meta-llama-3.1-8b-instruct'
  | 'mistral-7b-instruct'
  | 'mixtral-8x7b-instruct'
  | 'wizardlm-2-8x22b'
  | 'llamaguard-2-7b'
  // NOTE(review): fused form of the two names above. It is almost certainly not
  // a real model id; it is retained only so code that already references it
  // keeps type-checking. Remove in the next breaking release.
  | 'wizardlm-2-8x22bllamaguard-2-7b';
|
|
106
|
+
|
|
107
|
+
/** Model names listed for xAI (OpenAI-compatible provider). */
export type XAIChatModels =
  | 'grok-2'
  | 'grok-2-mini'
  | 'grok-2-mini-public'
  | 'grok-2-public';
|
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
AsyncIterableQueue,
|
|
6
|
+
Future,
|
|
7
|
+
Queue,
|
|
8
|
+
llm,
|
|
9
|
+
log,
|
|
10
|
+
mergeFrames,
|
|
11
|
+
multimodal,
|
|
12
|
+
} from '@livekit/agents';
|
|
5
13
|
import { AudioFrame } from '@livekit/rtc-node';
|
|
6
|
-
import { once } from 'events';
|
|
14
|
+
import { once } from 'node:events';
|
|
7
15
|
import { WebSocket } from 'ws';
|
|
8
16
|
import * as api_proto from './api_proto.js';
|
|
9
17
|
|
|
@@ -109,6 +117,7 @@ class InputAudioBuffer {
|
|
|
109
117
|
|
|
110
118
|
class ConversationItem {
|
|
111
119
|
#session: RealtimeSession;
|
|
120
|
+
#logger = log();
|
|
112
121
|
|
|
113
122
|
constructor(session: RealtimeSession) {
|
|
114
123
|
this.#session = session;
|
|
@@ -130,12 +139,126 @@ class ConversationItem {
|
|
|
130
139
|
});
|
|
131
140
|
}
|
|
132
141
|
|
|
133
|
-
create(
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
142
|
+
/**
 * Builds a `conversation.item.create` client event from a chat message and
 * queues it on the owning session.
 *
 * Mapping:
 * - a message with `toolCallId` becomes a `function_call_output` item;
 * - a USER message becomes a `message` item with `input_text` / `input_audio` parts;
 * - an ASSISTANT message becomes a `message` item with `text` parts;
 * - a SYSTEM message becomes a `message` item with `input_text` parts;
 * - any other role is logged with a warning and nothing is queued.
 *
 * Audio parts are only forwarded for USER messages; for ASSISTANT and SYSTEM
 * messages they are dropped with a warning. Non-audio, non-string parts are
 * skipped silently.
 *
 * @param message - Chat message to add. Messages with falsy content (this
 *   includes an empty string) are ignored.
 * @param previousItemId - Forwarded unchanged as `previous_item_id` on the event.
 * @throws TypeError if `message.toolCallId` is set but `message.content` is not a string.
 */
create(message: llm.ChatMessage, previousItemId?: string): void {
  if (!message.content) {
    return;
  }

  let event: api_proto.ConversationItemCreateEvent;

  if (message.toolCallId) {
    // A function-call output carries a single string, so structured content is rejected.
    if (typeof message.content !== 'string') {
      throw new TypeError('message.content must be a string');
    }

    event = {
      type: 'conversation.item.create',
      previous_item_id: previousItemId,
      item: {
        type: 'function_call_output',
        call_id: message.toolCallId,
        output: message.content,
      },
    };
  } else {
    // Normalize to an array so every role can iterate its parts the same way.
    const parts = Array.isArray(message.content) ? message.content : [message.content];

    // Type guard telling ChatAudio apart from ChatImage by the presence of `frame`.
    const isChatAudio = (c: llm.ChatAudio | llm.ChatImage): c is llm.ChatAudio =>
      (c as llm.ChatAudio).frame !== undefined;

    if (message.role === llm.ChatRole.USER) {
      const contents: (api_proto.InputTextContent | api_proto.InputAudioContent)[] = [];
      for (const part of parts) {
        if (typeof part === 'string') {
          contents.push({
            type: 'input_text',
            text: part,
          });
        } else if (isChatAudio(part)) {
          // Encode exactly the bytes covered by the sample view. `.buffer` alone is the
          // whole backing ArrayBuffer, which can be larger than the view when the frame
          // data is a slice of a bigger allocation.
          const samples = mergeFrames(part.frame).data;
          contents.push({
            type: 'input_audio',
            audio: Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength).toString(
              'base64',
            ),
          });
        }
      }

      event = {
        type: 'conversation.item.create',
        previous_item_id: previousItemId,
        item: {
          type: 'message',
          role: 'user',
          content: contents,
        },
      };
    } else if (message.role === llm.ChatRole.ASSISTANT) {
      const contents: api_proto.TextContent[] = [];
      for (const part of parts) {
        if (typeof part === 'string') {
          contents.push({
            type: 'text',
            text: part,
          });
        } else if (isChatAudio(part)) {
          this.#logger.warn('audio content in assistant message is not supported');
        }
      }

      event = {
        type: 'conversation.item.create',
        previous_item_id: previousItemId,
        item: {
          type: 'message',
          role: 'assistant',
          content: contents,
        },
      };
    } else if (message.role === llm.ChatRole.SYSTEM) {
      const contents: api_proto.InputTextContent[] = [];
      for (const part of parts) {
        if (typeof part === 'string') {
          contents.push({
            type: 'input_text',
            text: part,
          });
        } else if (isChatAudio(part)) {
          this.#logger.warn('audio content in system message is not supported');
        }
      }

      event = {
        type: 'conversation.item.create',
        previous_item_id: previousItemId,
        item: {
          type: 'message',
          role: 'system',
          content: contents,
        },
      };
    } else {
      this.#logger
        .child({ message })
        .warn('chat message is not supported inside the realtime API');
      return;
    }
  }

  this.#session.queueMsg(event);
}
|
|
140
263
|
}
|
|
141
264
|
|
|
@@ -303,6 +426,7 @@ export class RealtimeModel extends multimodal.RealtimeModel {
|
|
|
303
426
|
|
|
304
427
|
session({
|
|
305
428
|
fncCtx,
|
|
429
|
+
chatCtx,
|
|
306
430
|
modalities = this.#defaultOpts.modalities,
|
|
307
431
|
instructions = this.#defaultOpts.instructions,
|
|
308
432
|
voice = this.#defaultOpts.voice,
|
|
@@ -314,6 +438,7 @@ export class RealtimeModel extends multimodal.RealtimeModel {
|
|
|
314
438
|
maxResponseOutputTokens = this.#defaultOpts.maxResponseOutputTokens,
|
|
315
439
|
}: {
|
|
316
440
|
fncCtx?: llm.FunctionContext;
|
|
441
|
+
chatCtx?: llm.ChatContext;
|
|
317
442
|
modalities?: ['text', 'audio'] | ['text'];
|
|
318
443
|
instructions?: string;
|
|
319
444
|
voice?: api_proto.Voice;
|
|
@@ -342,7 +467,10 @@ export class RealtimeModel extends multimodal.RealtimeModel {
|
|
|
342
467
|
entraToken: this.#defaultOpts.entraToken,
|
|
343
468
|
};
|
|
344
469
|
|
|
345
|
-
const newSession = new RealtimeSession(opts,
|
|
470
|
+
const newSession = new RealtimeSession(opts, {
|
|
471
|
+
chatCtx: chatCtx || new llm.ChatContext(),
|
|
472
|
+
fncCtx,
|
|
473
|
+
});
|
|
346
474
|
this.#sessions.push(newSession);
|
|
347
475
|
return newSession;
|
|
348
476
|
}
|
|
@@ -353,6 +481,7 @@ export class RealtimeModel extends multimodal.RealtimeModel {
|
|
|
353
481
|
}
|
|
354
482
|
|
|
355
483
|
export class RealtimeSession extends multimodal.RealtimeSession {
|
|
484
|
+
#chatCtx: llm.ChatContext | undefined = undefined;
|
|
356
485
|
#fncCtx: llm.FunctionContext | undefined = undefined;
|
|
357
486
|
#opts: ModelOptions;
|
|
358
487
|
#pendingResponses: { [id: string]: RealtimeResponse } = {};
|
|
@@ -364,10 +493,14 @@ export class RealtimeSession extends multimodal.RealtimeSession {
|
|
|
364
493
|
#closing = true;
|
|
365
494
|
#sendQueue = new Queue<api_proto.ClientEvent>();
|
|
366
495
|
|
|
367
|
-
constructor(
|
|
496
|
+
constructor(
|
|
497
|
+
opts: ModelOptions,
|
|
498
|
+
{ fncCtx, chatCtx }: { fncCtx?: llm.FunctionContext; chatCtx?: llm.ChatContext },
|
|
499
|
+
) {
|
|
368
500
|
super();
|
|
369
501
|
|
|
370
502
|
this.#opts = opts;
|
|
503
|
+
this.#chatCtx = chatCtx;
|
|
371
504
|
this.#fncCtx = fncCtx;
|
|
372
505
|
|
|
373
506
|
this.#task = this.#start();
|
|
@@ -386,6 +519,10 @@ export class RealtimeSession extends multimodal.RealtimeSession {
|
|
|
386
519
|
});
|
|
387
520
|
}
|
|
388
521
|
|
|
522
|
+
/** Chat context supplied to the constructor, or `undefined` if none was given. */
get chatCtx(): llm.ChatContext | undefined {
  return this.#chatCtx;
}
|
|
525
|
+
|
|
389
526
|
/** Function context currently held by this session, or `undefined` if none is set. */
get fncCtx(): llm.FunctionContext | undefined {
  return this.#fncCtx;
}
|
|
@@ -872,11 +1009,11 @@ export class RealtimeSession extends multimodal.RealtimeSession {
|
|
|
872
1009
|
callId: item.call_id,
|
|
873
1010
|
});
|
|
874
1011
|
this.conversation.item.create(
|
|
875
|
-
{
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
},
|
|
1012
|
+
llm.ChatMessage.createToolFromFunctionResult({
|
|
1013
|
+
name: item.name,
|
|
1014
|
+
toolCallId: item.call_id,
|
|
1015
|
+
result: content,
|
|
1016
|
+
}),
|
|
880
1017
|
output.itemId,
|
|
881
1018
|
);
|
|
882
1019
|
this.response.create();
|