@livekit/agents-plugin-openai 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +26 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/llm.d.ts +195 -0
- package/dist/llm.d.ts.map +1 -0
- package/dist/llm.js +453 -0
- package/dist/llm.js.map +1 -0
- package/dist/models.d.ts +10 -0
- package/dist/models.d.ts.map +1 -0
- package/dist/models.js +5 -0
- package/dist/models.js.map +1 -0
- package/dist/realtime/api_proto.d.ts +1 -1
- package/dist/realtime/api_proto.d.ts.map +1 -1
- package/dist/realtime/realtime_model.d.ts +9 -3
- package/dist/realtime/realtime_model.d.ts.map +1 -1
- package/dist/realtime/realtime_model.js +601 -457
- package/dist/realtime/realtime_model.js.map +1 -1
- package/package.json +5 -3
- package/src/index.ts +2 -0
- package/src/llm.ts +670 -0
- package/src/models.ts +107 -0
- package/src/realtime/api_proto.ts +1 -1
- package/src/realtime/realtime_model.ts +155 -15
- package/tsconfig.tsbuildinfo +1 -1
package/src/models.ts
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
/**
 * Chat model identifiers from the GPT-4o, GPT-4 and GPT-3.5 families,
 * including their dated snapshot names.
 */
export type ChatModels =
  | 'gpt-4o'
  | 'gpt-4o-2024-05-13'
  | 'gpt-4o-mini'
  | 'gpt-4o-mini-2024-07-18'
  | 'gpt-4-turbo'
  | 'gpt-4-turbo-2024-04-09'
  | 'gpt-4-turbo-preview'
  | 'gpt-4-0125-preview'
  | 'gpt-4-1106-preview'
  | 'gpt-4-vision-preview'
  | 'gpt-4-1106-vision-preview'
  | 'gpt-4'
  | 'gpt-4-0314'
  | 'gpt-4-0613'
  | 'gpt-4-32k'
  | 'gpt-4-32k-0314'
  | 'gpt-4-32k-0613'
  | 'gpt-3.5-turbo'
  | 'gpt-3.5-turbo-16k'
  | 'gpt-3.5-turbo-0301'
  | 'gpt-3.5-turbo-0613'
  | 'gpt-3.5-turbo-1106'
  | 'gpt-3.5-turbo-16k-0613';
|
|
29
|
+
|
|
30
|
+
// adapters for OpenAI-compatible LLMs
|
|
31
|
+
|
|
32
|
+
/** Chat model identifiers listed for the Telnyx OpenAI-compatible adapter. */
export type TelnyxChatModels =
  | 'meta-llama/Meta-Llama-3.1-8B-Instruct'
  | 'meta-llama/Meta-Llama-3.1-70B-Instruct';
|
|
35
|
+
|
|
36
|
+
/** Chat model identifiers listed for the Cerebras OpenAI-compatible adapter. */
export type CerebrasChatModels =
  | 'llama3.1-8b'
  | 'llama3.1-70b';
|
|
37
|
+
|
|
38
|
+
/** Chat model identifiers listed for the Perplexity OpenAI-compatible adapter. */
export type PerplexityChatModels =
  | 'llama-3.1-sonar-small-128k-online'
  | 'llama-3.1-sonar-small-128k-chat'
  | 'llama-3.1-sonar-large-128k-online'
  | 'llama-3.1-sonar-large-128k-chat'
  | 'llama-3.1-8b-instruct'
  | 'llama-3.1-70b-instruct';
|
|
45
|
+
|
|
46
|
+
/** Chat model identifiers listed for the Groq OpenAI-compatible adapter. */
export type GroqChatModels =
  | 'llama-3.1-405b-reasoning'
  | 'llama-3.1-70b-versatile'
  | 'llama-3.1-8b-instant'
  | 'llama3-groq-70b-8192-tool-use-preview'
  | 'llama3-groq-8b-8192-tool-use-preview'
  | 'llama-guard-3-8b'
  | 'llama3-70b-8192'
  | 'llama3-8b-8192'
  | 'mixtral-8x7b-32768'
  | 'gemma-7b-it'
  | 'gemma2-9b-it';
|
|
58
|
+
|
|
59
|
+
/** Chat model identifiers listed for the DeepSeek OpenAI-compatible adapter. */
export type DeepSeekChatModels =
  | 'deepseek-coder'
  | 'deepseek-chat';
|
|
60
|
+
|
|
61
|
+
/**
 * Chat model identifiers listed for the Together OpenAI-compatible adapter.
 * Each identifier has the form `<organization>/<model name>`.
 */
export type TogetherChatModels =
  | 'garage-bAInd/Platypus2-70B-instruct'
  | 'google/gemma-2-27b-it'
  | 'google/gemma-2-9b-it'
  | 'google/gemma-2b-it'
  | 'google/gemma-7b-it'
  | 'lmsys/vicuna-13b-v1.5'
  | 'lmsys/vicuna-7b-v1.5'
  | 'meta-llama/Llama-2-13b-chat-hf'
  | 'meta-llama/Llama-2-70b-chat-hf'
  | 'meta-llama/Llama-2-7b-chat-hf'
  | 'meta-llama/Llama-3-70b-chat-hf'
  | 'meta-llama/Llama-3-8b-chat-hf'
  | 'meta-llama/Meta-Llama-3-70B-Instruct-Lite'
  | 'meta-llama/Meta-Llama-3-70B-Instruct-Turbo'
  | 'meta-llama/Meta-Llama-3-8B-Instruct-Lite'
  | 'meta-llama/Meta-Llama-3-8B-Instruct-Turbo'
  | 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo'
  | 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo'
  | 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
  | 'mistralai/Mistral-7B-Instruct-v0.1'
  | 'mistralai/Mistral-7B-Instruct-v0.2'
  | 'mistralai/Mistral-7B-Instruct-v0.3'
  | 'mistralai/Mixtral-8x22B-Instruct-v0.1'
  | 'mistralai/Mixtral-8x7B-Instruct-v0.1'
  | 'openchat/openchat-3.5-1210'
  | 'snorkelai/Snorkel-Mistral-PairRM-DPO'
  | 'teknium/OpenHermes-2-Mistral-7B'
  | 'teknium/OpenHermes-2p5-Mistral-7B'
  | 'togethercomputer/Llama-2-7B-32K-Instruct'
  | 'togethercomputer/RedPajama-INCITE-7B-Chat'
  | 'togethercomputer/RedPajama-INCITE-Chat-3B-v1'
  | 'togethercomputer/StripedHyena-Nous-7B'
  | 'togethercomputer/alpaca-7b'
  | 'upstage/SOLAR-10.7B-Instruct-v1.0'
  | 'zero-one-ai/Yi-34B-Chat';
|
|
97
|
+
|
|
98
|
+
/**
 * Chat model identifiers listed for the Octo OpenAI-compatible adapter.
 *
 * `'wizardlm-2-8x22b'` and `'llamaguard-2-7b'` are separate members; they were
 * previously fused into the single literal `'wizardlm-2-8x22bllamaguard-2-7b'`,
 * so neither real identifier type-checked.
 */
export type OctoChatModels =
  | 'meta-llama-3-70b-instruct'
  | 'meta-llama-3.1-405b-instruct'
  | 'meta-llama-3.1-70b-instruct'
  | 'meta-llama-3.1-8b-instruct'
  | 'mistral-7b-instruct'
  | 'mixtral-8x7b-instruct'
  | 'wizardlm-2-8x22b'
  | 'llamaguard-2-7b';
|
|
106
|
+
|
|
107
|
+
/** Chat model identifiers listed for the xAI OpenAI-compatible adapter. */
export type XAIChatModels =
  | 'grok-2'
  | 'grok-2-mini'
  | 'grok-2-mini-public'
  | 'grok-2-public';
|
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
AsyncIterableQueue,
|
|
6
|
+
Future,
|
|
7
|
+
Queue,
|
|
8
|
+
llm,
|
|
9
|
+
log,
|
|
10
|
+
mergeFrames,
|
|
11
|
+
multimodal,
|
|
12
|
+
} from '@livekit/agents';
|
|
5
13
|
import { AudioFrame } from '@livekit/rtc-node';
|
|
6
|
-
import { once } from 'events';
|
|
14
|
+
import { once } from 'node:events';
|
|
7
15
|
import { WebSocket } from 'ws';
|
|
8
16
|
import * as api_proto from './api_proto.js';
|
|
9
17
|
|
|
@@ -29,6 +37,7 @@ export interface RealtimeResponse {
|
|
|
29
37
|
id: string;
|
|
30
38
|
status: api_proto.ResponseStatus;
|
|
31
39
|
statusDetails: api_proto.ResponseStatusDetails | null;
|
|
40
|
+
usage: api_proto.ResponseResource['usage'] | null;
|
|
32
41
|
output: RealtimeOutput[];
|
|
33
42
|
doneFut: Future;
|
|
34
43
|
}
|
|
@@ -108,6 +117,7 @@ class InputAudioBuffer {
|
|
|
108
117
|
|
|
109
118
|
class ConversationItem {
|
|
110
119
|
#session: RealtimeSession;
|
|
120
|
+
#logger = log();
|
|
111
121
|
|
|
112
122
|
constructor(session: RealtimeSession) {
|
|
113
123
|
this.#session = session;
|
|
@@ -129,12 +139,126 @@ class ConversationItem {
|
|
|
129
139
|
});
|
|
130
140
|
}
|
|
131
141
|
|
|
132
|
-
create(
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
142
|
+
/**
 * Queues a `conversation.item.create` client event built from a chat message.
 *
 * Mapping:
 * - a message with `toolCallId` becomes a `function_call_output` item;
 * - a USER message becomes a `user` message with `input_text` / `input_audio` parts;
 * - an ASSISTANT message becomes an `assistant` message with `text` parts;
 * - a SYSTEM message becomes a `system` message with `input_text` parts;
 * - any other role is logged as unsupported and nothing is queued.
 *
 * Messages whose `content` is falsy (including the empty string) are ignored.
 * Audio parts in assistant/system messages are dropped with a warning. Parts
 * that are neither strings nor audio are skipped without a warning.
 *
 * @param message - chat message to convert into a conversation item
 * @param previousItemId - forwarded as `previous_item_id` on the event
 * @throws TypeError when `message.toolCallId` is set but `message.content` is not a string
 */
create(message: llm.ChatMessage, previousItemId?: string): void {
  if (!message.content) {
    return;
  }

  // Single type guard shared by all role branches: a part is treated as audio
  // when it carries a `frame`; otherwise it is treated as a ChatImage.
  const isChatAudio = (c: llm.ChatAudio | llm.ChatImage): c is llm.ChatAudio =>
    (c as llm.ChatAudio).frame !== undefined;

  let event: api_proto.ConversationItemCreateEvent;

  if (message.toolCallId) {
    if (typeof message.content !== 'string') {
      throw new TypeError('message.content must be a string');
    }

    event = {
      type: 'conversation.item.create',
      previous_item_id: previousItemId,
      item: {
        type: 'function_call_output',
        call_id: message.toolCallId,
        output: message.content,
      },
    };
  } else {
    // Normalize to an array so every role branch can iterate uniformly.
    let content = message.content;
    if (!Array.isArray(content)) {
      content = [content];
    }

    if (message.role === llm.ChatRole.USER) {
      const contents: (api_proto.InputTextContent | api_proto.InputAudioContent)[] = [];
      for (const c of content) {
        if (typeof c === 'string') {
          contents.push({
            type: 'input_text',
            text: c,
          });
        } else if (isChatAudio(c)) {
          // Encode only the bytes covered by the sample view. Passing
          // `data.buffer` alone would serialize the whole backing ArrayBuffer,
          // including bytes outside the view when it has an offset or is shorter.
          const samples = mergeFrames(c.frame).data;
          contents.push({
            type: 'input_audio',
            audio: Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength).toString(
              'base64',
            ),
          });
        }
      }

      event = {
        type: 'conversation.item.create',
        previous_item_id: previousItemId,
        item: {
          type: 'message',
          role: 'user',
          content: contents,
        },
      };
    } else if (message.role === llm.ChatRole.ASSISTANT) {
      const contents: api_proto.TextContent[] = [];
      for (const c of content) {
        if (typeof c === 'string') {
          contents.push({
            type: 'text',
            text: c,
          });
        } else if (isChatAudio(c)) {
          this.#logger.warn('audio content in assistant message is not supported');
        }
      }

      event = {
        type: 'conversation.item.create',
        previous_item_id: previousItemId,
        item: {
          type: 'message',
          role: 'assistant',
          content: contents,
        },
      };
    } else if (message.role === llm.ChatRole.SYSTEM) {
      const contents: api_proto.InputTextContent[] = [];
      for (const c of content) {
        if (typeof c === 'string') {
          contents.push({
            type: 'input_text',
            text: c,
          });
        } else if (isChatAudio(c)) {
          this.#logger.warn('audio content in system message is not supported');
        }
      }

      event = {
        type: 'conversation.item.create',
        previous_item_id: previousItemId,
        item: {
          type: 'message',
          role: 'system',
          content: contents,
        },
      };
    } else {
      this.#logger
        .child({ message })
        .warn('chat message is not supported inside the realtime API');
      return;
    }
  }

  this.#session.queueMsg(event);
}
|
|
139
263
|
}
|
|
140
264
|
|
|
@@ -302,6 +426,7 @@ export class RealtimeModel extends multimodal.RealtimeModel {
|
|
|
302
426
|
|
|
303
427
|
session({
|
|
304
428
|
fncCtx,
|
|
429
|
+
chatCtx,
|
|
305
430
|
modalities = this.#defaultOpts.modalities,
|
|
306
431
|
instructions = this.#defaultOpts.instructions,
|
|
307
432
|
voice = this.#defaultOpts.voice,
|
|
@@ -313,6 +438,7 @@ export class RealtimeModel extends multimodal.RealtimeModel {
|
|
|
313
438
|
maxResponseOutputTokens = this.#defaultOpts.maxResponseOutputTokens,
|
|
314
439
|
}: {
|
|
315
440
|
fncCtx?: llm.FunctionContext;
|
|
441
|
+
chatCtx?: llm.ChatContext;
|
|
316
442
|
modalities?: ['text', 'audio'] | ['text'];
|
|
317
443
|
instructions?: string;
|
|
318
444
|
voice?: api_proto.Voice;
|
|
@@ -341,7 +467,10 @@ export class RealtimeModel extends multimodal.RealtimeModel {
|
|
|
341
467
|
entraToken: this.#defaultOpts.entraToken,
|
|
342
468
|
};
|
|
343
469
|
|
|
344
|
-
const newSession = new RealtimeSession(opts,
|
|
470
|
+
const newSession = new RealtimeSession(opts, {
|
|
471
|
+
chatCtx: chatCtx || new llm.ChatContext(),
|
|
472
|
+
fncCtx,
|
|
473
|
+
});
|
|
345
474
|
this.#sessions.push(newSession);
|
|
346
475
|
return newSession;
|
|
347
476
|
}
|
|
@@ -352,6 +481,7 @@ export class RealtimeModel extends multimodal.RealtimeModel {
|
|
|
352
481
|
}
|
|
353
482
|
|
|
354
483
|
export class RealtimeSession extends multimodal.RealtimeSession {
|
|
484
|
+
#chatCtx: llm.ChatContext | undefined = undefined;
|
|
355
485
|
#fncCtx: llm.FunctionContext | undefined = undefined;
|
|
356
486
|
#opts: ModelOptions;
|
|
357
487
|
#pendingResponses: { [id: string]: RealtimeResponse } = {};
|
|
@@ -363,10 +493,14 @@ export class RealtimeSession extends multimodal.RealtimeSession {
|
|
|
363
493
|
#closing = true;
|
|
364
494
|
#sendQueue = new Queue<api_proto.ClientEvent>();
|
|
365
495
|
|
|
366
|
-
constructor(
|
|
496
|
+
constructor(
|
|
497
|
+
opts: ModelOptions,
|
|
498
|
+
{ fncCtx, chatCtx }: { fncCtx?: llm.FunctionContext; chatCtx?: llm.ChatContext },
|
|
499
|
+
) {
|
|
367
500
|
super();
|
|
368
501
|
|
|
369
502
|
this.#opts = opts;
|
|
503
|
+
this.#chatCtx = chatCtx;
|
|
370
504
|
this.#fncCtx = fncCtx;
|
|
371
505
|
|
|
372
506
|
this.#task = this.#start();
|
|
@@ -385,6 +519,10 @@ export class RealtimeSession extends multimodal.RealtimeSession {
|
|
|
385
519
|
});
|
|
386
520
|
}
|
|
387
521
|
|
|
522
|
+
/** Chat context supplied when the session was constructed, if any. */
get chatCtx(): llm.ChatContext | undefined {
  const ctx = this.#chatCtx;
  return ctx;
}
|
|
525
|
+
|
|
388
526
|
/** Function context currently held by this session, if any. */
get fncCtx(): llm.FunctionContext | undefined {
  const ctx = this.#fncCtx;
  return ctx;
}
|
|
@@ -787,6 +925,7 @@ export class RealtimeSession extends multimodal.RealtimeSession {
|
|
|
787
925
|
id: response.id,
|
|
788
926
|
status: response.status,
|
|
789
927
|
statusDetails: response.status_details,
|
|
928
|
+
usage: null,
|
|
790
929
|
output: [],
|
|
791
930
|
doneFut: doneFut,
|
|
792
931
|
};
|
|
@@ -800,6 +939,7 @@ export class RealtimeSession extends multimodal.RealtimeSession {
|
|
|
800
939
|
const response = this.#pendingResponses[responseId];
|
|
801
940
|
response.status = responseData.status;
|
|
802
941
|
response.statusDetails = responseData.status_details;
|
|
942
|
+
response.usage = responseData.usage;
|
|
803
943
|
this.#pendingResponses[responseId] = response;
|
|
804
944
|
response.doneFut.resolve();
|
|
805
945
|
this.emit('response_done', response);
|
|
@@ -869,11 +1009,11 @@ export class RealtimeSession extends multimodal.RealtimeSession {
|
|
|
869
1009
|
callId: item.call_id,
|
|
870
1010
|
});
|
|
871
1011
|
this.conversation.item.create(
|
|
872
|
-
{
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
},
|
|
1012
|
+
llm.ChatMessage.createToolFromFunctionResult({
|
|
1013
|
+
name: item.name,
|
|
1014
|
+
toolCallId: item.call_id,
|
|
1015
|
+
result: content,
|
|
1016
|
+
}),
|
|
877
1017
|
output.itemId,
|
|
878
1018
|
);
|
|
879
1019
|
this.response.create();
|