@livekit/agents-plugin-openai 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +17 -0
  3. package/dist/index.d.ts +1 -1
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +1 -1
  6. package/dist/index.js.map +1 -1
  7. package/dist/realtime/api_proto.d.ts +400 -0
  8. package/dist/realtime/api_proto.d.ts.map +1 -0
  9. package/dist/realtime/api_proto.js +9 -0
  10. package/dist/realtime/api_proto.js.map +1 -0
  11. package/dist/realtime/index.d.ts +3 -0
  12. package/dist/realtime/index.d.ts.map +1 -0
  13. package/dist/realtime/index.js +6 -0
  14. package/dist/realtime/index.js.map +1 -0
  15. package/dist/realtime/realtime_model.d.ts +148 -0
  16. package/dist/realtime/realtime_model.d.ts.map +1 -0
  17. package/dist/realtime/realtime_model.js +555 -0
  18. package/dist/realtime/realtime_model.js.map +1 -0
  19. package/package.json +5 -3
  20. package/src/index.ts +1 -2
  21. package/src/realtime/api_proto.ts +568 -0
  22. package/src/realtime/index.ts +5 -0
  23. package/src/realtime/realtime_model.ts +842 -0
  24. package/dist/omni_assistant/agent_playout.d.ts +0 -27
  25. package/dist/omni_assistant/agent_playout.d.ts.map +0 -1
  26. package/dist/omni_assistant/agent_playout.js +0 -111
  27. package/dist/omni_assistant/agent_playout.js.map +0 -1
  28. package/dist/omni_assistant/index.d.ts +0 -61
  29. package/dist/omni_assistant/index.d.ts.map +0 -1
  30. package/dist/omni_assistant/index.js +0 -453
  31. package/dist/omni_assistant/index.js.map +0 -1
  32. package/dist/omni_assistant/proto.d.ts +0 -218
  33. package/dist/omni_assistant/proto.d.ts.map +0 -1
  34. package/dist/omni_assistant/proto.js +0 -68
  35. package/dist/omni_assistant/proto.js.map +0 -1
  36. package/dist/omni_assistant/transcription_forwarder.d.ts +0 -28
  37. package/dist/omni_assistant/transcription_forwarder.d.ts.map +0 -1
  38. package/dist/omni_assistant/transcription_forwarder.js +0 -117
  39. package/dist/omni_assistant/transcription_forwarder.js.map +0 -1
  40. package/src/omni_assistant/agent_playout.ts +0 -127
  41. package/src/omni_assistant/index.ts +0 -547
  42. package/src/omni_assistant/proto.ts +0 -280
  43. package/src/omni_assistant/transcription_forwarder.ts +0 -128
@@ -1,280 +0,0 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
- //
3
- // SPDX-License-Identifier: Apache-2.0
4
- export enum Voice {
5
- ALLOY = 'alloy',
6
- SHIMMER = 'shimmer',
7
- ECHO = 'echo',
8
- }
9
-
10
- export enum TurnEndType {
11
- SERVER_DETECTION = 'server_detection',
12
- CLIENT_DECISION = 'client_decision',
13
- }
14
-
15
- export enum AudioFormat {
16
- PCM16 = 'pcm16',
17
- // G711_ULAW = 'g711-ulaw',
18
- // G711_ALAW = 'g711-alaw',
19
- }
20
-
21
- export enum ServerEventType {
22
- START_SESSION = 'start_session',
23
- ERROR = 'error',
24
- ADD_MESSAGE = 'add_message',
25
- ADD_CONTENT = 'add_content',
26
- MESSAGE_ADDED = 'message_added',
27
- VAD_SPEECH_STARTED = 'vad_speech_started',
28
- VAD_SPEECH_STOPPED = 'vad_speech_stopped',
29
- INPUT_TRANSCRIBED = 'input_transcribed',
30
- GENERATION_CANCELED = 'generation_canceled',
31
- SEND_STATE = 'send_state',
32
- GENERATION_FINISHED = 'generation_finished',
33
- }
34
-
35
- export type ServerEvent =
36
- | {
37
- event: ServerEventType.START_SESSION;
38
- session_id: string;
39
- model: string;
40
- system_fingerprint: string;
41
- }
42
- | {
43
- event: ServerEventType.ERROR;
44
- error: string;
45
- }
46
- | {
47
- event: ServerEventType.ADD_MESSAGE;
48
- previous_id: string;
49
- conversation_label: string;
50
- message: {
51
- role: 'assistant';
52
- content: (
53
- | {
54
- type: 'text';
55
- text: string;
56
- }
57
- | {
58
- type: 'audio';
59
- audio: string;
60
- }
61
- | {
62
- type: 'tool_call';
63
- name: string;
64
- arguments: string;
65
- tool_call_id: string;
66
- }
67
- )[];
68
- };
69
- }
70
- | {
71
- event: ServerEventType.ADD_CONTENT;
72
- message_id: string;
73
- type: 'text' | 'audio' | 'tool_call';
74
- data: string; // text or base64 audio or JSON stringified object
75
- }
76
- | {
77
- event: ServerEventType.MESSAGE_ADDED;
78
- id: string;
79
- previous_id: string;
80
- conversation_label: string;
81
- content:
82
- | {
83
- type: 'tool_call';
84
- name: string;
85
- tool_call_id: string;
86
- arguments: string; // JSON stringified object
87
- }[]
88
- | null;
89
- }
90
- | {
91
- event: ServerEventType.GENERATION_FINISHED;
92
- reason: 'stop' | 'max_tokens' | 'content_filter' | 'interrupt';
93
- conversation_label: string;
94
- message_ids: string[];
95
- }
96
- | {
97
- event: ServerEventType.SEND_STATE;
98
- session_id: string;
99
- input_audio_format: AudioFormat;
100
- vad_active: boolean;
101
- audio_buffer: string;
102
- conversations: any; // TODO(nbsp): get this
103
- session_config: SessionConfig;
104
- }
105
- | {
106
- event:
107
- | ServerEventType.VAD_SPEECH_STARTED
108
- | ServerEventType.VAD_SPEECH_STOPPED
109
- | ServerEventType.GENERATION_CANCELED;
110
- sample_index: number;
111
- message_id: string;
112
- }
113
- | {
114
- event: ServerEventType.INPUT_TRANSCRIBED;
115
- message_id: string;
116
- transcript: string;
117
- };
118
-
119
- export enum ClientEventType {
120
- UPDATE_SESSION_CONFIG = 'update_session_config',
121
- UPDATE_CONVERSATION_CONFIG = 'update_conversation_config',
122
- ADD_MESSAGE = 'add_message',
123
- DELETE_MESSAGE = 'delete_message',
124
- ADD_USER_AUDIO = 'add_user_audio',
125
- COMMIT_USER_AUDIO = 'commit_user_audio',
126
- CANCEL_GENERATION = 'cancel_generation',
127
- GENERATE = 'generate',
128
- CREATE_CONVERSATION = 'create_conversation',
129
- DELETE_CONVERSATION = 'delete_conversation',
130
- TRUNCATE_CONTENT = 'truncate_content',
131
- REQUEST_STATE = 'request_state',
132
- }
133
-
134
- export type ClientEvent =
135
- | ({
136
- event: ClientEventType.UPDATE_SESSION_CONFIG;
137
- } & SessionConfig)
138
- | ({
139
- event: ClientEventType.UPDATE_CONVERSATION_CONFIG;
140
- } & ConversationConfig)
141
- | {
142
- event: ClientEventType.ADD_MESSAGE;
143
- // id, previous_id, conversation_label are unused by us
144
- message: (
145
- | {
146
- role: 'tool';
147
- tool_call_id: string;
148
- }
149
- | {
150
- role: 'user' | 'assistant' | 'system';
151
- }
152
- ) &
153
- (
154
- | {
155
- content: (
156
- | {
157
- type: 'text';
158
- text: string;
159
- }
160
- | {
161
- type: 'tool_call';
162
- name: string;
163
- arguments: string;
164
- tool_call_id: string;
165
- }
166
- )[];
167
- }
168
- | {
169
- role: 'user' | 'tool';
170
- content: (
171
- | {
172
- type: 'text';
173
- text: string;
174
- }
175
- | {
176
- type: 'tool_call';
177
- name: string;
178
- arguments: string;
179
- tool_call_id: string;
180
- }
181
- | {
182
- type: 'audio';
183
- audio: string; // base64 encoded buffer
184
- }
185
- )[];
186
- }
187
- );
188
- }
189
- | {
190
- event: ClientEventType.DELETE_MESSAGE;
191
- id: string;
192
- conversation_label?: string; // defaults to 'default'
193
- }
194
- | {
195
- event: ClientEventType.ADD_USER_AUDIO;
196
- data: string; // base64 encoded buffer
197
- }
198
- | {
199
- event: ClientEventType.COMMIT_USER_AUDIO | ClientEventType.CANCEL_GENERATION;
200
- }
201
- | {
202
- event: ClientEventType.GENERATE;
203
- conversation_label?: string; // defaults to 'default'
204
- }
205
- | {
206
- event:
207
- | ClientEventType.CREATE_CONVERSATION
208
- | ClientEventType.DELETE_CONVERSATION
209
- | ClientEventType.REQUEST_STATE;
210
- label: string;
211
- }
212
- | {
213
- event: ClientEventType.TRUNCATE_CONTENT;
214
- message_id: string;
215
- index: number; // integer, ignored
216
- text_chars?: number; // integer
217
- audio_samples?: number; // integer
218
- };
219
-
220
- export enum ToolChoice {
221
- AUTO = 'auto',
222
- NONE = 'none',
223
- REQUIRED = 'required',
224
- }
225
-
226
- export interface Tool {
227
- type: 'function';
228
- function: {
229
- name: string;
230
- description: string;
231
- parameters: {
232
- type: 'object';
233
- properties: {
234
- [prop: string]: {
235
- [prop: string]: any;
236
- };
237
- };
238
- required_properties: string[];
239
- };
240
- };
241
- }
242
-
243
- export const API_URL = 'wss://api.openai.com/v1/realtime';
244
- export const SAMPLE_RATE = 24000;
245
- export const NUM_CHANNELS = 1;
246
-
247
- export const INPUT_PCM_FRAME_SIZE = 2400; // 100ms
248
- export const OUTPUT_PCM_FRAME_SIZE = 1200; // 50ms
249
-
250
- export type SessionConfig = Partial<{
251
- turn_detection: 'disabled' | 'server_vad';
252
- input_audio_format: AudioFormat;
253
- transcribe_input: boolean;
254
- vad: Partial<{
255
- threshold: number; // 0..1 inclusive, default 0.5
256
- prefix_padding_ms: number; // default 300
257
- silence_duration_ms: number; // default 200
258
- }>;
259
- }>;
260
-
261
- export type ConversationConfig = Partial<{
262
- system_message: string;
263
- voice: Voice;
264
- subscribe_to_user_audio: boolean;
265
- output_audio_format: AudioFormat;
266
- tools: Tool[];
267
- tool_choice: ToolChoice;
268
- temperature: number; // 0.6..1.2 inclusive, default 0.8
269
- max_tokens: number; // 1..4096, default 2048;
270
- disable_audio: boolean;
271
- transcribe_input: boolean;
272
- conversation_label: string; // default "default"
273
- }>;
274
-
275
- export enum State {
276
- INITIALIZING = 'initializing',
277
- LISTENING = 'listening',
278
- THINKING = 'thinking',
279
- SPEAKING = 'speaking',
280
- }
@@ -1,128 +0,0 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
- //
3
- // SPDX-License-Identifier: Apache-2.0
4
- import { log } from '@livekit/agents';
5
- import type { AudioFrame, Room } from '@livekit/rtc-node';
6
-
7
- export interface TranscriptionForwarder {
8
- start(): void;
9
- pushAudio(frame: AudioFrame): void;
10
- pushText(text: string): void;
11
- markTextComplete(): void;
12
- markAudioComplete(): void;
13
- close(interrupt: boolean): Promise<void>;
14
- currentCharacterIndex: number;
15
- }
16
-
17
- export class BasicTranscriptionForwarder implements TranscriptionForwarder {
18
- #room: Room;
19
- #participantIdentity: string;
20
- #trackSid: string;
21
- #currentText: string = '';
22
- #totalAudioDuration: number = 0;
23
- #currentPlayoutTime: number = 0;
24
- #DEFAULT_CHARS_PER_SECOND = 16;
25
- #charsPerSecond: number = this.#DEFAULT_CHARS_PER_SECOND;
26
- #messageId: string;
27
- #isRunning: boolean = false;
28
- currentCharacterIndex: number = 0;
29
-
30
- constructor(room: Room, participantIdentity: string, trackSid: string, messageId: string) {
31
- this.#room = room;
32
- this.#participantIdentity = participantIdentity;
33
- this.#trackSid = trackSid;
34
- this.#messageId = messageId;
35
- }
36
-
37
- start(): void {
38
- if (!this.#isRunning) {
39
- this.#isRunning = true;
40
- this.startPublishingLoop().catch((error) => {
41
- log().error('Error in publishing loop:', error);
42
- this.#isRunning = false;
43
- });
44
- }
45
- }
46
-
47
- pushAudio(frame: AudioFrame): void {
48
- this.#totalAudioDuration += frame.samplesPerChannel / frame.sampleRate;
49
- }
50
-
51
- pushText(text: string): void {
52
- this.#currentText += text;
53
- }
54
-
55
- private textIsComplete: boolean = false;
56
- private audioIsComplete: boolean = false;
57
-
58
- markTextComplete(): void {
59
- this.textIsComplete = true;
60
- this.adjustTimingIfBothFinished();
61
- }
62
-
63
- markAudioComplete(): void {
64
- this.audioIsComplete = true;
65
- this.adjustTimingIfBothFinished();
66
- }
67
-
68
- private adjustTimingIfBothFinished(): void {
69
- if (this.textIsComplete && this.audioIsComplete) {
70
- const actualDuration = this.#totalAudioDuration;
71
- if (actualDuration > 0 && this.#currentText.length > 0) {
72
- this.#charsPerSecond = this.#currentText.length / actualDuration;
73
- }
74
- }
75
- }
76
-
77
- private computeSleepInterval(): number {
78
- return Math.min(Math.max(1 / this.#charsPerSecond, 0.0625), 0.5);
79
- }
80
-
81
- private async startPublishingLoop(): Promise<void> {
82
- this.#isRunning = true;
83
- let sleepInterval = this.computeSleepInterval();
84
- let isComplete = false;
85
- while (this.#isRunning && !isComplete) {
86
- this.#currentPlayoutTime += sleepInterval;
87
- this.currentCharacterIndex = Math.floor(this.#currentPlayoutTime * this.#charsPerSecond);
88
- isComplete = this.textIsComplete && this.currentCharacterIndex >= this.#currentText.length;
89
- await this.publishTranscription(false);
90
- if (this.#isRunning && !isComplete) {
91
- sleepInterval = this.computeSleepInterval();
92
- await new Promise((resolve) => setTimeout(resolve, sleepInterval * 1000));
93
- }
94
- }
95
-
96
- if (this.#isRunning) {
97
- this.close(false);
98
- }
99
- }
100
-
101
- private async publishTranscription(final: boolean): Promise<void> {
102
- const textToPublish = this.#currentText.slice(0, this.currentCharacterIndex);
103
- await this.#room.localParticipant?.publishTranscription({
104
- participantIdentity: this.#participantIdentity,
105
- trackSid: this.#trackSid,
106
- segments: [
107
- {
108
- text: textToPublish,
109
- final: final,
110
- id: this.#messageId,
111
- startTime: BigInt(0),
112
- endTime: BigInt(0),
113
- language: '',
114
- },
115
- ],
116
- });
117
- }
118
-
119
- async close(interrupt: boolean): Promise<void> {
120
- this.#isRunning = false;
121
-
122
- // Publish whatever we had as final
123
- if (!interrupt) {
124
- this.currentCharacterIndex = this.#currentText.length;
125
- }
126
- await this.publishTranscription(true);
127
- }
128
- }