@livekit/agents-plugin-openai 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,218 @@
1
/** String-valued enum of the voice identifiers declared by this module. */
export declare enum Voice {
    ALLOY = 'alloy',
    SHIMMER = 'shimmer',
    ECHO = 'echo',
}
6
/** String-valued enum naming the two turn-end modes: server detection or client decision. */
export declare enum TurnEndType {
    SERVER_DETECTION = 'server_detection',
    CLIENT_DECISION = 'client_decision',
}
10
/** Audio encodings declared by this module; only `pcm16` is present. */
export declare enum AudioFormat {
    PCM16 = 'pcm16',
}
13
/** Discriminator values used in the `event` field of {@link ServerEvent}. */
export declare enum ServerEventType {
    START_SESSION = 'start_session',
    ERROR = 'error',
    ADD_MESSAGE = 'add_message',
    ADD_CONTENT = 'add_content',
    MESSAGE_ADDED = 'message_added',
    VAD_SPEECH_STARTED = 'vad_speech_started',
    VAD_SPEECH_STOPPED = 'vad_speech_stopped',
    INPUT_TRANSCRIBED = 'input_transcribed',
    GENERATION_CANCELED = 'generation_canceled',
    SEND_STATE = 'send_state',
    GENERATION_FINISHED = 'generation_finished',
}
26
/**
 * Discriminated union of server-side event payloads, keyed by the `event`
 * field (a {@link ServerEventType} member). Narrow on `event` before reading
 * any variant-specific property.
 */
export type ServerEvent =
    | {
        event: ServerEventType.START_SESSION;
        session_id: string;
        model: string;
        system_fingerprint: string;
    }
    | {
        event: ServerEventType.ERROR;
        error: string;
    }
    | {
        event: ServerEventType.ADD_MESSAGE;
        previous_id: string;
        conversation_label: string;
        message: {
            role: 'assistant';
            content: Array<
                | { type: 'text'; text: string }
                | { type: 'audio'; audio: string }
                | {
                    type: 'tool_call';
                    name: string;
                    arguments: string;
                    tool_call_id: string;
                }
            >;
        };
    }
    | {
        event: ServerEventType.ADD_CONTENT;
        message_id: string;
        type: 'text' | 'audio' | 'tool_call';
        data: string;
    }
    | {
        event: ServerEventType.MESSAGE_ADDED;
        id: string;
        previous_id: string;
        conversation_label: string;
        // Either a list of tool calls or null.
        content: Array<{
            type: 'tool_call';
            name: string;
            tool_call_id: string;
            arguments: string;
        }> | null;
    }
    | {
        event: ServerEventType.GENERATION_FINISHED;
        reason: 'stop' | 'max_tokens' | 'content_filter' | 'interrupt';
        conversation_label: string;
        message_ids: string[];
    }
    | {
        event: ServerEventType.SEND_STATE;
        session_id: string;
        input_audio_format: AudioFormat;
        vad_active: boolean;
        audio_buffer: string;
        // Left untyped by the original declaration.
        conversations: any;
        session_config: SessionConfig;
    }
    | {
        // Three event kinds share this payload shape.
        event:
            | ServerEventType.VAD_SPEECH_STARTED
            | ServerEventType.VAD_SPEECH_STOPPED
            | ServerEventType.GENERATION_CANCELED;
        sample_index: number;
        message_id: string;
    }
    | {
        event: ServerEventType.INPUT_TRANSCRIBED;
        message_id: string;
        transcript: string;
    };
91
/** Discriminator values used in the `event` field of {@link ClientEvent}. */
export declare enum ClientEventType {
    UPDATE_SESSION_CONFIG = 'update_session_config',
    UPDATE_CONVERSATION_CONFIG = 'update_conversation_config',
    ADD_MESSAGE = 'add_message',
    DELETE_MESSAGE = 'delete_message',
    ADD_USER_AUDIO = 'add_user_audio',
    COMMIT_USER_AUDIO = 'commit_user_audio',
    CANCEL_GENERATION = 'cancel_generation',
    GENERATE = 'generate',
    CREATE_CONVERSATION = 'create_conversation',
    DELETE_CONVERSATION = 'delete_conversation',
    TRUNCATE_CONTENT = 'truncate_content',
    REQUEST_STATE = 'request_state',
}
105
/**
 * Discriminated union of client-side event payloads, keyed by the `event`
 * field (a {@link ClientEventType} member). The two config-update variants
 * carry the config fields inline via intersection.
 */
export type ClientEvent =
    | ({ event: ClientEventType.UPDATE_SESSION_CONFIG } & SessionConfig)
    | ({ event: ClientEventType.UPDATE_CONVERSATION_CONFIG } & ConversationConfig)
    | {
        event: ClientEventType.ADD_MESSAGE;
        // Role part intersected with content part; only the second content
        // shape (roles 'user' | 'tool') admits audio items.
        message: (
            | { role: 'tool'; tool_call_id: string }
            | { role: 'user' | 'assistant' | 'system' }
        ) & (
            | {
                content: Array<
                    | { type: 'text'; text: string }
                    | {
                        type: 'tool_call';
                        name: string;
                        arguments: string;
                        tool_call_id: string;
                    }
                >;
            }
            | {
                role: 'user' | 'tool';
                content: Array<
                    | { type: 'text'; text: string }
                    | {
                        type: 'tool_call';
                        name: string;
                        arguments: string;
                        tool_call_id: string;
                    }
                    | { type: 'audio'; audio: string }
                >;
            }
        );
    }
    | {
        event: ClientEventType.DELETE_MESSAGE;
        id: string;
        conversation_label?: string;
    }
    | {
        event: ClientEventType.ADD_USER_AUDIO;
        data: string;
    }
    | {
        // Payload-free events.
        event: ClientEventType.COMMIT_USER_AUDIO | ClientEventType.CANCEL_GENERATION;
    }
    | {
        event: ClientEventType.GENERATE;
        conversation_label?: string;
    }
    | {
        event:
            | ClientEventType.CREATE_CONVERSATION
            | ClientEventType.DELETE_CONVERSATION
            | ClientEventType.REQUEST_STATE;
        label: string;
    }
    | {
        event: ClientEventType.TRUNCATE_CONTENT;
        message_id: string;
        index: number;
        text_chars?: number;
        audio_samples?: number;
    };
163
/** String-valued enum used for the `tool_choice` field of {@link ConversationConfig}. */
export declare enum ToolChoice {
    AUTO = 'auto',
    NONE = 'none',
    REQUIRED = 'required',
}
168
/**
 * Shape of a tool definition: a `function` entry with a name, a description
 * and an object-typed parameter schema.
 */
export interface Tool {
    type: 'function';
    function: {
        name: string;
        description: string;
        parameters: {
            type: 'object';
            // Property name -> free-form descriptor object.
            properties: {
                [prop: string]: {
                    [prop: string]: any;
                };
            };
            required_properties: string[];
        };
    };
}
184
/** WebSocket endpoint URL constant. */
export declare const API_URL = 'wss://api.openai.com/v1/realtime';
/** Audio sample rate constant (24000). */
export declare const SAMPLE_RATE = 24000;
/** Audio channel count constant (1). */
export declare const NUM_CHANNELS = 1;
/** Input PCM frame size constant (2400). */
export declare const INPUT_PCM_FRAME_SIZE = 2400;
/** Output PCM frame size constant (1200). */
export declare const OUTPUT_PCM_FRAME_SIZE = 1200;
189
/** Session-level settings; every field, including each `vad` sub-field, is optional. */
export type SessionConfig = Partial<{
    turn_detection: 'disabled' | 'server_vad';
    input_audio_format: AudioFormat;
    transcribe_input: boolean;
    vad: Partial<{
        threshold: number;
        prefix_padding_ms: number;
        silence_duration_ms: number;
    }>;
}>;
199
/** Conversation-level settings; every field is optional. */
export type ConversationConfig = Partial<{
    system_message: string;
    voice: Voice;
    subscribe_to_user_audio: boolean;
    output_audio_format: AudioFormat;
    tools: Tool[];
    tool_choice: ToolChoice;
    temperature: number;
    max_tokens: number;
    disable_audio: boolean;
    transcribe_input: boolean;
    conversation_label: string;
}>;
212
/** String-valued enum of the four states declared by this module. */
export declare enum State {
    INITIALIZING = 'initializing',
    LISTENING = 'listening',
    THINKING = 'thinking',
    SPEAKING = 'speaking',
}
218
+ //# sourceMappingURL=proto.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"proto.d.ts","sourceRoot":"","sources":["../../src/omni_assistant/proto.ts"],"names":[],"mappings":"AAGA,oBAAY,KAAK;IACf,KAAK,UAAU;IACf,OAAO,YAAY;IACnB,IAAI,SAAS;CACd;AAED,oBAAY,WAAW;IACrB,gBAAgB,qBAAqB;IACrC,eAAe,oBAAoB;CACpC;AAED,oBAAY,WAAW;IACrB,KAAK,UAAU;CAGhB;AAED,oBAAY,eAAe;IACzB,aAAa,kBAAkB;IAC/B,KAAK,UAAU;IACf,WAAW,gBAAgB;IAC3B,WAAW,gBAAgB;IAC3B,aAAa,kBAAkB;IAC/B,kBAAkB,uBAAuB;IACzC,kBAAkB,uBAAuB;IACzC,iBAAiB,sBAAsB;IACvC,mBAAmB,wBAAwB;IAC3C,UAAU,eAAe;IACzB,mBAAmB,wBAAwB;CAC5C;AAED,MAAM,MAAM,WAAW,GACnB;IACE,KAAK,EAAE,eAAe,CAAC,aAAa,CAAC;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,kBAAkB,EAAE,MAAM,CAAC;CAC5B,GACD;IACE,KAAK,EAAE,eAAe,CAAC,KAAK,CAAC;IAC7B,KAAK,EAAE,MAAM,CAAC;CACf,GACD;IACE,KAAK,EAAE,eAAe,CAAC,WAAW,CAAC;IACnC,WAAW,EAAE,MAAM,CAAC;IACpB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,OAAO,EAAE;QACP,IAAI,EAAE,WAAW,CAAC;QAClB,OAAO,EAAE,CACL;YACE,IAAI,EAAE,MAAM,CAAC;YACb,IAAI,EAAE,MAAM,CAAC;SACd,GACD;YACE,IAAI,EAAE,OAAO,CAAC;YACd,KAAK,EAAE,MAAM,CAAC;SACf,GACD;YACE,IAAI,EAAE,WAAW,CAAC;YAClB,IAAI,EAAE,MAAM,CAAC;YACb,SAAS,EAAE,MAAM,CAAC;YAClB,YAAY,EAAE,MAAM,CAAC;SACtB,CACJ,EAAE,CAAC;KACL,CAAC;CACH,GACD;IACE,KAAK,EAAE,eAAe,CAAC,WAAW,CAAC;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,WAAW,CAAC;IACrC,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,KAAK,EAAE,eAAe,CAAC,aAAa,CAAC;IACrC,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,OAAO,EACH;QACE,IAAI,EAAE,WAAW,CAAC;QAClB,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,EAAE,MAAM,CAAC;QACrB,SAAS,EAAE,MAAM,CAAC;KACnB,EAAE,GACH,IAAI,CAAC;CACV,GACD;IACE,KAAK,EAAE,eAAe,CAAC,mBAAmB,CAAC;IAC3C,MAAM,EAAE,MAAM,GAAG,YAAY,GAAG,gBAAgB,GAAG,WAAW,CAAC;IAC/D,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,EAAE,CAAC;CACvB,GACD;IACE,KAAK,EAAE,eAAe,CAAC,UAAU,CAAC;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,kBAAkB,EAAE,WAAW,CAAC;IAChC,UAAU,EAAE,OAAO,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,GAAG,CAAC;IACnB,cAAc,EAAE,aAAa,CAAC;CAC/B,GACD;IACE,KAAK,EACD,eAAe,CAAC,kBAAkB,GAClC,eAAe,CAAC,kBAAkB,GAClC,eAAe,CAAC,m
BAAmB,CAAC;IACxC,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB,GACD;IACE,KAAK,EAAE,eAAe,CAAC,iBAAiB,CAAC;IACzC,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEN,oBAAY,eAAe;IACzB,qBAAqB,0BAA0B;IAC/C,0BAA0B,+BAA+B;IACzD,WAAW,gBAAgB;IAC3B,cAAc,mBAAmB;IACjC,cAAc,mBAAmB;IACjC,iBAAiB,sBAAsB;IACvC,iBAAiB,sBAAsB;IACvC,QAAQ,aAAa;IACrB,mBAAmB,wBAAwB;IAC3C,mBAAmB,wBAAwB;IAC3C,gBAAgB,qBAAqB;IACrC,aAAa,kBAAkB;CAChC;AAED,MAAM,MAAM,WAAW,GACnB,CAAC;IACC,KAAK,EAAE,eAAe,CAAC,qBAAqB,CAAC;CAC9C,GAAG,aAAa,CAAC,GAClB,CAAC;IACC,KAAK,EAAE,eAAe,CAAC,0BAA0B,CAAC;CACnD,GAAG,kBAAkB,CAAC,GACvB;IACE,KAAK,EAAE,eAAe,CAAC,WAAW,CAAC;IAEnC,OAAO,EAAE,CACL;QACE,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,EAAE,MAAM,CAAC;KACtB,GACD;QACE,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;KACvC,CACJ,GACC,CACI;QACE,OAAO,EAAE,CACL;YACE,IAAI,EAAE,MAAM,CAAC;YACb,IAAI,EAAE,MAAM,CAAC;SACd,GACD;YACE,IAAI,EAAE,WAAW,CAAC;YAClB,IAAI,EAAE,MAAM,CAAC;YACb,SAAS,EAAE,MAAM,CAAC;YAClB,YAAY,EAAE,MAAM,CAAC;SACtB,CACJ,EAAE,CAAC;KACL,GACD;QACE,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;QACtB,OAAO,EAAE,CACL;YACE,IAAI,EAAE,MAAM,CAAC;YACb,IAAI,EAAE,MAAM,CAAC;SACd,GACD;YACE,IAAI,EAAE,WAAW,CAAC;YAClB,IAAI,EAAE,MAAM,CAAC;YACb,SAAS,EAAE,MAAM,CAAC;YAClB,YAAY,EAAE,MAAM,CAAC;SACtB,GACD;YACE,IAAI,EAAE,OAAO,CAAC;YACd,KAAK,EAAE,MAAM,CAAC;SACf,CACJ,EAAE,CAAC;KACL,CACJ,CAAC;CACL,GACD;IACE,KAAK,EAAE,eAAe,CAAC,cAAc,CAAC;IACtC,EAAE,EAAE,MAAM,CAAC;IACX,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B,GACD;IACE,KAAK,EAAE,eAAe,CAAC,cAAc,CAAC;IACtC,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,KAAK,EAAE,eAAe,CAAC,iBAAiB,GAAG,eAAe,CAAC,iBAAiB,CAAC;CAC9E,GACD;IACE,KAAK,EAAE,eAAe,CAAC,QAAQ,CAAC;IAChC,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B,GACD;IACE,KAAK,EACD,eAAe,CAAC,mBAAmB,GACnC,eAAe,CAAC,mBAAmB,GACnC,eAAe,CAAC,aAAa,CAAC;IAClC,KAAK,EAAE,MAAM,CAAC;CACf,GACD;IACE,KAAK,EAAE,eAAe,CAAC,gBAAgB,CAAC;IACxC,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,CAAC;AAEN,oBAAY,UAAU;IACpB,IAAI,SAAS;IACb,IAAI,SAAS;IACb,QAAQ,aAAa;CACtB;AAED,MAAM,WAAW,IAAI;
IACnB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE;QACR,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE;YACV,IAAI,EAAE,QAAQ,CAAC;YACf,UAAU,EAAE;gBACV,CAAC,IAAI,EAAE,MAAM,GAAG;oBACd,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC;iBACrB,CAAC;aACH,CAAC;YACF,mBAAmB,EAAE,MAAM,EAAE,CAAC;SAC/B,CAAC;KACH,CAAC;CACH;AAED,eAAO,MAAM,OAAO,qCAAqC,CAAC;AAC1D,eAAO,MAAM,WAAW,QAAQ,CAAC;AACjC,eAAO,MAAM,YAAY,IAAI,CAAC;AAE9B,eAAO,MAAM,oBAAoB,OAAO,CAAC;AACzC,eAAO,MAAM,qBAAqB,OAAO,CAAC;AAE1C,MAAM,MAAM,aAAa,GAAG,OAAO,CAAC;IAClC,cAAc,EAAE,UAAU,GAAG,YAAY,CAAC;IAC1C,kBAAkB,EAAE,WAAW,CAAC;IAChC,gBAAgB,EAAE,OAAO,CAAC;IAC1B,GAAG,EAAE,OAAO,CAAC;QACX,SAAS,EAAE,MAAM,CAAC;QAClB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,mBAAmB,EAAE,MAAM,CAAC;KAC7B,CAAC,CAAC;CACJ,CAAC,CAAC;AAEH,MAAM,MAAM,kBAAkB,GAAG,OAAO,CAAC;IACvC,cAAc,EAAE,MAAM,CAAC;IACvB,KAAK,EAAE,KAAK,CAAC;IACb,uBAAuB,EAAE,OAAO,CAAC;IACjC,mBAAmB,EAAE,WAAW,CAAC;IACjC,KAAK,EAAE,IAAI,EAAE,CAAC;IACd,WAAW,EAAE,UAAU,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,OAAO,CAAC;IACvB,gBAAgB,EAAE,OAAO,CAAC;IAC1B,kBAAkB,EAAE,MAAM,CAAC;CAC5B,CAAC,CAAC;AAEH,oBAAY,KAAK;IACf,YAAY,iBAAiB;IAC7B,SAAS,cAAc;IACvB,QAAQ,aAAa;IACrB,QAAQ,aAAa;CACtB"}
@@ -0,0 +1,68 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
/** Runtime object backing the `Voice` string enum. */
export var Voice;
Voice = Object.assign(Voice || {}, {
    ALLOY: 'alloy',
    SHIMMER: 'shimmer',
    ECHO: 'echo',
});
10
/** Runtime object backing the `TurnEndType` string enum. */
export var TurnEndType;
TurnEndType = Object.assign(TurnEndType || {}, {
    SERVER_DETECTION: 'server_detection',
    CLIENT_DECISION: 'client_decision',
});
15
/** Runtime object backing the `AudioFormat` string enum. */
export var AudioFormat;
AudioFormat = Object.assign(AudioFormat || {}, {
    PCM16: 'pcm16',
    // G711_ULAW = 'g711-ulaw',
    // G711_ALAW = 'g711-alaw',
});
21
/** Runtime object backing the `ServerEventType` string enum. */
export var ServerEventType;
ServerEventType = Object.assign(ServerEventType || {}, {
    START_SESSION: 'start_session',
    ERROR: 'error',
    ADD_MESSAGE: 'add_message',
    ADD_CONTENT: 'add_content',
    MESSAGE_ADDED: 'message_added',
    VAD_SPEECH_STARTED: 'vad_speech_started',
    VAD_SPEECH_STOPPED: 'vad_speech_stopped',
    INPUT_TRANSCRIBED: 'input_transcribed',
    GENERATION_CANCELED: 'generation_canceled',
    SEND_STATE: 'send_state',
    GENERATION_FINISHED: 'generation_finished',
});
35
/** Runtime object backing the `ClientEventType` string enum. */
export var ClientEventType;
ClientEventType = Object.assign(ClientEventType || {}, {
    UPDATE_SESSION_CONFIG: 'update_session_config',
    UPDATE_CONVERSATION_CONFIG: 'update_conversation_config',
    ADD_MESSAGE: 'add_message',
    DELETE_MESSAGE: 'delete_message',
    ADD_USER_AUDIO: 'add_user_audio',
    COMMIT_USER_AUDIO: 'commit_user_audio',
    CANCEL_GENERATION: 'cancel_generation',
    GENERATE: 'generate',
    CREATE_CONVERSATION: 'create_conversation',
    DELETE_CONVERSATION: 'delete_conversation',
    TRUNCATE_CONTENT: 'truncate_content',
    REQUEST_STATE: 'request_state',
});
50
/** Runtime object backing the `ToolChoice` string enum. */
export var ToolChoice;
ToolChoice = Object.assign(ToolChoice || {}, {
    AUTO: 'auto',
    NONE: 'none',
    REQUIRED: 'required',
});
56
/** WebSocket endpoint URL. */
export const API_URL = 'wss://api.openai.com/v1/realtime';
/** Audio sample rate. */
export const SAMPLE_RATE = 24000;
/** Audio channel count. */
export const NUM_CHANNELS = 1;
/** Input PCM frame size: 2400, i.e. one tenth of SAMPLE_RATE. */
export const INPUT_PCM_FRAME_SIZE = SAMPLE_RATE / 10; // 100ms
/** Output PCM frame size: 1200, i.e. one twentieth of SAMPLE_RATE. */
export const OUTPUT_PCM_FRAME_SIZE = SAMPLE_RATE / 20; // 50ms
61
/** Runtime object backing the `State` string enum. */
export var State;
State = Object.assign(State || {}, {
    INITIALIZING: 'initializing',
    LISTENING: 'listening',
    THINKING: 'thinking',
    SPEAKING: 'speaking',
});
68
+ //# sourceMappingURL=proto.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"proto.js","sourceRoot":"","sources":["../../src/omni_assistant/proto.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AACtC,MAAM,CAAN,IAAY,KAIX;AAJD,WAAY,KAAK;IACf,wBAAe,CAAA;IACf,4BAAmB,CAAA;IACnB,sBAAa,CAAA;AACf,CAAC,EAJW,KAAK,KAAL,KAAK,QAIhB;AAED,MAAM,CAAN,IAAY,WAGX;AAHD,WAAY,WAAW;IACrB,oDAAqC,CAAA;IACrC,kDAAmC,CAAA;AACrC,CAAC,EAHW,WAAW,KAAX,WAAW,QAGtB;AAED,MAAM,CAAN,IAAY,WAIX;AAJD,WAAY,WAAW;IACrB,8BAAe,CAAA;IACf,2BAA2B;IAC3B,2BAA2B;AAC7B,CAAC,EAJW,WAAW,KAAX,WAAW,QAItB;AAED,MAAM,CAAN,IAAY,eAYX;AAZD,WAAY,eAAe;IACzB,kDAA+B,CAAA;IAC/B,kCAAe,CAAA;IACf,8CAA2B,CAAA;IAC3B,8CAA2B,CAAA;IAC3B,kDAA+B,CAAA;IAC/B,4DAAyC,CAAA;IACzC,4DAAyC,CAAA;IACzC,0DAAuC,CAAA;IACvC,8DAA2C,CAAA;IAC3C,4CAAyB,CAAA;IACzB,8DAA2C,CAAA;AAC7C,CAAC,EAZW,eAAe,KAAf,eAAe,QAY1B;AAsFD,MAAM,CAAN,IAAY,eAaX;AAbD,WAAY,eAAe;IACzB,kEAA+C,CAAA;IAC/C,4EAAyD,CAAA;IACzD,8CAA2B,CAAA;IAC3B,oDAAiC,CAAA;IACjC,oDAAiC,CAAA;IACjC,0DAAuC,CAAA;IACvC,0DAAuC,CAAA;IACvC,wCAAqB,CAAA;IACrB,8DAA2C,CAAA;IAC3C,8DAA2C,CAAA;IAC3C,wDAAqC,CAAA;IACrC,kDAA+B,CAAA;AACjC,CAAC,EAbW,eAAe,KAAf,eAAe,QAa1B;AAwFD,MAAM,CAAN,IAAY,UAIX;AAJD,WAAY,UAAU;IACpB,2BAAa,CAAA;IACb,2BAAa,CAAA;IACb,mCAAqB,CAAA;AACvB,CAAC,EAJW,UAAU,KAAV,UAAU,QAIrB;AAmBD,MAAM,CAAC,MAAM,OAAO,GAAG,kCAAkC,CAAC;AAC1D,MAAM,CAAC,MAAM,WAAW,GAAG,KAAK,CAAC;AACjC,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,CAAC;AAE9B,MAAM,CAAC,MAAM,oBAAoB,GAAG,IAAI,CAAC,CAAC,QAAQ;AAClD,MAAM,CAAC,MAAM,qBAAqB,GAAG,IAAI,CAAC,CAAC,OAAO;AA2BlD,MAAM,CAAN,IAAY,KAKX;AALD,WAAY,KAAK;IACf,sCAA6B,CAAA;IAC7B,gCAAuB,CAAA;IACvB,8BAAqB,CAAA;IACrB,8BAAqB,CAAA;AACvB,CAAC,EALW,KAAK,KAAL,KAAK,QAKhB"}
@@ -0,0 +1,28 @@
1
+ import type { AudioFrame, Room } from '@livekit/rtc-node';
2
/**
 * Contract for an object that receives the audio frames and text of one
 * message and exposes how many characters have been forwarded so far.
 */
export interface TranscriptionForwarder {
    /** Begin forwarding. */
    start(): void;
    /** Supply one audio frame belonging to the message. */
    pushAudio(frame: AudioFrame): void;
    /** Supply a further piece of the message text. */
    pushText(text: string): void;
    /** Signal that no more text will be pushed. */
    markTextComplete(): void;
    /** Signal that no more audio will be pushed. */
    markAudioComplete(): void;
    /**
     * Stop forwarding.
     * @param interrupt - whether the message was cut short
     */
    close(interrupt: boolean): Promise<void>;
    /** Character index reached so far in the pushed text. */
    currentCharacterIndex: number;
}
11
/**
 * Declaration of the {@link TranscriptionForwarder} implementation shipped
 * with this package. Members marked `private` are implementation details and
 * appear here only because the declaration emitter lists them.
 */
export declare class BasicTranscriptionForwarder implements TranscriptionForwarder {
    #private;

    /** Character index reached so far in the pushed text. */
    currentCharacterIndex: number;

    constructor(room: Room, participantIdentity: string, trackSid: string, messageId: string);

    start(): void;
    pushAudio(frame: AudioFrame): void;
    pushText(text: string): void;
    markTextComplete(): void;
    markAudioComplete(): void;
    close(interrupt: boolean): Promise<void>;

    private textIsComplete;
    private audioIsComplete;
    private adjustTimingIfBothFinished;
    private computeSleepInterval;
    private startPublishingLoop;
    private publishTranscription;
}
28
+ //# sourceMappingURL=transcription_forwarder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"transcription_forwarder.d.ts","sourceRoot":"","sources":["../../src/omni_assistant/transcription_forwarder.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,mBAAmB,CAAC;AAE1D,MAAM,WAAW,sBAAsB;IACrC,KAAK,IAAI,IAAI,CAAC;IACd,SAAS,CAAC,KAAK,EAAE,UAAU,GAAG,IAAI,CAAC;IACnC,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,gBAAgB,IAAI,IAAI,CAAC;IACzB,iBAAiB,IAAI,IAAI,CAAC;IAC1B,KAAK,CAAC,SAAS,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACzC,qBAAqB,EAAE,MAAM,CAAC;CAC/B;AAED,qBAAa,2BAA4B,YAAW,sBAAsB;;IAWxE,qBAAqB,EAAE,MAAM,CAAK;gBAEtB,IAAI,EAAE,IAAI,EAAE,mBAAmB,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;IAOxF,KAAK,IAAI,IAAI;IAUb,SAAS,CAAC,KAAK,EAAE,UAAU,GAAG,IAAI;IAIlC,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAI5B,OAAO,CAAC,cAAc,CAAkB;IACxC,OAAO,CAAC,eAAe,CAAkB;IAEzC,gBAAgB,IAAI,IAAI;IAKxB,iBAAiB,IAAI,IAAI;IAKzB,OAAO,CAAC,0BAA0B;IASlC,OAAO,CAAC,oBAAoB;YAId,mBAAmB;YAoBnB,oBAAoB;IAkB5B,KAAK,CAAC,SAAS,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;CAS/C"}
@@ -0,0 +1,117 @@
1
+ var __classPrivateFieldSet = (this && this.__classPrivateFieldSet) || function (receiver, state, value, kind, f) {
2
+ if (kind === "m") throw new TypeError("Private method is not writable");
3
+ if (kind === "a" && !f) throw new TypeError("Private accessor was defined without a setter");
4
+ if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot write private member to an object whose class did not declare it");
5
+ return (kind === "a" ? f.call(receiver, value) : f ? f.value = value : state.set(receiver, value)), value;
6
+ };
7
+ var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (receiver, state, kind, f) {
8
+ if (kind === "a" && !f) throw new TypeError("Private accessor was defined without a getter");
9
+ if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
10
+ return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
11
+ };
12
// Per-instance stores for BasicTranscriptionForwarder's private fields.
// Declared here and assigned after the class body.
var _BasicTranscriptionForwarder_room;
var _BasicTranscriptionForwarder_participantIdentity;
var _BasicTranscriptionForwarder_trackSid;
var _BasicTranscriptionForwarder_currentText;
var _BasicTranscriptionForwarder_totalAudioDuration;
var _BasicTranscriptionForwarder_currentPlayoutTime;
var _BasicTranscriptionForwarder_DEFAULT_CHARS_PER_SECOND;
var _BasicTranscriptionForwarder_charsPerSecond;
var _BasicTranscriptionForwarder_messageId;
var _BasicTranscriptionForwarder_isRunning;
13
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
14
+ //
15
+ // SPDX-License-Identifier: Apache-2.0
16
+ import { log } from '@livekit/agents';
17
/**
 * Publishes the text of one message as a transcription segment, revealing it
 * progressively at an estimated characters-per-second rate.
 *
 * The rate starts at 16 chars/s and is recomputed from the real audio
 * duration once both text and audio have been marked complete.
 */
export class BasicTranscriptionForwarder {
    /**
     * @param room - room whose `localParticipant` publishes the transcription
     * @param participantIdentity - identity sent with each published segment
     * @param trackSid - track SID sent with each published segment
     * @param messageId - used as the segment id
     */
    constructor(room, participantIdentity, trackSid, messageId) {
        _BasicTranscriptionForwarder_room.set(this, void 0);
        _BasicTranscriptionForwarder_participantIdentity.set(this, void 0);
        _BasicTranscriptionForwarder_trackSid.set(this, void 0);
        _BasicTranscriptionForwarder_currentText.set(this, '');
        _BasicTranscriptionForwarder_totalAudioDuration.set(this, 0);
        _BasicTranscriptionForwarder_currentPlayoutTime.set(this, 0);
        _BasicTranscriptionForwarder_DEFAULT_CHARS_PER_SECOND.set(this, 16);
        _BasicTranscriptionForwarder_charsPerSecond.set(this, __classPrivateFieldGet(this, _BasicTranscriptionForwarder_DEFAULT_CHARS_PER_SECOND, "f"));
        _BasicTranscriptionForwarder_messageId.set(this, void 0);
        _BasicTranscriptionForwarder_isRunning.set(this, false);
        this.currentCharacterIndex = 0;
        this.textIsComplete = false;
        this.audioIsComplete = false;
        __classPrivateFieldSet(this, _BasicTranscriptionForwarder_room, room, "f");
        __classPrivateFieldSet(this, _BasicTranscriptionForwarder_participantIdentity, participantIdentity, "f");
        __classPrivateFieldSet(this, _BasicTranscriptionForwarder_trackSid, trackSid, "f");
        __classPrivateFieldSet(this, _BasicTranscriptionForwarder_messageId, messageId, "f");
    }
    /** Start the publishing loop unless it is already running; loop errors are logged. */
    start() {
        if (!__classPrivateFieldGet(this, _BasicTranscriptionForwarder_isRunning, "f")) {
            __classPrivateFieldSet(this, _BasicTranscriptionForwarder_isRunning, true, "f");
            this.startPublishingLoop().catch((error) => {
                log().error('Error in publishing loop:', error);
                __classPrivateFieldSet(this, _BasicTranscriptionForwarder_isRunning, false, "f");
            });
        }
    }
    /** Add the frame's duration (samplesPerChannel / sampleRate, in seconds) to the audio total. */
    pushAudio(frame) {
        __classPrivateFieldSet(this, _BasicTranscriptionForwarder_totalAudioDuration, __classPrivateFieldGet(this, _BasicTranscriptionForwarder_totalAudioDuration, "f") + frame.samplesPerChannel / frame.sampleRate, "f");
    }
    /** Append `text` to the message text. */
    pushText(text) {
        __classPrivateFieldSet(this, _BasicTranscriptionForwarder_currentText, __classPrivateFieldGet(this, _BasicTranscriptionForwarder_currentText, "f") + text, "f");
    }
    /** Record that no more text will arrive and re-estimate the rate if audio is complete too. */
    markTextComplete() {
        this.textIsComplete = true;
        this.adjustTimingIfBothFinished();
    }
    /** Record that no more audio will arrive and re-estimate the rate if text is complete too. */
    markAudioComplete() {
        this.audioIsComplete = true;
        this.adjustTimingIfBothFinished();
    }
    /**
     * Once both streams are complete, set the rate to text length divided by
     * total audio duration. Skipped when either is zero, which keeps the
     * previous rate and avoids a division by zero.
     */
    adjustTimingIfBothFinished() {
        if (this.textIsComplete && this.audioIsComplete) {
            const actualDuration = __classPrivateFieldGet(this, _BasicTranscriptionForwarder_totalAudioDuration, "f");
            if (actualDuration > 0 && __classPrivateFieldGet(this, _BasicTranscriptionForwarder_currentText, "f").length > 0) {
                __classPrivateFieldSet(this, _BasicTranscriptionForwarder_charsPerSecond, __classPrivateFieldGet(this, _BasicTranscriptionForwarder_currentText, "f").length / actualDuration, "f");
            }
        }
    }
    /** Seconds between publishes: one character's worth of time, clamped to [0.0625, 0.5]. */
    computeSleepInterval() {
        return Math.min(Math.max(1 / __classPrivateFieldGet(this, _BasicTranscriptionForwarder_charsPerSecond, "f"), 0.0625), 0.5);
    }
    /**
     * Advance the playout clock, publish the text revealed so far as a
     * non-final segment, and sleep; repeat until stopped or until the text is
     * complete and fully revealed. If still running at that point, close
     * normally so the final segment is published.
     */
    async startPublishingLoop() {
        __classPrivateFieldSet(this, _BasicTranscriptionForwarder_isRunning, true, "f");
        let sleepInterval = this.computeSleepInterval();
        let isComplete = false;
        while (__classPrivateFieldGet(this, _BasicTranscriptionForwarder_isRunning, "f") && !isComplete) {
            __classPrivateFieldSet(this, _BasicTranscriptionForwarder_currentPlayoutTime, __classPrivateFieldGet(this, _BasicTranscriptionForwarder_currentPlayoutTime, "f") + sleepInterval, "f");
            this.currentCharacterIndex = Math.floor(__classPrivateFieldGet(this, _BasicTranscriptionForwarder_currentPlayoutTime, "f") * __classPrivateFieldGet(this, _BasicTranscriptionForwarder_charsPerSecond, "f"));
            isComplete = this.textIsComplete && this.currentCharacterIndex >= __classPrivateFieldGet(this, _BasicTranscriptionForwarder_currentText, "f").length;
            await this.publishTranscription(false);
            if (__classPrivateFieldGet(this, _BasicTranscriptionForwarder_isRunning, "f") && !isComplete) {
                // Recompute each round: the rate may have been adjusted meanwhile.
                sleepInterval = this.computeSleepInterval();
                await new Promise((resolve) => setTimeout(resolve, sleepInterval * 1000));
            }
        }
        if (__classPrivateFieldGet(this, _BasicTranscriptionForwarder_isRunning, "f")) {
            // Awaited so that a failing final publish rejects this loop and is
            // logged by start() instead of surfacing as an unhandled rejection.
            await this.close(false);
        }
    }
    /**
     * Publish the text up to `currentCharacterIndex` as a single segment.
     * Does nothing when the room has no local participant.
     * @param final - value of the segment's `final` flag
     */
    async publishTranscription(final) {
        var _a;
        const textToPublish = __classPrivateFieldGet(this, _BasicTranscriptionForwarder_currentText, "f").slice(0, this.currentCharacterIndex);
        await ((_a = __classPrivateFieldGet(this, _BasicTranscriptionForwarder_room, "f").localParticipant) === null || _a === void 0 ? void 0 : _a.publishTranscription({
            participantIdentity: __classPrivateFieldGet(this, _BasicTranscriptionForwarder_participantIdentity, "f"),
            trackSid: __classPrivateFieldGet(this, _BasicTranscriptionForwarder_trackSid, "f"),
            segments: [
                {
                    text: textToPublish,
                    final: final,
                    id: __classPrivateFieldGet(this, _BasicTranscriptionForwarder_messageId, "f"),
                    startTime: BigInt(0),
                    endTime: BigInt(0),
                    language: '',
                },
            ],
        }));
    }
    /**
     * Stop the loop and publish a final segment.
     * @param interrupt - when false the whole text is published; when true
     *   only the text revealed so far
     */
    async close(interrupt) {
        __classPrivateFieldSet(this, _BasicTranscriptionForwarder_isRunning, false, "f");
        // Publish whatever we had as final
        if (!interrupt) {
            this.currentCharacterIndex = __classPrivateFieldGet(this, _BasicTranscriptionForwarder_currentText, "f").length;
        }
        await this.publishTranscription(true);
    }
}
116
// Create the per-instance stores backing BasicTranscriptionForwarder's private fields.
_BasicTranscriptionForwarder_room = new WeakMap();
_BasicTranscriptionForwarder_participantIdentity = new WeakMap();
_BasicTranscriptionForwarder_trackSid = new WeakMap();
_BasicTranscriptionForwarder_currentText = new WeakMap();
_BasicTranscriptionForwarder_totalAudioDuration = new WeakMap();
_BasicTranscriptionForwarder_currentPlayoutTime = new WeakMap();
_BasicTranscriptionForwarder_DEFAULT_CHARS_PER_SECOND = new WeakMap();
_BasicTranscriptionForwarder_charsPerSecond = new WeakMap();
_BasicTranscriptionForwarder_messageId = new WeakMap();
_BasicTranscriptionForwarder_isRunning = new WeakMap();
117
+ //# sourceMappingURL=transcription_forwarder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"transcription_forwarder.js","sourceRoot":"","sources":["../../src/omni_assistant/transcription_forwarder.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AACtC,OAAO,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAatC,MAAM,OAAO,2BAA2B;IAatC,YAAY,IAAU,EAAE,mBAA2B,EAAE,QAAgB,EAAE,SAAiB;QAZxF,oDAAY;QACZ,mEAA6B;QAC7B,wDAAkB;QAClB,mDAAuB,EAAE,EAAC;QAC1B,0DAA8B,CAAC,EAAC;QAChC,0DAA8B,CAAC,EAAC;QAChC,gEAA4B,EAAE,EAAC;QAC/B,sDAA0B,uBAAA,IAAI,6DAA0B,EAAC;QACzD,yDAAmB;QACnB,iDAAsB,KAAK,EAAC;QAC5B,0BAAqB,GAAW,CAAC,CAAC;QA2B1B,mBAAc,GAAY,KAAK,CAAC;QAChC,oBAAe,GAAY,KAAK,CAAC;QAzBvC,uBAAA,IAAI,qCAAS,IAAI,MAAA,CAAC;QAClB,uBAAA,IAAI,oDAAwB,mBAAmB,MAAA,CAAC;QAChD,uBAAA,IAAI,yCAAa,QAAQ,MAAA,CAAC;QAC1B,uBAAA,IAAI,0CAAc,SAAS,MAAA,CAAC;IAC9B,CAAC;IAED,KAAK;QACH,IAAI,CAAC,uBAAA,IAAI,8CAAW,EAAE,CAAC;YACrB,uBAAA,IAAI,0CAAc,IAAI,MAAA,CAAC;YACvB,IAAI,CAAC,mBAAmB,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;gBACzC,GAAG,EAAE,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC,CAAC;gBAChD,uBAAA,IAAI,0CAAc,KAAK,MAAA,CAAC;YAC1B,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,SAAS,CAAC,KAAiB;QACzB,mKAA4B,KAAK,CAAC,iBAAiB,GAAG,KAAK,CAAC,UAAU,MAAA,CAAC;IACzE,CAAC;IAED,QAAQ,CAAC,IAAY;QACnB,qJAAqB,IAAI,MAAA,CAAC;IAC5B,CAAC;IAKD,gBAAgB;QACd,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC;QAC3B,IAAI,CAAC,0BAA0B,EAAE,CAAC;IACpC,CAAC;IAED,iBAAiB;QACf,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,0BAA0B,EAAE,CAAC;IACpC,CAAC;IAEO,0BAA0B;QAChC,IAAI,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YAChD,MAAM,cAAc,GAAG,uBAAA,IAAI,uDAAoB,CAAC;YAChD,IAAI,cAAc,GAAG,CAAC,IAAI,uBAAA,IAAI,gDAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvD,uBAAA,IAAI,+CAAmB,uBAAA,IAAI,gDAAa,CAAC,MAAM,GAAG,cAAc,MAAA,CAAC;YACnE,CAAC;QACH,CAAC;IACH,CAAC;IAEO,oBAAoB;QAC1B,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,uBAAA,IAAI,mDAAgB,EAAE,MAAM,CAAC,EAAE,GAAG,CAAC,CAAC;IACnE,CAAC;IAEO,KAAK,CAAC,mBAAmB;QAC/B,uBAAA,IAAI,0CAAc,IAAI,MAAA,CAAC;QACvB,IAAI,aAAa,GAAG,IAAI,CAAC,oBAAoB,EAAE,CAAC;QAChD,IAAI,UAAU,GAAG,KAAK,CAAC;QACvB,OAAO,uBAAA,IAAI,8CAAW,IAAI,CAA
C,UAAU,EAAE,CAAC;YACtC,mKAA4B,aAAa,MAAA,CAAC;YAC1C,IAAI,CAAC,qBAAqB,GAAG,IAAI,CAAC,KAAK,CAAC,uBAAA,IAAI,uDAAoB,GAAG,uBAAA,IAAI,mDAAgB,CAAC,CAAC;YACzF,UAAU,GAAG,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,qBAAqB,IAAI,uBAAA,IAAI,gDAAa,CAAC,MAAM,CAAC;YAC3F,MAAM,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC,CAAC;YACvC,IAAI,uBAAA,IAAI,8CAAW,IAAI,CAAC,UAAU,EAAE,CAAC;gBACnC,aAAa,GAAG,IAAI,CAAC,oBAAoB,EAAE,CAAC;gBAC5C,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI,CAAC,CAAC,CAAC;YAC5E,CAAC;QACH,CAAC;QAED,IAAI,uBAAA,IAAI,8CAAW,EAAE,CAAC;YACpB,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,oBAAoB,CAAC,KAAc;;QAC/C,MAAM,aAAa,GAAG,uBAAA,IAAI,gDAAa,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAC7E,MAAM,CAAA,MAAA,uBAAA,IAAI,yCAAM,CAAC,gBAAgB,0CAAE,oBAAoB,CAAC;YACtD,mBAAmB,EAAE,uBAAA,IAAI,wDAAqB;YAC9C,QAAQ,EAAE,uBAAA,IAAI,6CAAU;YACxB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,aAAa;oBACnB,KAAK,EAAE,KAAK;oBACZ,EAAE,EAAE,uBAAA,IAAI,8CAAW;oBACnB,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC;oBACpB,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;oBAClB,QAAQ,EAAE,EAAE;iBACb;aACF;SACF,CAAC,CAAA,CAAC;IACL,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,SAAkB;QAC5B,uBAAA,IAAI,0CAAc,KAAK,MAAA,CAAC;QAExB,mCAAmC;QACnC,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,IAAI,CAAC,qBAAqB,GAAG,uBAAA,IAAI,gDAAa,CAAC,MAAM,CAAC;QACxD,CAAC;QACD,MAAM,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC;IACxC,CAAC;CACF"}
package/package.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "name": "@livekit/agents-plugin-openai",
3
+ "version": "0.2.0",
4
+ "description": "OpenAI plugin for LiveKit Node Agents",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "author": "LiveKit",
8
+ "type": "module",
9
+ "devDependencies": {
10
+ "@microsoft/api-extractor": "^7.35.0",
11
+ "@types/ws": "^8.5.10",
12
+ "typescript": "^5.0.0"
13
+ },
14
+ "dependencies": {
15
+ "@livekit/rtc-node": "^0.8.1",
16
+ "ws": "^8.16.0",
17
+ "@livekit/agents": "0.2.0"
18
+ },
19
+ "scripts": {
20
+ "build": "tsc",
21
+ "lint": "eslint -f unix \"src/**/*.{ts,js}\"",
22
+ "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
23
+ "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
24
+ }
25
+ }
package/src/index.ts ADDED
@@ -0,0 +1,5 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
+ export * from './omni_assistant/index.js';
@@ -0,0 +1,127 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
import { AudioByteStream } from '@livekit/agents';
import { Queue } from '@livekit/agents';
import { log } from '@livekit/agents';
import { AudioFrame, type AudioSource } from '@livekit/rtc-node';
import { EventEmitter } from 'events';
import * as proto from './proto.js';
import type { TranscriptionForwarder } from './transcription_forwarder';
10
+
11
/**
 * Feeds queued audio for one message at a time into an {@link AudioSource}.
 *
 * Starting a new playout interrupts the current one; the new playout begins
 * capturing frames only after the previous task has settled.
 */
export class AgentPlayout {
  #audioSource: AudioSource;
  #currentPlayoutHandle: PlayoutHandle | null;
  #currentPlayoutTask: Promise<void> | null;

  /**
   * @param audioSource - destination that receives the captured frames
   */
  constructor(audioSource: AudioSource) {
    this.#audioSource = audioSource;
    this.#currentPlayoutHandle = null;
    this.#currentPlayoutTask = null;
  }

  /**
   * Interrupt any current playout and start a new one.
   *
   * @param messageId - id stored on the returned handle
   * @param transcriptionFwd - forwarder driven alongside the audio
   * @returns the handle through which audio and text are pushed
   */
  play(messageId: string, transcriptionFwd: TranscriptionForwarder): PlayoutHandle {
    const previousHandle = this.#currentPlayoutHandle;
    if (previousHandle && !previousHandle.done) {
      previousHandle.interrupt();
      // The previous task may be parked on an empty queue; the sentinel wakes
      // it so it can observe the interruption and finish.
      void previousHandle.playoutQueue.put(null);
    }

    const handle = new PlayoutHandle(messageId, transcriptionFwd);
    this.#currentPlayoutHandle = handle;
    // Nothing else awaits this promise, so log failures here rather than
    // letting them surface as unhandled rejections.
    this.#currentPlayoutTask = this.playoutTask(this.#currentPlayoutTask, handle).catch(
      (error: unknown) => {
        log().error('Error in playout task:', error);
      },
    );
    return handle;
  }

  /**
   * Drain `handle.playoutQueue` into the audio source until a `null` sentinel
   * arrives or the handle is interrupted, then close the transcription
   * forwarder and mark the handle complete.
   *
   * @param oldTask - previous playout task, awaited before any frame is captured
   * @param handle - playout to run
   */
  private async playoutTask(oldTask: Promise<void> | null, handle: PlayoutHandle): Promise<void> {
    let firstFrame = true;
    try {
      if (oldTask) {
        // Only wait for the previous playout to settle; its failure is not ours.
        await oldTask.catch(() => undefined);
      }

      const bstream = new AudioByteStream(
        proto.SAMPLE_RATE,
        proto.NUM_CHANNELS,
        proto.OUTPUT_PCM_FRAME_SIZE,
      );

      while (!handle.interrupted) {
        const frame = await handle.playoutQueue.get();
        if (frame === null) break;
        if (firstFrame) {
          handle.transcriptionFwd.start();
          firstFrame = false;
        }

        // Hand over exactly the bytes the view covers; `data.buffer` alone
        // would include unrelated bytes when the view is a window.
        const { buffer, byteOffset, byteLength } = frame.data;
        const pcm =
          byteOffset === 0 && byteLength === buffer.byteLength
            ? buffer
            : buffer.slice(byteOffset, byteOffset + byteLength);

        for (const f of bstream.write(pcm)) {
          // Check before counting so a skipped frame is not reported as played.
          if (handle.interrupted) break;
          handle.playedAudioSamples += f.samplesPerChannel;

          await this.#audioSource.captureFrame(f);
        }
      }

      if (!handle.interrupted) {
        for (const f of bstream.flush()) {
          await this.#audioSource.captureFrame(f);
        }
      }
    } finally {
      try {
        if (!firstFrame && !handle.interrupted) {
          handle.transcriptionFwd.markTextComplete();
        }
        await handle.transcriptionFwd.close(handle.interrupted);
      } finally {
        // Always emit 'complete', even if closing the forwarder fails.
        handle.complete();
      }
    }
  }
}
73
+
74
/**
 * Producer-side handle for one playout: callers push audio and text through
 * it, and it emits `'complete'` (with the interrupted flag) when finished.
 */
export class PlayoutHandle extends EventEmitter {
  messageId: string;
  transcriptionFwd: TranscriptionForwarder;
  /** Per-channel sample count accumulated by the playout task. */
  playedAudioSamples: number;
  done: boolean;
  interrupted: boolean;
  /** Frames awaiting playout; `null` is the end-of-input sentinel. */
  playoutQueue: Queue<AudioFrame | null>;

  constructor(messageId: string, transcriptionFwd: TranscriptionForwarder) {
    super();
    this.messageId = messageId;
    this.transcriptionFwd = transcriptionFwd;
    this.playedAudioSamples = 0;
    this.done = false;
    this.interrupted = false;
    this.playoutQueue = new Queue<AudioFrame | null>();
  }

  /**
   * Wrap raw 16-bit PCM bytes in an {@link AudioFrame} and queue it.
   *
   * The bytes are copied into a fresh buffer. `data` may be a window onto a
   * larger buffer (for example a pooled Node `Buffer`), in which case
   * `data.buffer` would expose unrelated bytes. A trailing odd byte is dropped.
   *
   * @param data - PCM16 bytes in platform byte order
   */
  pushAudio(data: Uint8Array) {
    const sampleCount = Math.floor(data.byteLength / 2);
    const samples = new Int16Array(sampleCount);
    new Uint8Array(samples.buffer).set(data.subarray(0, sampleCount * 2));

    const frame = new AudioFrame(samples, proto.SAMPLE_RATE, proto.NUM_CHANNELS, sampleCount);
    this.transcriptionFwd.pushAudio(frame);
    this.playoutQueue.put(frame);
  }

  /** Forward a piece of text to the transcription forwarder. */
  pushText(text: string) {
    this.transcriptionFwd.pushText(text);
  }

  /** Mark audio and text complete and queue the end-of-input sentinel. */
  endInput() {
    this.transcriptionFwd.markAudioComplete();
    this.transcriptionFwd.markTextComplete();
    this.playoutQueue.put(null);
  }

  /**
   * Flag the playout as interrupted. No-op once done or already interrupted.
   */
  interrupt() {
    if (this.done || this.interrupted) return;
    this.interrupted = true;
    // Wake a consumer parked on an empty queue so it can see the flag;
    // otherwise the playout would never complete when no more audio arrives.
    void this.playoutQueue.put(null);
  }

  /** Number of text characters the transcription forwarder has reached. */
  publishedTextChars(): number {
    return this.transcriptionFwd.currentCharacterIndex;
  }

  /** Mark the playout done and emit `'complete'` once. */
  complete() {
    if (this.done) return;
    this.done = true;
    this.emit('complete', this.interrupted);
  }
}