@livekit/agents-plugin-phonic 1.0.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,431 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var realtime_model_exports = {};
20
+ __export(realtime_model_exports, {
21
+ RealtimeModel: () => RealtimeModel,
22
+ RealtimeSession: () => RealtimeSession
23
+ });
24
+ module.exports = __toCommonJS(realtime_model_exports);
25
+ var import_agents = require("@livekit/agents");
26
+ var import_rtc_node = require("@livekit/rtc-node");
27
+ var import_phonic = require("phonic");
28
// Sample rate (Hz) that microphone audio is resampled to before being sent;
// the session config requests the "pcm_44100" input format.
const PHONIC_INPUT_SAMPLE_RATE = 44100;
// Sample rate (Hz) used when wrapping received audio in AudioFrames;
// the session config requests the "pcm_44100" output format.
const PHONIC_OUTPUT_SAMPLE_RATE = 44100;
// Channel count used for both the input byte stream and the output frames.
const PHONIC_NUM_CHANNELS = 1;
// Duration (ms) of each audio chunk produced by the input byte stream.
const PHONIC_INPUT_FRAME_MS = 20;
// Model name used when the caller does not pass `model`.
const DEFAULT_MODEL = "merritt";
// Socket close code that is not reported as an error.
const WS_CLOSE_NORMAL = 1000;
34
/**
 * Realtime model backed by Phonic.
 *
 * Holds the resolved connection and conversation options and hands them to every
 * session it creates.
 */
class RealtimeModel extends import_agents.llm.RealtimeModel {
  /** @internal */
  _options;

  /** Name of the Phonic model these options select. */
  get model() {
    const { model } = this._options;
    return model;
  }

  /**
   * @param options - Optional settings. `apiKey` falls back to the `PHONIC_API_KEY`
   *   environment variable, `model` to the default model name and `connOptions` to
   *   `DEFAULT_API_CONNECT_OPTIONS`; every other field is stored as given.
   * @throws Error when no API key is supplied and `PHONIC_API_KEY` is unset or empty.
   */
  constructor(options = {}) {
    super({
      messageTruncation: false,
      turnDetection: true,
      userTranscription: true,
      // TODO @Phonic-Co: Implement tool support
      // Phonic has automatic tool reply generation, but tools are not supported with LiveKit Agents yet.
      autoToolReplyGeneration: true,
      audioOutput: true
    });

    const resolvedKey = options.apiKey || process.env.PHONIC_API_KEY;
    if (!resolvedKey) {
      throw new Error("Phonic API key is required. Provide apiKey or set PHONIC_API_KEY.");
    }

    const {
      voice,
      phonicAgent,
      project,
      welcomeMessage,
      generateWelcomeMessage,
      languages,
      audioSpeed,
      phonicTools,
      boostedKeywords,
      generateNoInputPokeText,
      noInputPokeSec,
      noInputPokeText,
      noInputEndConversationSec,
      connOptions,
      model,
      baseUrl
    } = options;

    this._options = {
      apiKey: resolvedKey,
      voice,
      phonicAgent,
      project,
      welcomeMessage,
      generateWelcomeMessage,
      languages,
      audioSpeed,
      phonicTools,
      boostedKeywords,
      generateNoInputPokeText,
      noInputPokeSec,
      noInputPokeText,
      noInputEndConversationSec,
      connOptions: connOptions ?? import_agents.DEFAULT_API_CONNECT_OPTIONS,
      model: model ?? DEFAULT_MODEL,
      baseUrl
    };
  }

  /**
   * Create a new realtime session bound to this model.
   */
  session() {
    const created = new RealtimeSession(this);
    return created;
  }

  /** No-op: the model itself holds nothing that needs releasing. */
  async close() {
    return;
  }
}
83
/**
 * Realtime session for Phonic (https://docs.phonic.co/).
 *
 * On construction the session starts connecting to a Phonic conversation socket.
 * The conversation config is sent once the socket is open AND `updateInstructions`
 * has been called. Microphone audio pushed through `pushAudio` is resampled, chunked
 * and forwarded as base64 `audio_chunk` payloads; server events are translated into
 * `generation_created`, `input_speech_started`, `input_speech_stopped`,
 * `input_audio_transcription_completed` and `error` events.
 */
class RealtimeSession extends import_agents.llm.RealtimeSession {
  _tools = {};
  _chatCtx = import_agents.llm.ChatContext.empty();
  options;
  bstream;
  inputResampler;
  inputResamplerInputRate;
  currentGeneration;
  conversationId;
  client;
  socket;
  logger = (0, import_agents.log)();
  closed = false;
  configSent = false;
  instructionsReady;
  resolveInstructionsReady;
  connectTask;

  /**
   * @param realtimeModel - Model whose options configure this session.
   */
  constructor(realtimeModel) {
    super(realtimeModel);
    // Shallow-copy so that `updateInstructions` writes to this session only. Aliasing the
    // model's object would let sessions created from the same model overwrite each
    // other's instructions before their config is sent.
    this.options = { ...realtimeModel._options };

    // Placeholder until the promise executor below installs the real resolver.
    this.resolveInstructionsReady = () => {
    };
    this.instructionsReady = new Promise((resolve) => {
      this.resolveInstructionsReady = resolve;
    });

    this.client = new import_phonic.PhonicClient({
      apiKey: this.options.apiKey,
      baseUrl: this.options.baseUrl
    });
    // Third argument is the number of samples per emitted chunk (PHONIC_INPUT_FRAME_MS of audio).
    this.bstream = new import_agents.AudioByteStream(
      PHONIC_INPUT_SAMPLE_RATE,
      PHONIC_NUM_CHANNELS,
      PHONIC_INPUT_SAMPLE_RATE * PHONIC_INPUT_FRAME_MS / 1e3
    );
    // Connection failures are reported as non-recoverable `error` events, so
    // `connectTask` itself never rejects and is safe to await in `close()`.
    this.connectTask = this.connect().catch((error) => {
      const normalizedError = error instanceof Error ? error : new Error(String(error));
      this.emitError(normalizedError, false);
    });
  }

  /** A copy of the chat context accumulated from transcripts and assistant output. */
  get chatCtx() {
    return this._chatCtx.copy();
  }

  /** A shallow copy of the tool context (never populated by this session). */
  get tools() {
    return { ...this._tools };
  }

  /**
   * Stores the system prompt and unblocks sending of the conversation config.
   * Ignored with a warning once the config has been sent.
   *
   * @param instructions - System prompt sent as `system_prompt` in the config.
   */
  async updateInstructions(instructions) {
    if (this.configSent) {
      this.logger.warn(
        "updateInstructions called after config was already sent. Phonic does not support updating instructions mid-session."
      );
      return;
    }
    this.options.instructions = instructions;
    this.resolveInstructionsReady();
  }

  /** Not supported; logs a warning. */
  async updateChatCtx(_chatCtx) {
    this.logger.warn("updateChatCtx is not supported by the Phonic realtime model.");
  }

  /** Not supported; logs a warning when a non-empty tool context is supplied. */
  async updateTools(tools) {
    if (Object.keys(tools).length > 0) {
      this.logger.warn("Tool use is not supported by the Phonic realtime model.");
    }
  }

  /** Not supported; logs a warning. */
  updateOptions(_options) {
    this.logger.warn("updateOptions is not supported by the Phonic realtime model.");
  }

  /**
   * Resamples `frame` if needed, splits it into fixed-size chunks and sends each chunk
   * to Phonic as a base64 `audio_chunk`. Chunks produced while no socket exists are
   * dropped. Does nothing once the session is closed.
   *
   * @param frame - Audio frame to forward.
   */
  pushAudio(frame) {
    if (this.closed) {
      return;
    }
    for (const resampledFrame of this.resampleAudio(frame)) {
      const samples = resampledFrame.data;
      // `samples` may be a view into a larger ArrayBuffer; hand the byte stream only the
      // bytes the view actually covers.
      const coversWholeBuffer = samples.byteOffset === 0 && samples.byteLength === samples.buffer.byteLength;
      const pcmBytes = coversWholeBuffer ? samples.buffer : samples.buffer.slice(samples.byteOffset, samples.byteOffset + samples.byteLength);
      for (const chunk of this.bstream.write(pcmBytes)) {
        if (!this.socket) {
          // Still drain the byte stream, but skip the encoding work for a chunk
          // that cannot be sent.
          continue;
        }
        const bytes = Buffer.from(chunk.data.buffer, chunk.data.byteOffset, chunk.data.byteLength);
        const payload = {
          type: "audio_chunk",
          audio: bytes.toString("base64")
        };
        this.socket.sendAudioChunk(payload);
      }
    }
  }

  // TODO @Phonic-Co: Implement generateReply
  /**
   * Not supported.
   *
   * @throws Error always.
   */
  async generateReply(_instructions) {
    throw new Error(
      "generateReply is not yet supported by the Phonic realtime model. Consider using `welcomeMessage` instead."
    );
  }

  /** Not supported; logs a warning. */
  async commitAudio() {
    this.logger.warn("commitAudio is not supported by the Phonic realtime model.");
  }

  /** Not supported; logs a warning. */
  async clearAudio() {
    this.logger.warn("clearAudio is not supported by the Phonic realtime model.");
  }

  /** Not supported; logs a warning. */
  async interrupt() {
    this.logger.warn("interrupt is not supported by the Phonic realtime model.");
  }

  /** Not supported; logs a warning. */
  async truncate(_options) {
    this.logger.warn("truncate is not supported by the Phonic realtime model.");
  }

  /**
   * Marks the session closed, finishes any open generation, closes the socket and
   * waits for the connect task to settle before closing the base session.
   */
  async close() {
    this.closed = true;
    // Unblock `connect()` if it is still waiting for instructions so it can exit.
    this.resolveInstructionsReady();
    this.closeCurrentGeneration({ interrupted: false });
    if (this.socket) {
      this.socket.close();
    }
    await this.connectTask;
    await super.close();
  }

  /**
   * Opens the conversation socket, wires its event handlers, then waits for the socket
   * to open and for instructions to arrive before sending the conversation config.
   * Returns early, without sending the config, if the session was closed meanwhile.
   */
  async connect() {
    this.socket = await this.client.conversations.connect({
      reconnectAttempts: this.options.connOptions.maxRetry
    });
    if (this.closed) {
      // `close()` ran before the socket existed, so it could not close it itself.
      this.socket.close();
      return;
    }
    this.socket.on(
      "message",
      (message) => this.handleServerMessage(message)
    );
    this.socket.on("error", (error) => this.emitError(error, false));
    this.socket.on("close", (event) => {
      this.closeCurrentGeneration({ interrupted: false });
      // Tolerate a close notification that carries no event object.
      const code = event == null ? void 0 : event.code;
      if (!this.closed && code !== WS_CLOSE_NORMAL) {
        this.emitError(new Error(`Phonic STS socket closed with code ${code ?? -1}`), false);
      }
    });
    await this.socket.waitForOpen();
    await this.instructionsReady;
    if (this.closed) return;
    this.configSent = true;
    this.socket.sendConfig({
      type: "config",
      model: this.options.model,
      agent: this.options.phonicAgent,
      project: this.options.project,
      welcome_message: this.options.welcomeMessage,
      generate_welcome_message: this.options.generateWelcomeMessage,
      system_prompt: this.options.instructions,
      voice_id: this.options.voice,
      input_format: "pcm_44100",
      output_format: "pcm_44100",
      recognized_languages: this.options.languages,
      audio_speed: this.options.audioSpeed,
      tools: this.options.phonicTools,
      boosted_keywords: this.options.boostedKeywords,
      generate_no_input_poke_text: this.options.generateNoInputPokeText,
      no_input_poke_sec: this.options.noInputPokeSec,
      no_input_poke_text: this.options.noInputPokeText,
      no_input_end_conversation_sec: this.options.noInputEndConversationSec
    });
  }

  /**
   * Dispatches one server event by its `type`. Events received after the session was
   * closed are ignored, as are event types that have no handler below.
   *
   * @param message - Server event received on the socket.
   */
  handleServerMessage(message) {
    if (this.closed) {
      return;
    }
    switch (message.type) {
      case "assistant_started_speaking":
        this.startNewAssistantTurn();
        break;
      case "assistant_finished_speaking":
        this.finishAssistantTurn();
        break;
      case "audio_chunk":
        this.handleAudioChunk(message);
        break;
      case "input_text":
        this.handleInputText(message);
        break;
      case "user_started_speaking":
        this.handleInputSpeechStarted();
        break;
      case "user_finished_speaking":
        this.handleInputSpeechStopped();
        break;
      case "error": {
        // Guard against an error event without details so the handler itself cannot
        // throw inside the socket callback.
        const detail = message.error && message.error.message;
        this.emitError(new Error(detail || "Phonic reported an error without a message."), false);
        break;
      }
      case "tool_call":
        this.emitError(
          new Error(
            `WebSocket tool calls are not yet supported by the Phonic realtime model with LiveKit Agents.`
          ),
          false
        );
        break;
      case "assistant_ended_conversation":
        this.emitError(
          new Error(
            "assistant_ended_conversation is not supported by the Phonic realtime model with LiveKit Agents."
          ),
          false
        );
        break;
      case "conversation_created":
        this.conversationId = message.conversation_id;
        this.logger.info(`Phonic Conversation began with ID: ${this.conversationId}`);
        break;
      case "assistant_chose_not_to_respond":
      case "ready_to_start_conversation":
      case "input_cancelled":
      case "tool_call_output_processed":
      case "tool_call_interrupted":
      case "dtmf":
      default:
        break;
    }
  }

  /**
   * Appends the text and audio of an `audio_chunk` event to the current generation.
   *
   * Chunks that arrive while no generation is open are dropped, so audio is only
   * processed during an assistant turn.
   *
   * @param message - `audio_chunk` event; `audio` is base64 16-bit PCM.
   */
  handleAudioChunk(message) {
    const gen = this.currentGeneration;
    if (!gen) return;
    if (message.text) {
      gen.outputText += message.text;
      gen.textChannel.write(message.text);
    }
    if (message.audio) {
      const bytes = Buffer.from(message.audio, "base64");
      // A trailing odd byte cannot form a 16-bit sample and is discarded.
      const sampleCount = Math.floor(bytes.byteLength / Int16Array.BYTES_PER_ELEMENT);
      if (sampleCount > 0) {
        // Copy out of the Buffer's backing store so the Int16Array starts at offset 0.
        const pcm = new Int16Array(
          bytes.buffer.slice(
            bytes.byteOffset,
            bytes.byteOffset + sampleCount * Int16Array.BYTES_PER_ELEMENT
          )
        );
        const frame = new import_rtc_node.AudioFrame(
          pcm,
          PHONIC_OUTPUT_SAMPLE_RATE,
          PHONIC_NUM_CHANNELS,
          sampleCount / PHONIC_NUM_CHANNELS
        );
        gen.audioChannel.write(frame);
      }
    }
  }

  /**
   * Emits a final user transcription and records it in the chat context.
   *
   * @param message - `input_text` event carrying the transcript in `text`.
   */
  handleInputText(message) {
    const itemId = (0, import_agents.shortuuid)("PI_");
    this.emit("input_audio_transcription_completed", {
      itemId,
      transcript: message.text,
      isFinal: true
    });
    this._chatCtx.addMessage({
      role: "user",
      content: message.text,
      id: itemId
    });
  }

  /** Signals that the user started speaking and ends any open generation as interrupted. */
  handleInputSpeechStarted() {
    this.emit("input_speech_started", {});
    this.closeCurrentGeneration({ interrupted: true });
  }

  /** Signals that the user stopped speaking. */
  handleInputSpeechStopped() {
    this.emit("input_speech_stopped", {
      userTranscriptionEnabled: true
    });
  }

  /**
   * Opens a new generation: creates its text/audio/function/message channels, publishes
   * the single message on the message channel and emits `generation_created`.
   * A generation that is still open is closed as interrupted first.
   */
  startNewAssistantTurn() {
    if (this.currentGeneration) {
      this.closeCurrentGeneration({ interrupted: true });
    }
    const responseId = (0, import_agents.shortuuid)("PS_");
    const textChannel = import_agents.stream.createStreamChannel();
    const audioChannel = import_agents.stream.createStreamChannel();
    const functionChannel = import_agents.stream.createStreamChannel();
    const messageChannel = import_agents.stream.createStreamChannel();
    messageChannel.write({
      messageId: responseId,
      textStream: textChannel.stream(),
      audioStream: audioChannel.stream(),
      modalities: Promise.resolve(["audio", "text"])
    });
    this.currentGeneration = {
      responseId,
      messageChannel,
      functionChannel,
      textChannel,
      audioChannel,
      outputText: ""
    };
    this.emit("generation_created", {
      messageStream: messageChannel.stream(),
      functionStream: functionChannel.stream(),
      userInitiated: false,
      responseId
    });
  }

  /** Ends the current generation normally (not interrupted). */
  finishAssistantTurn() {
    this.closeCurrentGeneration({ interrupted: false });
  }

  /**
   * Closes the current generation, if any: records its accumulated text in the chat
   * context (skipped when no text was produced) and closes all of its channels.
   *
   * @param interrupted - Stored on the recorded assistant message.
   */
  closeCurrentGeneration({ interrupted }) {
    const gen = this.currentGeneration;
    if (!gen) return;
    if (gen.outputText) {
      this._chatCtx.addMessage({
        role: "assistant",
        content: gen.outputText,
        id: gen.responseId,
        interrupted
      });
    }
    gen.textChannel.close();
    gen.audioChannel.close();
    gen.functionChannel.close();
    gen.messageChannel.close();
    this.currentGeneration = void 0;
  }

  /**
   * Emits an `error` event describing a realtime model failure.
   *
   * @param error - The underlying error.
   * @param recoverable - Forwarded as the event's `recoverable` flag.
   */
  emitError(error, recoverable) {
    this.emit("error", {
      timestamp: Date.now(),
      label: "phonic_realtime",
      type: "realtime_model_error",
      error,
      recoverable
    });
  }

  /**
   * Yields `frame` converted to the Phonic input format. Frames already at the input
   * sample rate and channel count are yielded unchanged when no resampler is active.
   * The resampler is recreated whenever the incoming sample rate changes.
   *
   * @param frame - Incoming audio frame.
   */
  *resampleAudio(frame) {
    if (this.inputResampler) {
      if (frame.sampleRate !== this.inputResamplerInputRate) {
        // Input rate changed: drop the resampler so a matching one is built below.
        this.inputResampler = void 0;
        this.inputResamplerInputRate = void 0;
      }
    }
    if (this.inputResampler === void 0 && (frame.sampleRate !== PHONIC_INPUT_SAMPLE_RATE || frame.channels !== PHONIC_NUM_CHANNELS)) {
      this.inputResampler = new import_rtc_node.AudioResampler(
        frame.sampleRate,
        PHONIC_INPUT_SAMPLE_RATE,
        PHONIC_NUM_CHANNELS
      );
      this.inputResamplerInputRate = frame.sampleRate;
    }
    if (this.inputResampler) {
      for (const resampledFrame of this.inputResampler.push(frame)) {
        yield resampledFrame;
      }
    } else {
      yield frame;
    }
  }
}
426
+ // Annotate the CommonJS export names for ESM import in node:
427
+ 0 && (module.exports = {
428
+ RealtimeModel,
429
+ RealtimeSession
430
+ });
431
+ //# sourceMappingURL=realtime_model.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/realtime/realtime_model.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { APIConnectOptions } from '@livekit/agents';\nimport {\n AudioByteStream,\n DEFAULT_API_CONNECT_OPTIONS,\n llm,\n log,\n shortuuid,\n stream,\n} from '@livekit/agents';\nimport { AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { Phonic } from 'phonic';\nimport { PhonicClient } from 'phonic';\nimport type { ServerEvent, Voice } from './api_proto.js';\n\nconst PHONIC_INPUT_SAMPLE_RATE = 44100;\nconst PHONIC_OUTPUT_SAMPLE_RATE = 44100;\nconst PHONIC_NUM_CHANNELS = 1;\nconst PHONIC_INPUT_FRAME_MS = 20;\nconst DEFAULT_MODEL = 'merritt';\nconst WS_CLOSE_NORMAL = 1000;\n\nexport interface RealtimeModelOptions {\n apiKey: string;\n model: string;\n phonicAgent?: string;\n voice?: Voice | string;\n welcomeMessage?: string;\n generateWelcomeMessage?: boolean;\n project?: string;\n connOptions: APIConnectOptions;\n baseUrl?: string;\n languages?: string[];\n audioSpeed?: number;\n phonicTools?: string[];\n boostedKeywords?: string[];\n generateNoInputPokeText?: boolean;\n noInputPokeSec?: number;\n noInputPokeText?: string;\n noInputEndConversationSec?: number;\n /** Set by `updateInstructions` via `voice.Agent` rather than the RealtimeModel constructor */\n instructions?: string;\n}\n\nexport class RealtimeModel extends llm.RealtimeModel {\n /** @internal */\n _options: RealtimeModelOptions;\n\n get model(): string {\n return this._options.model;\n }\n\n constructor(\n options: {\n /**\n * Phonic API key. If not provided, will attempt to read from PHONIC_API_KEY environment variable\n */\n apiKey?: string;\n /**\n * The name of the model to use. Defaults to 'merritt'\n */\n model?: Phonic.ConfigPayload['model'] | string;\n /**\n * Phonic agent to use for the conversation. 
Options explicitly set here will override the agent settings.\n */\n phonicAgent?: string;\n /**\n * Voice ID for agent outputs\n */\n voice?: Voice;\n /**\n * Welcome message for the agent to say when the conversation starts. Ignored when generateWelcomeMessage is true\n */\n welcomeMessage?: string;\n /**\n * When true, the welcome message will be automatically generated and welcomeMessage will be ignored\n */\n generateWelcomeMessage?: boolean;\n /**\n * Project name to use for the conversation. Defaults to `main`\n */\n project?: string;\n /**\n * ISO 639-1 language codes the agent should recognize and speak\n */\n languages?: string[];\n /**\n * Audio playback speed\n */\n audioSpeed?: number;\n /**\n * Phonic tool names available to the assistant\n */\n phonicTools?: string[];\n /**\n * Keywords to boost in speech recognition\n */\n boostedKeywords?: string[];\n /**\n * Auto-generate poke text when user is silent\n */\n generateNoInputPokeText?: boolean;\n /**\n * Seconds of silence before sending poke message\n */\n noInputPokeSec?: number;\n /**\n * Poke message text (ignored when generateNoInputPokeText is true)\n */\n noInputPokeText?: string;\n /**\n * Seconds of silence before ending conversation\n */\n noInputEndConversationSec?: number;\n /**\n * Connection options for the API connection\n */\n connOptions?: APIConnectOptions;\n baseUrl?: string;\n } = {},\n ) {\n super({\n messageTruncation: false,\n turnDetection: true,\n userTranscription: true,\n // TODO @Phonic-Co: Implement tool support\n // Phonic has automatic tool reply generation, but tools are not supported with LiveKit Agents yet.\n autoToolReplyGeneration: true,\n audioOutput: true,\n });\n\n const apiKey = options.apiKey || process.env.PHONIC_API_KEY;\n if (!apiKey) {\n throw new Error('Phonic API key is required. 
Provide apiKey or set PHONIC_API_KEY.');\n }\n\n this._options = {\n apiKey,\n voice: options.voice,\n phonicAgent: options.phonicAgent,\n project: options.project,\n welcomeMessage: options.welcomeMessage,\n generateWelcomeMessage: options.generateWelcomeMessage,\n languages: options.languages,\n audioSpeed: options.audioSpeed,\n phonicTools: options.phonicTools,\n boostedKeywords: options.boostedKeywords,\n generateNoInputPokeText: options.generateNoInputPokeText,\n noInputPokeSec: options.noInputPokeSec,\n noInputPokeText: options.noInputPokeText,\n noInputEndConversationSec: options.noInputEndConversationSec,\n connOptions: options.connOptions ?? DEFAULT_API_CONNECT_OPTIONS,\n model: options.model ?? DEFAULT_MODEL,\n baseUrl: options.baseUrl,\n };\n }\n\n /**\n * Create a new realtime session\n */\n session(): RealtimeSession {\n return new RealtimeSession(this);\n }\n\n async close(): Promise<void> {}\n}\n\ninterface GenerationState {\n responseId: string;\n messageChannel: stream.StreamChannel<llm.MessageGeneration>;\n functionChannel: stream.StreamChannel<llm.FunctionCall>;\n textChannel: stream.StreamChannel<string>;\n audioChannel: stream.StreamChannel<AudioFrame>;\n outputText: string;\n}\n\n/**\n * Realtime session for Phonic (https://docs.phonic.co/)\n */\nexport class RealtimeSession extends llm.RealtimeSession {\n private _tools: llm.ToolContext = {};\n private _chatCtx = llm.ChatContext.empty();\n\n private options: RealtimeModelOptions;\n private bstream: AudioByteStream;\n private inputResampler?: AudioResampler;\n private inputResamplerInputRate?: number;\n\n private currentGeneration?: GenerationState;\n private conversationId?: string;\n\n private client: PhonicClient;\n private socket?: Awaited<ReturnType<PhonicClient['conversations']['connect']>>;\n private logger = log();\n private closed = false;\n private configSent = false;\n private instructionsReady: Promise<void>;\n private resolveInstructionsReady: () => void;\n private connectTask: 
Promise<void>;\n\n constructor(realtimeModel: RealtimeModel) {\n super(realtimeModel);\n this.options = realtimeModel._options;\n\n this.resolveInstructionsReady = () => {};\n this.instructionsReady = new Promise<void>((resolve) => {\n this.resolveInstructionsReady = resolve;\n });\n\n this.client = new PhonicClient({\n apiKey: this.options.apiKey,\n baseUrl: this.options.baseUrl,\n });\n this.bstream = new AudioByteStream(\n PHONIC_INPUT_SAMPLE_RATE,\n PHONIC_NUM_CHANNELS,\n (PHONIC_INPUT_SAMPLE_RATE * PHONIC_INPUT_FRAME_MS) / 1000,\n );\n this.connectTask = this.connect().catch((error: unknown) => {\n const normalizedError = error instanceof Error ? error : new Error(String(error));\n this.emitError(normalizedError, false);\n });\n }\n\n get chatCtx(): llm.ChatContext {\n return this._chatCtx.copy();\n }\n\n get tools(): llm.ToolContext {\n return { ...this._tools };\n }\n\n async updateInstructions(instructions: string): Promise<void> {\n if (this.configSent) {\n this.logger.warn(\n 'updateInstructions called after config was already sent. 
Phonic does not support updating instructions mid-session.',\n );\n return;\n }\n this.options.instructions = instructions;\n this.resolveInstructionsReady();\n }\n\n async updateChatCtx(_chatCtx: llm.ChatContext): Promise<void> {\n this.logger.warn('updateChatCtx is not supported by the Phonic realtime model.');\n }\n\n async updateTools(tools: llm.ToolContext): Promise<void> {\n if (Object.keys(tools).length > 0) {\n this.logger.warn('Tool use is not supported by the Phonic realtime model.');\n }\n }\n\n updateOptions(_options: { toolChoice?: llm.ToolChoice | null }): void {\n this.logger.warn('updateOptions is not supported by the Phonic realtime model.');\n }\n\n pushAudio(frame: AudioFrame): void {\n if (this.closed) {\n return;\n }\n\n for (const resampledFrame of this.resampleAudio(frame)) {\n for (const chunk of this.bstream.write(resampledFrame.data.buffer as ArrayBuffer)) {\n const bytes = Buffer.from(chunk.data.buffer, chunk.data.byteOffset, chunk.data.byteLength);\n const payload: Phonic.AudioChunkPayload = {\n type: 'audio_chunk',\n audio: bytes.toString('base64'),\n };\n\n if (!this.socket) {\n continue;\n }\n this.socket.sendAudioChunk(payload);\n }\n }\n }\n\n // TODO @Phonic-Co: Implement generateReply\n async generateReply(_instructions?: string): Promise<llm.GenerationCreatedEvent> {\n throw new Error(\n 'generateReply is not yet supported by the Phonic realtime model. 
Consider using `welcomeMessage` instead.',\n );\n }\n\n async commitAudio(): Promise<void> {\n this.logger.warn('commitAudio is not supported by the Phonic realtime model.');\n }\n async clearAudio(): Promise<void> {\n this.logger.warn('clearAudio is not supported by the Phonic realtime model.');\n }\n\n async interrupt(): Promise<void> {\n this.logger.warn('interrupt is not supported by the Phonic realtime model.');\n }\n\n async truncate(_options: { messageId: string; audioEndMs: number; audioTranscript?: string }) {\n this.logger.warn('truncate is not supported by the Phonic realtime model.');\n }\n\n async close(): Promise<void> {\n this.closed = true;\n this.resolveInstructionsReady();\n this.closeCurrentGeneration({ interrupted: false });\n this.socket?.close();\n await this.connectTask;\n await super.close();\n }\n\n private async connect(): Promise<void> {\n this.socket = await this.client.conversations.connect({\n reconnectAttempts: this.options.connOptions.maxRetry,\n });\n\n if (this.closed) {\n this.socket.close();\n return;\n }\n\n this.socket.on('message', (message: unknown) =>\n this.handleServerMessage(message as ServerEvent),\n );\n this.socket.on('error', (error: Error) => this.emitError(error, false));\n this.socket.on('close', (event: { code?: number }) => {\n this.closeCurrentGeneration({ interrupted: false });\n if (!this.closed && event.code !== WS_CLOSE_NORMAL) {\n this.emitError(new Error(`Phonic STS socket closed with code ${event.code ?? 
-1}`), false);\n }\n });\n\n await this.socket.waitForOpen();\n await this.instructionsReady;\n if (this.closed) return;\n this.configSent = true;\n this.socket.sendConfig({\n type: 'config',\n model: this.options.model as Phonic.ConfigPayload['model'],\n agent: this.options.phonicAgent,\n project: this.options.project,\n welcome_message: this.options.welcomeMessage,\n generate_welcome_message: this.options.generateWelcomeMessage,\n system_prompt: this.options.instructions,\n voice_id: this.options.voice,\n input_format: 'pcm_44100',\n output_format: 'pcm_44100',\n recognized_languages: this.options.languages,\n audio_speed: this.options.audioSpeed,\n tools: this.options.phonicTools,\n boosted_keywords: this.options.boostedKeywords,\n generate_no_input_poke_text: this.options.generateNoInputPokeText,\n no_input_poke_sec: this.options.noInputPokeSec,\n no_input_poke_text: this.options.noInputPokeText,\n no_input_end_conversation_sec: this.options.noInputEndConversationSec,\n });\n }\n\n private handleServerMessage(message: ServerEvent): void {\n if (this.closed) {\n return;\n }\n\n switch (message.type) {\n case 'assistant_started_speaking':\n this.startNewAssistantTurn();\n break;\n case 'assistant_finished_speaking':\n this.finishAssistantTurn();\n break;\n case 'audio_chunk':\n this.handleAudioChunk(message);\n break;\n case 'input_text':\n this.handleInputText(message);\n break;\n case 'user_started_speaking':\n this.handleInputSpeechStarted();\n break;\n case 'user_finished_speaking':\n this.handleInputSpeechStopped();\n break;\n case 'error':\n this.emitError(new Error(message.error.message), false);\n break;\n case 'tool_call':\n this.emitError(\n new Error(\n `WebSocket tool calls are not yet supported by the Phonic realtime model with LiveKit Agents.`,\n ),\n false,\n );\n break;\n case 'assistant_ended_conversation':\n this.emitError(\n new Error(\n 'assistant_ended_conversation is not supported by the Phonic realtime model with LiveKit Agents.',\n ),\n 
false,\n );\n break;\n case 'conversation_created':\n this.conversationId = message.conversation_id;\n this.logger.info(`Phonic Conversation began with ID: ${this.conversationId}`);\n break;\n case 'assistant_chose_not_to_respond':\n case 'ready_to_start_conversation':\n case 'input_cancelled':\n case 'tool_call_output_processed':\n case 'tool_call_interrupted':\n case 'dtmf':\n default:\n break;\n }\n }\n\n private handleAudioChunk(message: Phonic.AudioChunkResponsePayload): void {\n /**\n * Although Phonic sends audio chunks when the assistant is not speaking (i.e. containing silence or background noise),\n * we only process the chunks when the assistant is speaking to align with the generations model, whereby new streams are created for each turn.\n */\n const gen = this.currentGeneration;\n if (!gen) return;\n\n if (message.text) {\n gen.outputText += message.text;\n gen.textChannel.write(message.text);\n }\n\n if (message.audio) {\n const bytes = Buffer.from(message.audio, 'base64');\n const sampleCount = Math.floor(bytes.byteLength / Int16Array.BYTES_PER_ELEMENT);\n if (sampleCount > 0) {\n const pcm = new Int16Array(\n bytes.buffer.slice(\n bytes.byteOffset,\n bytes.byteOffset + sampleCount * Int16Array.BYTES_PER_ELEMENT,\n ),\n );\n const frame = new AudioFrame(\n pcm,\n PHONIC_OUTPUT_SAMPLE_RATE,\n PHONIC_NUM_CHANNELS,\n sampleCount / PHONIC_NUM_CHANNELS,\n );\n gen.audioChannel.write(frame);\n }\n }\n }\n\n private handleInputText(message: Phonic.InputTextPayload): void {\n const itemId = shortuuid('PI_');\n this.emit('input_audio_transcription_completed', {\n itemId,\n transcript: message.text,\n isFinal: true,\n });\n\n this._chatCtx.addMessage({\n role: 'user',\n content: message.text,\n id: itemId,\n });\n }\n\n private handleInputSpeechStarted(): void {\n this.emit('input_speech_started', {});\n this.closeCurrentGeneration({ interrupted: true });\n }\n\n private handleInputSpeechStopped(): void {\n this.emit('input_speech_stopped', {\n 
userTranscriptionEnabled: true,\n });\n }\n\n private startNewAssistantTurn(): void {\n if (this.currentGeneration) {\n this.closeCurrentGeneration({ interrupted: true });\n }\n\n const responseId = shortuuid('PS_');\n\n const textChannel = stream.createStreamChannel<string>();\n const audioChannel = stream.createStreamChannel<AudioFrame>();\n const functionChannel = stream.createStreamChannel<llm.FunctionCall>();\n const messageChannel = stream.createStreamChannel<llm.MessageGeneration>();\n\n messageChannel.write({\n messageId: responseId,\n textStream: textChannel.stream(),\n audioStream: audioChannel.stream(),\n modalities: Promise.resolve(['audio', 'text']),\n });\n\n this.currentGeneration = {\n responseId,\n messageChannel,\n functionChannel,\n textChannel,\n audioChannel,\n outputText: '',\n };\n\n this.emit('generation_created', {\n messageStream: messageChannel.stream(),\n functionStream: functionChannel.stream(),\n userInitiated: false,\n responseId,\n });\n }\n\n private finishAssistantTurn(): void {\n this.closeCurrentGeneration({ interrupted: false });\n }\n\n private closeCurrentGeneration({ interrupted }: { interrupted: boolean }): void {\n const gen = this.currentGeneration;\n if (!gen) return;\n\n if (gen.outputText) {\n this._chatCtx.addMessage({\n role: 'assistant',\n content: gen.outputText,\n id: gen.responseId,\n interrupted,\n });\n }\n\n gen.textChannel.close();\n gen.audioChannel.close();\n gen.functionChannel.close();\n gen.messageChannel.close();\n this.currentGeneration = undefined;\n }\n\n private emitError(error: Error, recoverable: boolean): void {\n this.emit('error', {\n timestamp: Date.now(),\n label: 'phonic_realtime',\n type: 'realtime_model_error',\n error,\n recoverable,\n } satisfies llm.RealtimeModelError);\n }\n\n private *resampleAudio(frame: AudioFrame): Generator<AudioFrame> {\n if (this.inputResampler) {\n if (frame.sampleRate !== this.inputResamplerInputRate) {\n this.inputResampler = undefined;\n 
this.inputResamplerInputRate = undefined;\n }\n }\n\n if (\n this.inputResampler === undefined &&\n (frame.sampleRate !== PHONIC_INPUT_SAMPLE_RATE || frame.channels !== PHONIC_NUM_CHANNELS)\n ) {\n this.inputResampler = new AudioResampler(\n frame.sampleRate,\n PHONIC_INPUT_SAMPLE_RATE,\n PHONIC_NUM_CHANNELS,\n );\n this.inputResamplerInputRate = frame.sampleRate;\n }\n\n if (this.inputResampler) {\n for (const resampledFrame of this.inputResampler.push(frame)) {\n yield resampledFrame;\n }\n } else {\n yield frame;\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,oBAOO;AACP,sBAA2C;AAE3C,oBAA6B;AAG7B,MAAM,2BAA2B;AACjC,MAAM,4BAA4B;AAClC,MAAM,sBAAsB;AAC5B,MAAM,wBAAwB;AAC9B,MAAM,gBAAgB;AACtB,MAAM,kBAAkB;AAwBjB,MAAM,sBAAsB,kBAAI,cAAc;AAAA;AAAA,EAEnD;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EAEA,YACE,UAkEI,CAAC,GACL;AACA,UAAM;AAAA,MACJ,mBAAmB;AAAA,MACnB,eAAe;AAAA,MACf,mBAAmB;AAAA;AAAA;AAAA,MAGnB,yBAAyB;AAAA,MACzB,aAAa;AAAA,IACf,CAAC;AAED,UAAM,SAAS,QAAQ,UAAU,QAAQ,IAAI;AAC7C,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI,MAAM,mEAAmE;AAAA,IACrF;AAEA,SAAK,WAAW;AAAA,MACd;AAAA,MACA,OAAO,QAAQ;AAAA,MACf,aAAa,QAAQ;AAAA,MACrB,SAAS,QAAQ;AAAA,MACjB,gBAAgB,QAAQ;AAAA,MACxB,wBAAwB,QAAQ;AAAA,MAChC,WAAW,QAAQ;AAAA,MACnB,YAAY,QAAQ;AAAA,MACpB,aAAa,QAAQ;AAAA,MACrB,iBAAiB,QAAQ;AAAA,MACzB,yBAAyB,QAAQ;AAAA,MACjC,gBAAgB,QAAQ;AAAA,MACxB,iBAAiB,QAAQ;AAAA,MACzB,2BAA2B,QAAQ;AAAA,MACnC,aAAa,QAAQ,eAAe;AAAA,MACpC,OAAO,QAAQ,SAAS;AAAA,MACxB,SAAS,QAAQ;AAAA,IACnB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,UAA2B;AACzB,WAAO,IAAI,gBAAgB,IAAI;AAAA,EACjC;AAAA,EAEA,MAAM,QAAuB;AAAA,EAAC;AAChC;AAcO,MAAM,wBAAwB,kBAAI,gBAAgB;AAAA,EAC/C,SAA0B,CAAC;AAAA,EAC3B,WAAW,kBAAI,YAAY,MAAM;AAAA,EAEjC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EACA,aAAS,mBAAI;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,eAA8B;AACxC,UAAM,aAAa;AACnB,SAAK,UAAU,cAAc;AAE7B,SAAK,2BAA2B,MAAM;AAAA,IAAC;AACvC,SAAK,oBAAoB,IAAI,QAAc,CAAC,YAAY;AACtD,WAAK,2BAA2B;AAAA,IAClC,C
AAC;AAED,SAAK,SAAS,IAAI,2BAAa;AAAA,MAC7B,QAAQ,KAAK,QAAQ;AAAA,MACrB,SAAS,KAAK,QAAQ;AAAA,IACxB,CAAC;AACD,SAAK,UAAU,IAAI;AAAA,MACjB;AAAA,MACA;AAAA,MACC,2BAA2B,wBAAyB;AAAA,IACvD;AACA,SAAK,cAAc,KAAK,QAAQ,EAAE,MAAM,CAAC,UAAmB;AAC1D,YAAM,kBAAkB,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAChF,WAAK,UAAU,iBAAiB,KAAK;AAAA,IACvC,CAAC;AAAA,EACH;AAAA,EAEA,IAAI,UAA2B;AAC7B,WAAO,KAAK,SAAS,KAAK;AAAA,EAC5B;AAAA,EAEA,IAAI,QAAyB;AAC3B,WAAO,EAAE,GAAG,KAAK,OAAO;AAAA,EAC1B;AAAA,EAEA,MAAM,mBAAmB,cAAqC;AAC5D,QAAI,KAAK,YAAY;AACnB,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AACA;AAAA,IACF;AACA,SAAK,QAAQ,eAAe;AAC5B,SAAK,yBAAyB;AAAA,EAChC;AAAA,EAEA,MAAM,cAAc,UAA0C;AAC5D,SAAK,OAAO,KAAK,8DAA8D;AAAA,EACjF;AAAA,EAEA,MAAM,YAAY,OAAuC;AACvD,QAAI,OAAO,KAAK,KAAK,EAAE,SAAS,GAAG;AACjC,WAAK,OAAO,KAAK,yDAAyD;AAAA,IAC5E;AAAA,EACF;AAAA,EAEA,cAAc,UAAwD;AACpE,SAAK,OAAO,KAAK,8DAA8D;AAAA,EACjF;AAAA,EAEA,UAAU,OAAyB;AACjC,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AAEA,eAAW,kBAAkB,KAAK,cAAc,KAAK,GAAG;AACtD,iBAAW,SAAS,KAAK,QAAQ,MAAM,eAAe,KAAK,MAAqB,GAAG;AACjF,cAAM,QAAQ,OAAO,KAAK,MAAM,KAAK,QAAQ,MAAM,KAAK,YAAY,MAAM,KAAK,UAAU;AACzF,cAAM,UAAoC;AAAA,UACxC,MAAM;AAAA,UACN,OAAO,MAAM,SAAS,QAAQ;AAAA,QAChC;AAEA,YAAI,CAAC,KAAK,QAAQ;AAChB;AAAA,QACF;AACA,aAAK,OAAO,eAAe,OAAO;AAAA,MACpC;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,cAAc,eAA6D;AAC/E,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cAA6B;AACjC,SAAK,OAAO,KAAK,4DAA4D;AAAA,EAC/E;AAAA,EACA,MAAM,aAA4B;AAChC,SAAK,OAAO,KAAK,2DAA2D;AAAA,EAC9E;AAAA,EAEA,MAAM,YAA2B;AAC/B,SAAK,OAAO,KAAK,0DAA0D;AAAA,EAC7E;AAAA,EAEA,MAAM,SAAS,UAA+E;AAC5F,SAAK,OAAO,KAAK,yDAAyD;AAAA,EAC5E;AAAA,EAEA,MAAM,QAAuB;AA9S/B;AA+SI,SAAK,SAAS;AACd,SAAK,yBAAyB;AAC9B,SAAK,uBAAuB,EAAE,aAAa,MAAM,CAAC;AAClD,eAAK,WAAL,mBAAa;AACb,UAAM,KAAK;AACX,UAAM,MAAM,MAAM;AAAA,EACpB;AAAA,EAEA,MAAc,UAAyB;AACrC,SAAK,SAAS,MAAM,KAAK,OAAO,cAAc,QAAQ;AAAA,MACpD,mBAAmB,KAAK,QAAQ,YAAY;AAAA,IAC9C,CAAC;AAED,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,MAAM;AAClB;AAAA,IACF;AAEA,SAAK,OAAO;AAAA,MAAG;AAAA,MAAW,CAAC,YACzB,KAAK,oBAAoB,OAAsB;AAAA,IACjD;AACA,SAAK,OAAO,GAAG,SAAS,CAAC,UAAiB
,KAAK,UAAU,OAAO,KAAK,CAAC;AACtE,SAAK,OAAO,GAAG,SAAS,CAAC,UAA6B;AACpD,WAAK,uBAAuB,EAAE,aAAa,MAAM,CAAC;AAClD,UAAI,CAAC,KAAK,UAAU,MAAM,SAAS,iBAAiB;AAClD,aAAK,UAAU,IAAI,MAAM,sCAAsC,MAAM,QAAQ,EAAE,EAAE,GAAG,KAAK;AAAA,MAC3F;AAAA,IACF,CAAC;AAED,UAAM,KAAK,OAAO,YAAY;AAC9B,UAAM,KAAK;AACX,QAAI,KAAK,OAAQ;AACjB,SAAK,aAAa;AAClB,SAAK,OAAO,WAAW;AAAA,MACrB,MAAM;AAAA,MACN,OAAO,KAAK,QAAQ;AAAA,MACpB,OAAO,KAAK,QAAQ;AAAA,MACpB,SAAS,KAAK,QAAQ;AAAA,MACtB,iBAAiB,KAAK,QAAQ;AAAA,MAC9B,0BAA0B,KAAK,QAAQ;AAAA,MACvC,eAAe,KAAK,QAAQ;AAAA,MAC5B,UAAU,KAAK,QAAQ;AAAA,MACvB,cAAc;AAAA,MACd,eAAe;AAAA,MACf,sBAAsB,KAAK,QAAQ;AAAA,MACnC,aAAa,KAAK,QAAQ;AAAA,MAC1B,OAAO,KAAK,QAAQ;AAAA,MACpB,kBAAkB,KAAK,QAAQ;AAAA,MAC/B,6BAA6B,KAAK,QAAQ;AAAA,MAC1C,mBAAmB,KAAK,QAAQ;AAAA,MAChC,oBAAoB,KAAK,QAAQ;AAAA,MACjC,+BAA+B,KAAK,QAAQ;AAAA,IAC9C,CAAC;AAAA,EACH;AAAA,EAEQ,oBAAoB,SAA4B;AACtD,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AAEA,YAAQ,QAAQ,MAAM;AAAA,MACpB,KAAK;AACH,aAAK,sBAAsB;AAC3B;AAAA,MACF,KAAK;AACH,aAAK,oBAAoB;AACzB;AAAA,MACF,KAAK;AACH,aAAK,iBAAiB,OAAO;AAC7B;AAAA,MACF,KAAK;AACH,aAAK,gBAAgB,OAAO;AAC5B;AAAA,MACF,KAAK;AACH,aAAK,yBAAyB;AAC9B;AAAA,MACF,KAAK;AACH,aAAK,yBAAyB;AAC9B;AAAA,MACF,KAAK;AACH,aAAK,UAAU,IAAI,MAAM,QAAQ,MAAM,OAAO,GAAG,KAAK;AACtD;AAAA,MACF,KAAK;AACH,aAAK;AAAA,UACH,IAAI;AAAA,YACF;AAAA,UACF;AAAA,UACA;AAAA,QACF;AACA;AAAA,MACF,KAAK;AACH,aAAK;AAAA,UACH,IAAI;AAAA,YACF;AAAA,UACF;AAAA,UACA;AAAA,QACF;AACA;AAAA,MACF,KAAK;AACH,aAAK,iBAAiB,QAAQ;AAC9B,aAAK,OAAO,KAAK,sCAAsC,KAAK,cAAc,EAAE;AAC5E;AAAA,MACF,KAAK;AAAA,MACL,KAAK;AAAA,MACL,KAAK;AAAA,MACL,KAAK;AAAA,MACL,KAAK;AAAA,MACL,KAAK;AAAA,MACL;AACE;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,iBAAiB,SAAiD;AAKxE,UAAM,MAAM,KAAK;AACjB,QAAI,CAAC,IAAK;AAEV,QAAI,QAAQ,MAAM;AAChB,UAAI,cAAc,QAAQ;AAC1B,UAAI,YAAY,MAAM,QAAQ,IAAI;AAAA,IACpC;AAEA,QAAI,QAAQ,OAAO;AACjB,YAAM,QAAQ,OAAO,KAAK,QAAQ,OAAO,QAAQ;AACjD,YAAM,cAAc,KAAK,MAAM,MAAM,aAAa,WAAW,iBAAiB;AAC9E,UAAI,cAAc,GAAG;AACnB,cAAM,MAAM,IAAI;AAAA,UACd,MAAM,OAAO;AAAA,YACX,MAAM;AAAA,YACN,MAAM,aAAa,cAAc,WAAW;AAAA,UAC9C;AAAA,QACF;AACA,cAAM,QAAQ,IAAI;AAAA,UAChB;AAAA,U
ACA;AAAA,UACA;AAAA,UACA,cAAc;AAAA,QAChB;AACA,YAAI,aAAa,MAAM,KAAK;AAAA,MAC9B;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAwC;AAC9D,UAAM,aAAS,yBAAU,KAAK;AAC9B,SAAK,KAAK,uCAAuC;AAAA,MAC/C;AAAA,MACA,YAAY,QAAQ;AAAA,MACpB,SAAS;AAAA,IACX,CAAC;AAED,SAAK,SAAS,WAAW;AAAA,MACvB,MAAM;AAAA,MACN,SAAS,QAAQ;AAAA,MACjB,IAAI;AAAA,IACN,CAAC;AAAA,EACH;AAAA,EAEQ,2BAAiC;AACvC,SAAK,KAAK,wBAAwB,CAAC,CAAC;AACpC,SAAK,uBAAuB,EAAE,aAAa,KAAK,CAAC;AAAA,EACnD;AAAA,EAEQ,2BAAiC;AACvC,SAAK,KAAK,wBAAwB;AAAA,MAChC,0BAA0B;AAAA,IAC5B,CAAC;AAAA,EACH;AAAA,EAEQ,wBAA8B;AACpC,QAAI,KAAK,mBAAmB;AAC1B,WAAK,uBAAuB,EAAE,aAAa,KAAK,CAAC;AAAA,IACnD;AAEA,UAAM,iBAAa,yBAAU,KAAK;AAElC,UAAM,cAAc,qBAAO,oBAA4B;AACvD,UAAM,eAAe,qBAAO,oBAAgC;AAC5D,UAAM,kBAAkB,qBAAO,oBAAsC;AACrE,UAAM,iBAAiB,qBAAO,oBAA2C;AAEzE,mBAAe,MAAM;AAAA,MACnB,WAAW;AAAA,MACX,YAAY,YAAY,OAAO;AAAA,MAC/B,aAAa,aAAa,OAAO;AAAA,MACjC,YAAY,QAAQ,QAAQ,CAAC,SAAS,MAAM,CAAC;AAAA,IAC/C,CAAC;AAED,SAAK,oBAAoB;AAAA,MACvB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,YAAY;AAAA,IACd;AAEA,SAAK,KAAK,sBAAsB;AAAA,MAC9B,eAAe,eAAe,OAAO;AAAA,MACrC,gBAAgB,gBAAgB,OAAO;AAAA,MACvC,eAAe;AAAA,MACf;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEQ,sBAA4B;AAClC,SAAK,uBAAuB,EAAE,aAAa,MAAM,CAAC;AAAA,EACpD;AAAA,EAEQ,uBAAuB,EAAE,YAAY,GAAmC;AAC9E,UAAM,MAAM,KAAK;AACjB,QAAI,CAAC,IAAK;AAEV,QAAI,IAAI,YAAY;AAClB,WAAK,SAAS,WAAW;AAAA,QACvB,MAAM;AAAA,QACN,SAAS,IAAI;AAAA,QACb,IAAI,IAAI;AAAA,QACR;AAAA,MACF,CAAC;AAAA,IACH;AAEA,QAAI,YAAY,MAAM;AACtB,QAAI,aAAa,MAAM;AACvB,QAAI,gBAAgB,MAAM;AAC1B,QAAI,eAAe,MAAM;AACzB,SAAK,oBAAoB;AAAA,EAC3B;AAAA,EAEQ,UAAU,OAAc,aAA4B;AAC1D,SAAK,KAAK,SAAS;AAAA,MACjB,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO;AAAA,MACP,MAAM;AAAA,MACN;AAAA,MACA;AAAA,IACF,CAAkC;AAAA,EACpC;AAAA,EAEA,CAAS,cAAc,OAA0C;AAC/D,QAAI,KAAK,gBAAgB;AACvB,UAAI,MAAM,eAAe,KAAK,yBAAyB;AACrD,aAAK,iBAAiB;AACtB,aAAK,0BAA0B;AAAA,MACjC;AAAA,IACF;AAEA,QACE,KAAK,mBAAmB,WACvB,MAAM,eAAe,4BAA4B,MAAM,aAAa,sBACrE;AACA,WAAK,iBAAiB,IAAI;AAAA,QACxB,MAAM;AAAA,QACN;AAAA,QACA;AAAA,MACF;AACA,WAAK,0BAA0B,MAAM;AAAA,IACvC;AAEA,QAAI,KAAK,gBAAgB;AACvB,iBAAW,kBAAkB,KAAK
,eAAe,KAAK,KAAK,GAAG;AAC5D,cAAM;AAAA,MACR;AAAA,IACF,OAAO;AACL,YAAM;AAAA,IACR;AAAA,EACF;AACF;","names":[]}
@@ -0,0 +1,156 @@
1
+ import type { APIConnectOptions } from '@livekit/agents';
2
+ import { llm } from '@livekit/agents';
3
+ import { AudioFrame } from '@livekit/rtc-node';
4
+ import type { Phonic } from 'phonic';
5
+ import type { Voice } from './api_proto.js';
6
+ export interface RealtimeModelOptions { // Resolved option set held by RealtimeModel as `_options`
7
+ apiKey: string; // Phonic API key (required here; the constructor documents a PHONIC_API_KEY environment fallback)
8
+ model: string; // Model name; the constructor documents 'merritt' as the default
9
+ phonicAgent?: string; // Phonic agent to use for the conversation
10
+ voice?: Voice | string; // Voice ID for agent outputs
11
+ welcomeMessage?: string; // Said when the conversation starts; ignored when generateWelcomeMessage is true
12
+ generateWelcomeMessage?: boolean; // When true, the welcome message is auto-generated
13
+ project?: string; // Project name; documented default is `main`
14
+ connOptions: APIConnectOptions; // Connection options for the API connection (required here, optional in the constructor)
15
+ baseUrl?: string; // NOTE(review): undocumented; presumably overrides the Phonic API base URL — confirm
16
+ languages?: string[]; // ISO 639-1 language codes the agent should recognize and speak
17
+ audioSpeed?: number; // Audio playback speed
18
+ phonicTools?: string[]; // Phonic tool names available to the assistant
19
+ boostedKeywords?: string[]; // Keywords to boost in speech recognition
20
+ generateNoInputPokeText?: boolean; // Auto-generate poke text when the user is silent
21
+ noInputPokeSec?: number; // Seconds of silence before sending the poke message
22
+ noInputPokeText?: string; // Poke message text (ignored when generateNoInputPokeText is true)
23
+ noInputEndConversationSec?: number; // Seconds of silence before ending the conversation
24
+ /** Set by `updateInstructions` via `voice.Agent` rather than the RealtimeModel constructor */
25
+ instructions?: string;
26
+ }
27
+ export declare class RealtimeModel extends llm.RealtimeModel { // Phonic realtime model; session() creates a RealtimeSession
28
+ /** @internal */
29
+ _options: RealtimeModelOptions; // resolved options; the `model` getter reads its `model` field
30
+ get model(): string; // returns this._options.model
31
+ constructor(options?: {
32
+ /**
33
+ * Phonic API key. If not provided, will attempt to read from PHONIC_API_KEY environment variable
34
+ */
35
+ apiKey?: string;
36
+ /**
37
+ * The name of the model to use. Defaults to 'merritt'
38
+ */
39
+ model?: Phonic.ConfigPayload['model'] | string;
40
+ /**
41
+ * Phonic agent to use for the conversation. Options explicitly set here will override the agent settings.
42
+ */
43
+ phonicAgent?: string;
44
+ /**
45
+ * Voice ID for agent outputs
46
+ */
47
+ voice?: Voice; // NOTE(review): RealtimeModelOptions.voice is typed `Voice | string`, but only `Voice` is accepted here — confirm this is intended
48
+ /**
49
+ * Welcome message for the agent to say when the conversation starts. Ignored when generateWelcomeMessage is true
50
+ */
51
+ welcomeMessage?: string;
52
+ /**
53
+ * When true, the welcome message will be automatically generated and welcomeMessage will be ignored
54
+ */
55
+ generateWelcomeMessage?: boolean;
56
+ /**
57
+ * Project name to use for the conversation. Defaults to `main`
58
+ */
59
+ project?: string;
60
+ /**
61
+ * ISO 639-1 language codes the agent should recognize and speak
62
+ */
63
+ languages?: string[];
64
+ /**
65
+ * Audio playback speed
66
+ */
67
+ audioSpeed?: number;
68
+ /**
69
+ * Phonic tool names available to the assistant
70
+ */
71
+ phonicTools?: string[];
72
+ /**
73
+ * Keywords to boost in speech recognition
74
+ */
75
+ boostedKeywords?: string[];
76
+ /**
77
+ * Auto-generate poke text when user is silent
78
+ */
79
+ generateNoInputPokeText?: boolean;
80
+ /**
81
+ * Seconds of silence before sending poke message
82
+ */
83
+ noInputPokeSec?: number;
84
+ /**
85
+ * Poke message text (ignored when generateNoInputPokeText is true)
86
+ */
87
+ noInputPokeText?: string;
88
+ /**
89
+ * Seconds of silence before ending conversation
90
+ */
91
+ noInputEndConversationSec?: number;
92
+ /**
93
+ * Connection options for the API connection
94
+ */
95
+ connOptions?: APIConnectOptions;
96
+ baseUrl?: string; // NOTE(review): undocumented option; presumably overrides the Phonic API base URL — confirm
97
+ });
98
+ /**
99
+ * Create a new realtime session
100
+ */
101
+ session(): RealtimeSession;
102
+ close(): Promise<void>;
103
+ }
104
+ /**
105
+ * Realtime session for Phonic (https://docs.phonic.co/)
106
+ */
107
+ export declare class RealtimeSession extends llm.RealtimeSession { // private members below carry no type annotations in this declaration file
108
+ private _tools;
109
+ private _chatCtx;
110
+ private options;
111
+ private bstream;
112
+ private inputResampler?; // created by resampleAudio when a frame's sample rate or channel count differs from the Phonic input format
113
+ private inputResamplerInputRate?; // set to the frame's sample rate when inputResampler is created; may be reset to undefined
114
+ private currentGeneration?;
115
+ private conversationId?;
116
+ private client;
117
+ private socket?;
118
+ private logger;
119
+ private closed;
120
+ private configSent;
121
+ private instructionsReady; // NOTE(review): presumably a promise paired with resolveInstructionsReady — confirm
122
+ private resolveInstructionsReady;
123
+ private connectTask;
124
+ constructor(realtimeModel: RealtimeModel);
125
+ get chatCtx(): llm.ChatContext;
126
+ get tools(): llm.ToolContext;
127
+ updateInstructions(instructions: string): Promise<void>;
128
+ updateChatCtx(_chatCtx: llm.ChatContext): Promise<void>; // NOTE(review): underscore-prefixed parameter suggests the argument is ignored — confirm against the implementation
129
+ updateTools(tools: llm.ToolContext): Promise<void>;
130
+ updateOptions(_options: { // NOTE(review): underscore-prefixed parameter suggests the argument is ignored — confirm
131
+ toolChoice?: llm.ToolChoice | null;
132
+ }): void;
133
+ pushAudio(frame: AudioFrame): void;
134
+ generateReply(_instructions?: string): Promise<llm.GenerationCreatedEvent>; // NOTE(review): underscore-prefixed parameter suggests the argument is ignored — confirm
135
+ commitAudio(): Promise<void>;
136
+ clearAudio(): Promise<void>;
137
+ interrupt(): Promise<void>;
138
+ truncate(_options: { // NOTE(review): underscore-prefixed parameter suggests the argument is ignored — confirm
139
+ messageId: string;
140
+ audioEndMs: number;
141
+ audioTranscript?: string;
142
+ }): Promise<void>;
143
+ close(): Promise<void>;
144
+ private connect;
145
+ private handleServerMessage;
146
+ private handleAudioChunk;
147
+ private handleInputText;
148
+ private handleInputSpeechStarted;
149
+ private handleInputSpeechStopped;
150
+ private startNewAssistantTurn;
151
+ private finishAssistantTurn;
152
+ private closeCurrentGeneration;
153
+ private emitError;
154
+ private resampleAudio; // generator: yields frames pushed through inputResampler when one exists, otherwise yields the input frame unchanged
155
+ }
156
+ //# sourceMappingURL=realtime_model.d.ts.map