@livekit/agents 1.0.17 → 1.0.19
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/dist/index.cjs +3 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/inference/api_protos.d.cts +12 -12
- package/dist/inference/api_protos.d.ts +12 -12
- package/dist/inference/llm.cjs +35 -13
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +10 -5
- package/dist/inference/llm.d.ts +10 -5
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +35 -13
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/tts.cjs +1 -1
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.js +1 -1
- package/dist/inference/tts.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +6 -2
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +6 -2
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +31 -0
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +6 -0
- package/dist/job.d.ts +6 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +31 -0
- package/dist/job.js.map +1 -1
- package/dist/llm/chat_context.cjs +33 -0
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +22 -2
- package/dist/llm/chat_context.d.ts +22 -2
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +32 -0
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -1
- package/dist/llm/llm.d.ts +1 -1
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/google.cjs.map +1 -1
- package/dist/llm/provider_format/google.d.cts +1 -1
- package/dist/llm/provider_format/google.d.ts +1 -1
- package/dist/llm/provider_format/google.d.ts.map +1 -1
- package/dist/llm/provider_format/google.js.map +1 -1
- package/dist/llm/provider_format/google.test.cjs +48 -0
- package/dist/llm/provider_format/google.test.cjs.map +1 -1
- package/dist/llm/provider_format/google.test.js +54 -1
- package/dist/llm/provider_format/google.test.js.map +1 -1
- package/dist/llm/provider_format/index.d.cts +1 -1
- package/dist/llm/provider_format/index.d.ts +1 -1
- package/dist/llm/provider_format/index.d.ts.map +1 -1
- package/dist/llm/provider_format/openai.cjs +1 -2
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.js +1 -2
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +32 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +38 -1
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +4 -0
- package/dist/llm/realtime.d.ts +4 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js.map +1 -1
- package/dist/llm/utils.cjs +2 -2
- package/dist/llm/utils.cjs.map +1 -1
- package/dist/llm/utils.d.cts +1 -1
- package/dist/llm/utils.d.ts +1 -1
- package/dist/llm/utils.d.ts.map +1 -1
- package/dist/llm/utils.js +2 -2
- package/dist/llm/utils.js.map +1 -1
- package/dist/llm/zod-utils.cjs +6 -3
- package/dist/llm/zod-utils.cjs.map +1 -1
- package/dist/llm/zod-utils.d.cts +1 -1
- package/dist/llm/zod-utils.d.ts +1 -1
- package/dist/llm/zod-utils.d.ts.map +1 -1
- package/dist/llm/zod-utils.js +6 -3
- package/dist/llm/zod-utils.js.map +1 -1
- package/dist/llm/zod-utils.test.cjs +83 -0
- package/dist/llm/zod-utils.test.cjs.map +1 -1
- package/dist/llm/zod-utils.test.js +83 -0
- package/dist/llm/zod-utils.test.js.map +1 -1
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js.map +1 -1
- package/dist/telemetry/index.cjs +51 -0
- package/dist/telemetry/index.cjs.map +1 -0
- package/dist/telemetry/index.d.cts +4 -0
- package/dist/telemetry/index.d.ts +4 -0
- package/dist/telemetry/index.d.ts.map +1 -0
- package/dist/telemetry/index.js +12 -0
- package/dist/telemetry/index.js.map +1 -0
- package/dist/telemetry/trace_types.cjs +191 -0
- package/dist/telemetry/trace_types.cjs.map +1 -0
- package/dist/telemetry/trace_types.d.cts +56 -0
- package/dist/telemetry/trace_types.d.ts +56 -0
- package/dist/telemetry/trace_types.d.ts.map +1 -0
- package/dist/telemetry/trace_types.js +113 -0
- package/dist/telemetry/trace_types.js.map +1 -0
- package/dist/telemetry/traces.cjs +196 -0
- package/dist/telemetry/traces.cjs.map +1 -0
- package/dist/telemetry/traces.d.cts +97 -0
- package/dist/telemetry/traces.d.ts +97 -0
- package/dist/telemetry/traces.d.ts.map +1 -0
- package/dist/telemetry/traces.js +173 -0
- package/dist/telemetry/traces.js.map +1 -0
- package/dist/telemetry/utils.cjs +86 -0
- package/dist/telemetry/utils.cjs.map +1 -0
- package/dist/telemetry/utils.d.cts +5 -0
- package/dist/telemetry/utils.d.ts +5 -0
- package/dist/telemetry/utils.d.ts.map +1 -0
- package/dist/telemetry/utils.js +51 -0
- package/dist/telemetry/utils.js.map +1 -0
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +7 -0
- package/dist/utils.d.ts +7 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent.cjs +15 -0
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +4 -1
- package/dist/voice/agent.d.ts +4 -1
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +15 -0
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +71 -20
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +71 -20
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +69 -2
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +11 -2
- package/dist/voice/agent_session.d.ts +11 -2
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +70 -3
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/interruption_detection.test.cjs +114 -0
- package/dist/voice/interruption_detection.test.cjs.map +1 -0
- package/dist/voice/interruption_detection.test.js +113 -0
- package/dist/voice/interruption_detection.test.js.map +1 -0
- package/dist/voice/report.cjs +69 -0
- package/dist/voice/report.cjs.map +1 -0
- package/dist/voice/report.d.cts +26 -0
- package/dist/voice/report.d.ts +26 -0
- package/dist/voice/report.d.ts.map +1 -0
- package/dist/voice/report.js +44 -0
- package/dist/voice/report.js.map +1 -0
- package/dist/voice/room_io/room_io.cjs +3 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +1 -0
- package/dist/voice/room_io/room_io.d.ts +1 -0
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +3 -0
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/package.json +12 -5
- package/src/index.ts +2 -1
- package/src/inference/llm.ts +53 -21
- package/src/inference/tts.ts +1 -1
- package/src/ipc/job_proc_lazy_main.ts +10 -2
- package/src/job.ts +48 -0
- package/src/llm/__snapshots__/zod-utils.test.ts.snap +218 -0
- package/src/llm/chat_context.ts +53 -1
- package/src/llm/index.ts +1 -0
- package/src/llm/llm.ts +3 -1
- package/src/llm/provider_format/google.test.ts +72 -1
- package/src/llm/provider_format/google.ts +4 -4
- package/src/llm/provider_format/openai.test.ts +55 -1
- package/src/llm/provider_format/openai.ts +3 -2
- package/src/llm/realtime.ts +8 -1
- package/src/llm/utils.ts +7 -2
- package/src/llm/zod-utils.test.ts +101 -0
- package/src/llm/zod-utils.ts +12 -3
- package/src/log.ts +1 -0
- package/src/telemetry/index.ts +10 -0
- package/src/telemetry/trace_types.ts +88 -0
- package/src/telemetry/traces.ts +266 -0
- package/src/telemetry/utils.ts +61 -0
- package/src/tts/tts.ts +4 -0
- package/src/utils.ts +17 -0
- package/src/voice/agent.ts +22 -0
- package/src/voice/agent_activity.ts +102 -24
- package/src/voice/agent_session.ts +98 -1
- package/src/voice/audio_recognition.ts +2 -0
- package/src/voice/generation.ts +3 -0
- package/src/voice/index.ts +1 -0
- package/src/voice/interruption_detection.test.ts +151 -0
- package/src/voice/report.ts +77 -0
- package/src/voice/room_io/room_io.ts +4 -0
|
@@ -152,6 +152,11 @@ class AgentActivity {
|
|
|
152
152
|
} catch (error) {
|
|
153
153
|
this.logger.error(error, "failed to update the tools");
|
|
154
154
|
}
|
|
155
|
+
if (!this.llm.capabilities.audioOutput && !this.tts && this.agentSession.output.audio) {
|
|
156
|
+
this.logger.error(
|
|
157
|
+
"audio output is enabled but RealtimeModel has no audio modality and no TTS is set. Either enable audio modality in the RealtimeModel or set a TTS model."
|
|
158
|
+
);
|
|
159
|
+
}
|
|
155
160
|
} else if (this.llm instanceof LLM) {
|
|
156
161
|
try {
|
|
157
162
|
updateInstructions({
|
|
@@ -449,7 +454,9 @@ class AgentActivity {
|
|
|
449
454
|
}
|
|
450
455
|
if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
|
|
451
456
|
const text = this.audioRecognition.currentTranscript;
|
|
452
|
-
|
|
457
|
+
const normalizedText = text ?? "";
|
|
458
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
459
|
+
if (wordCount < this.agentSession.options.minInterruptionWords) {
|
|
453
460
|
return;
|
|
454
461
|
}
|
|
455
462
|
}
|
|
@@ -551,10 +558,19 @@ class AgentActivity {
|
|
|
551
558
|
this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
|
|
552
559
|
return true;
|
|
553
560
|
}
|
|
554
|
-
if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0
|
|
555
|
-
|
|
556
|
-
this.
|
|
557
|
-
|
|
561
|
+
if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0) {
|
|
562
|
+
const wordCount = splitWords(info.newTranscript, true).length;
|
|
563
|
+
if (wordCount < this.agentSession.options.minInterruptionWords) {
|
|
564
|
+
this.cancelPreemptiveGeneration();
|
|
565
|
+
this.logger.info(
|
|
566
|
+
{
|
|
567
|
+
wordCount,
|
|
568
|
+
minInterruptionWords: this.agentSession.options.minInterruptionWords
|
|
569
|
+
},
|
|
570
|
+
"skipping user input, word count below minimum interruption threshold"
|
|
571
|
+
);
|
|
572
|
+
return false;
|
|
573
|
+
}
|
|
558
574
|
}
|
|
559
575
|
const oldTask = this._userTurnCompletedTask;
|
|
560
576
|
this._userTurnCompletedTask = this.createSpeechTask({
|
|
@@ -884,6 +900,7 @@ ${instructions}` : instructions,
|
|
|
884
900
|
this.agentSession._updateAgentState("listening");
|
|
885
901
|
}
|
|
886
902
|
}
|
|
903
|
+
// TODO(brian): PR3 - Wrap entire pipelineReplyTask() method with tracer.startActiveSpan('agent_turn')
|
|
887
904
|
async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) {
|
|
888
905
|
var _a, _b, _c;
|
|
889
906
|
speechHandleStorage.enterWith(speechHandle);
|
|
@@ -1197,7 +1214,22 @@ ${instructions}` : instructions,
|
|
|
1197
1214
|
);
|
|
1198
1215
|
break;
|
|
1199
1216
|
}
|
|
1200
|
-
const
|
|
1217
|
+
const msgModalities = msg.modalities ? await msg.modalities : void 0;
|
|
1218
|
+
let ttsTextInput = null;
|
|
1219
|
+
let trTextInput;
|
|
1220
|
+
if (msgModalities && !msgModalities.includes("audio") && this.tts) {
|
|
1221
|
+
if (this.llm instanceof RealtimeModel && this.llm.capabilities.audioOutput) {
|
|
1222
|
+
this.logger.warn(
|
|
1223
|
+
"text response received from realtime API, falling back to use a TTS model."
|
|
1224
|
+
);
|
|
1225
|
+
}
|
|
1226
|
+
const [_ttsTextInput, _trTextInput] = msg.textStream.tee();
|
|
1227
|
+
ttsTextInput = _ttsTextInput;
|
|
1228
|
+
trTextInput = _trTextInput;
|
|
1229
|
+
} else {
|
|
1230
|
+
trTextInput = msg.textStream;
|
|
1231
|
+
}
|
|
1232
|
+
const trNodeResult = await this.agent.transcriptionNode(trTextInput, modelSettings);
|
|
1201
1233
|
let textOut = null;
|
|
1202
1234
|
if (trNodeResult) {
|
|
1203
1235
|
const [textForwardTask, _textOut] = performTextForwarding(
|
|
@@ -1210,28 +1242,44 @@ ${instructions}` : instructions,
|
|
|
1210
1242
|
}
|
|
1211
1243
|
let audioOut = null;
|
|
1212
1244
|
if (audioOutput) {
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1245
|
+
let realtimeAudioResult = null;
|
|
1246
|
+
if (ttsTextInput) {
|
|
1247
|
+
const [ttsTask, ttsStream] = performTTSInference(
|
|
1248
|
+
(...args) => this.agent.ttsNode(...args),
|
|
1249
|
+
ttsTextInput,
|
|
1250
|
+
modelSettings,
|
|
1251
|
+
abortController
|
|
1252
|
+
);
|
|
1253
|
+
tasks.push(ttsTask);
|
|
1254
|
+
realtimeAudioResult = ttsStream;
|
|
1255
|
+
} else if (msgModalities && msgModalities.includes("audio")) {
|
|
1256
|
+
realtimeAudioResult = await this.agent.realtimeAudioOutputNode(
|
|
1257
|
+
msg.audioStream,
|
|
1258
|
+
modelSettings
|
|
1259
|
+
);
|
|
1260
|
+
} else if (this.llm instanceof RealtimeModel && this.llm.capabilities.audioOutput) {
|
|
1261
|
+
this.logger.error(
|
|
1262
|
+
"Text message received from Realtime API with audio modality. This usually happens when text chat context is synced to the API. Try to add a TTS model as fallback or use text modality with TTS instead."
|
|
1263
|
+
);
|
|
1264
|
+
} else {
|
|
1265
|
+
this.logger.warn(
|
|
1266
|
+
"audio output is enabled but neither tts nor realtime audio is available"
|
|
1267
|
+
);
|
|
1268
|
+
}
|
|
1269
|
+
if (realtimeAudioResult) {
|
|
1218
1270
|
const [forwardTask, _audioOut] = performAudioForwarding(
|
|
1219
|
-
|
|
1271
|
+
realtimeAudioResult,
|
|
1220
1272
|
audioOutput,
|
|
1221
1273
|
abortController
|
|
1222
1274
|
);
|
|
1223
1275
|
forwardTasks.push(forwardTask);
|
|
1224
1276
|
audioOut = _audioOut;
|
|
1225
1277
|
audioOut.firstFrameFut.await.finally(onFirstFrame);
|
|
1226
|
-
} else {
|
|
1227
|
-
this.logger.warn(
|
|
1228
|
-
"audio output is enabled but neither tts nor realtime audio is available"
|
|
1229
|
-
);
|
|
1230
1278
|
}
|
|
1231
1279
|
} else if (textOut) {
|
|
1232
1280
|
textOut.firstTextFut.await.finally(onFirstFrame);
|
|
1233
1281
|
}
|
|
1234
|
-
outputs.push([msg.messageId, textOut, audioOut]);
|
|
1282
|
+
outputs.push([msg.messageId, textOut, audioOut, msgModalities]);
|
|
1235
1283
|
}
|
|
1236
1284
|
await waitFor(forwardTasks);
|
|
1237
1285
|
} catch (error) {
|
|
@@ -1301,7 +1349,7 @@ ${instructions}` : instructions,
|
|
|
1301
1349
|
replyAbortController.abort();
|
|
1302
1350
|
await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
1303
1351
|
if (messageOutputs.length > 0) {
|
|
1304
|
-
const [msgId, textOut, audioOut] = messageOutputs[0];
|
|
1352
|
+
const [msgId, textOut, audioOut, msgModalities] = messageOutputs[0];
|
|
1305
1353
|
let forwardedText = (textOut == null ? void 0 : textOut.text) || "";
|
|
1306
1354
|
if (audioOutput) {
|
|
1307
1355
|
audioOutput.clearBuffer();
|
|
@@ -1321,7 +1369,9 @@ ${instructions}` : instructions,
|
|
|
1321
1369
|
}
|
|
1322
1370
|
this.realtimeSession.truncate({
|
|
1323
1371
|
messageId: msgId,
|
|
1324
|
-
audioEndMs: Math.floor(playbackPosition)
|
|
1372
|
+
audioEndMs: Math.floor(playbackPosition),
|
|
1373
|
+
modalities: msgModalities,
|
|
1374
|
+
audioTranscript: forwardedText
|
|
1325
1375
|
});
|
|
1326
1376
|
}
|
|
1327
1377
|
if (forwardedText) {
|
|
@@ -1345,7 +1395,7 @@ ${instructions}` : instructions,
|
|
|
1345
1395
|
return;
|
|
1346
1396
|
}
|
|
1347
1397
|
if (messageOutputs.length > 0) {
|
|
1348
|
-
const [msgId, textOut, _] = messageOutputs[0];
|
|
1398
|
+
const [msgId, textOut, _, __] = messageOutputs[0];
|
|
1349
1399
|
const message = ChatMessage.create({
|
|
1350
1400
|
role: "assistant",
|
|
1351
1401
|
content: (textOut == null ? void 0 : textOut.text) || "",
|
|
@@ -1507,6 +1557,7 @@ ${instructions}` : instructions,
|
|
|
1507
1557
|
speechHandle._markScheduled();
|
|
1508
1558
|
this.wakeupMainTask();
|
|
1509
1559
|
}
|
|
1560
|
+
// TODO(brian): PR3 - Wrap entire drain() method with tracer.startActiveSpan('drain_agent_activity', { attributes: { 'lk.agent_label': this.agent.label } })
|
|
1510
1561
|
async drain() {
|
|
1511
1562
|
var _a;
|
|
1512
1563
|
const unlock = await this.lock.lock();
|