@livekit/agents 1.0.15 → 1.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +12 -12
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.cts +3 -3
- package/dist/cli.d.ts +3 -3
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +13 -13
- package/dist/cli.js.map +1 -1
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +1 -1
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -1
- package/dist/inference/tts.d.ts +2 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +1 -5
- package/dist/inference/tts.js.map +1 -1
- package/dist/llm/chat_context.cjs +78 -0
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +16 -0
- package/dist/llm/chat_context.d.ts +16 -0
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +78 -0
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/chat_context.test.cjs +531 -0
- package/dist/llm/chat_context.test.cjs.map +1 -1
- package/dist/llm/chat_context.test.js +531 -0
- package/dist/llm/chat_context.test.js.map +1 -1
- package/dist/llm/tool_context.cjs +40 -0
- package/dist/llm/tool_context.cjs.map +1 -1
- package/dist/llm/tool_context.d.cts +2 -0
- package/dist/llm/tool_context.d.ts +2 -0
- package/dist/llm/tool_context.d.ts.map +1 -1
- package/dist/llm/tool_context.js +38 -0
- package/dist/llm/tool_context.js.map +1 -1
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +7 -0
- package/dist/metrics/base.d.ts +7 -0
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/stt/stt.cjs +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +7 -1
- package/dist/stt/stt.d.ts +7 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/tts.cjs +2 -4
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +3 -5
- package/dist/tts/tts.js.map +1 -1
- package/dist/voice/agent_activity.cjs +83 -8
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +6 -2
- package/dist/voice/agent_activity.d.ts +6 -2
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +83 -8
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +3 -2
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +2 -1
- package/dist/voice/agent_session.d.ts +2 -1
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +3 -2
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +138 -16
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +11 -0
- package/dist/voice/audio_recognition.d.ts +11 -0
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +138 -16
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +0 -1
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/worker.cjs +17 -11
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.cts +16 -9
- package/dist/worker.d.ts +16 -9
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +16 -12
- package/dist/worker.js.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +17 -17
- package/src/inference/stt.ts +2 -1
- package/src/inference/tts.ts +2 -5
- package/src/llm/chat_context.test.ts +607 -0
- package/src/llm/chat_context.ts +106 -0
- package/src/llm/tool_context.ts +44 -0
- package/src/metrics/base.ts +7 -0
- package/src/stt/stt.ts +8 -1
- package/src/tts/tts.ts +7 -5
- package/src/voice/agent_activity.ts +119 -9
- package/src/voice/agent_session.ts +3 -1
- package/src/voice/audio_recognition.ts +235 -57
- package/src/voice/room_io/_input.ts +1 -1
- package/src/worker.ts +29 -18
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio_recognition.d.ts","sourceRoot":"","sources":["../../src/voice/audio_recognition.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAK1D,OAAO,EAAE,KAAK,WAAW,EAAmB,MAAM,eAAe,CAAC;AAElE,OAAO,EAAE,KAAK,GAAG,EAAE,KAAK,QAAQ,EAAgB,MAAM,WAAW,CAAC;AAClE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAEvC,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAC;IACtB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"audio_recognition.d.ts","sourceRoot":"","sources":["../../src/voice/audio_recognition.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAK1D,OAAO,EAAE,KAAK,WAAW,EAAmB,MAAM,eAAe,CAAC;AAElE,OAAO,EAAE,KAAK,GAAG,EAAE,KAAK,QAAQ,EAAgB,MAAM,WAAW,CAAC;AAClE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAEvC,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAC;IACtB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,iBAAiB,EAAE,MAAM,GAAG,SAAS,CAAC;IACtC,iBAAiB,EAAE,MAAM,GAAG,SAAS,CAAC;CACvC;AAED,MAAM,WAAW,wBAAwB;IACvC,aAAa,EAAE,MAAM,CAAC;IACtB,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,gBAAgB;IAC/B,eAAe,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IACxC,kBAAkB,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC3C,aAAa,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IACtC,mBAAmB,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,CAAC;IAC/C,iBAAiB,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,CAAC;IAC7C,WAAW,EAAE,CAAC,IAAI,EAAE,aAAa,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IACvD,sBAAsB,EAAE,CAAC,IAAI,EAAE,wBAAwB,KAAK,IAAI,CAAC;IAEjE,eAAe,EAAE,MAAM,WAAW,CAAC;CACpC;AAED,MAAM,WAAW,aAAa;IAC5B,iBAAiB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;IACtE,gBAAgB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAC1D,gBAAgB,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACzD;AAED,MAAM,WAAW,uBAAuB;IACtC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,YAAY,CAAC,EAAE,aAAa,CAAC;IAC7B,iBAAiB,CAAC,EAAE,OAAO,CAAC,iBAAiB,EAAE,aAAa,CAAC,CAAC;IAC9D,mBAAmB,EAAE,MAAM,CAAC;IAC5B,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,KAAK,CAAmB;IAChC,OAAO,CAAC,GAAG,CAAC,CAAU;IACtB,OAAO,CAAC,GAAG,CAAC,CAAM;IAClB,OAAO,CAAC,YAAY,CAAC,CAAgB;IACrC,OAAO,CAAC,iBAAiB,CAAC,CAA4C;IACtE,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,YAAY,CAAC,CAAS;IAE9B,OAAO,CAAC,mBAAmB,CAAqC;IAChE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,uBAAuB,CAAK;IACpC,OAAO,CAAC,eAAe,CAAM;IAC7B,OAAO,CAAC,sBAAsB,CAAM;IACpC,OAAO,CAAC,wBAAwB,CAAM;IACtC,OAAO,CAAC,yBAAyB,CAAgB;IACjD,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,eAAe,CAAqB;IAC5C,OAAO,CAAC,iBAAiB,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,UAAU,CAAC,CAAS;IAE5B,OAAO,CAAC,cAAc,CAA6B;IACnD,OAAO,CAAC,cAAc,CAA6B;IACnD,OAAO,CAAC,qBAAqB,CAAuC;IACpE,OAAO,CAAC,kBAAkB,CAA0C;IAGpE,OAAO,CAAC,aAAa,CAAC,CAAa;IACnC,OAAO,CAAC,kBAAkB,CAAC,CAAa;IACxC,OAAO,CAAC,OAAO,CAAC,CAAa;IAC7B,OAAO,CAAC,OAAO,CAAC,CAAa;gBAEjB,IAAI,EAAE,uBAAuB;IAiBzC;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAK9B;IAEK,KAAK;YAYG,UAAU;IAgMxB,OAAO,CAAC,eAAe;YAyIT,aAAa;YAqDb,aAAa;IAiE3B,mBAAmB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAI3D,sBAAsB;IAItB,aAAa;IAeb,cAAc,CAAC,aAAa,EAAE,OAAO;IA0C/B,KAAK;IAQX,OAAO,KAAK,oBAAoB,GAE/B;CACF"}
|
|
@@ -22,7 +22,10 @@ class AudioRecognition {
|
|
|
22
22
|
lastFinalTranscriptTime = 0;
|
|
23
23
|
audioTranscript = "";
|
|
24
24
|
audioInterimTranscript = "";
|
|
25
|
-
|
|
25
|
+
audioPreflightTranscript = "";
|
|
26
|
+
finalTranscriptConfidence = [];
|
|
27
|
+
lastSpeakingTime;
|
|
28
|
+
speechStartTime;
|
|
26
29
|
userTurnCommitted = false;
|
|
27
30
|
speaking = false;
|
|
28
31
|
sampleRate;
|
|
@@ -70,7 +73,7 @@ class AudioRecognition {
|
|
|
70
73
|
});
|
|
71
74
|
}
|
|
72
75
|
async onSTTEvent(ev) {
|
|
73
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _i;
|
|
76
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r;
|
|
74
77
|
if (this.turnDetectionMode === "manual" && this.userTurnCommitted && (this.bounceEOUTask === void 0 || this.bounceEOUTask.done || ev.type == SpeechEventType.INTERIM_TRANSCRIPT)) {
|
|
75
78
|
this.logger.debug(
|
|
76
79
|
{
|
|
@@ -87,7 +90,8 @@ class AudioRecognition {
|
|
|
87
90
|
case SpeechEventType.FINAL_TRANSCRIPT:
|
|
88
91
|
this.hooks.onFinalTranscript(ev);
|
|
89
92
|
const transcript = (_c = (_b = ev.alternatives) == null ? void 0 : _b[0]) == null ? void 0 : _c.text;
|
|
90
|
-
|
|
93
|
+
const confidence = ((_e = (_d = ev.alternatives) == null ? void 0 : _d[0]) == null ? void 0 : _e.confidence) ?? 0;
|
|
94
|
+
this.lastLanguage = (_g = (_f = ev.alternatives) == null ? void 0 : _f[0]) == null ? void 0 : _g.language;
|
|
91
95
|
if (!transcript) {
|
|
92
96
|
return;
|
|
93
97
|
}
|
|
@@ -101,26 +105,112 @@ class AudioRecognition {
|
|
|
101
105
|
this.lastFinalTranscriptTime = Date.now();
|
|
102
106
|
this.audioTranscript += ` ${transcript}`;
|
|
103
107
|
this.audioTranscript = this.audioTranscript.trimStart();
|
|
108
|
+
this.finalTranscriptConfidence.push(confidence);
|
|
109
|
+
const transcriptChanged = this.audioTranscript !== this.audioPreflightTranscript;
|
|
104
110
|
this.audioInterimTranscript = "";
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
111
|
+
this.audioPreflightTranscript = "";
|
|
112
|
+
if (!this.vad || this.lastSpeakingTime === void 0) {
|
|
113
|
+
this.lastSpeakingTime = Date.now();
|
|
114
|
+
}
|
|
115
|
+
if (this.vadBaseTurnDetection || this.userTurnCommitted) {
|
|
116
|
+
if (transcriptChanged) {
|
|
117
|
+
this.logger.debug(
|
|
118
|
+
{ transcript: this.audioTranscript },
|
|
119
|
+
"triggering preemptive generation (FINAL_TRANSCRIPT)"
|
|
120
|
+
);
|
|
121
|
+
this.hooks.onPreemptiveGeneration({
|
|
122
|
+
newTranscript: this.audioTranscript,
|
|
123
|
+
transcriptConfidence: this.finalTranscriptConfidence.length > 0 ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) / this.finalTranscriptConfidence.length : 0
|
|
124
|
+
});
|
|
108
125
|
}
|
|
109
|
-
if (this.
|
|
126
|
+
if (!this.speaking) {
|
|
110
127
|
const chatCtx = this.hooks.retrieveChatCtx();
|
|
111
128
|
this.logger.debug("running EOU detection on stt FINAL_TRANSCRIPT");
|
|
112
129
|
this.runEOUDetection(chatCtx);
|
|
113
130
|
}
|
|
114
131
|
}
|
|
115
132
|
break;
|
|
133
|
+
case SpeechEventType.PREFLIGHT_TRANSCRIPT:
|
|
134
|
+
this.hooks.onInterimTranscript(ev);
|
|
135
|
+
const preflightTranscript = ((_i = (_h = ev.alternatives) == null ? void 0 : _h[0]) == null ? void 0 : _i.text) ?? "";
|
|
136
|
+
const preflightConfidence = ((_k = (_j = ev.alternatives) == null ? void 0 : _j[0]) == null ? void 0 : _k.confidence) ?? 0;
|
|
137
|
+
const preflightLanguage = (_m = (_l = ev.alternatives) == null ? void 0 : _l[0]) == null ? void 0 : _m.language;
|
|
138
|
+
const MIN_LANGUAGE_DETECTION_LENGTH = 5;
|
|
139
|
+
if (!this.lastLanguage || preflightLanguage && preflightTranscript.length > MIN_LANGUAGE_DETECTION_LENGTH) {
|
|
140
|
+
this.lastLanguage = preflightLanguage;
|
|
141
|
+
}
|
|
142
|
+
if (!preflightTranscript) {
|
|
143
|
+
return;
|
|
144
|
+
}
|
|
145
|
+
this.logger.debug(
|
|
146
|
+
{
|
|
147
|
+
user_transcript: preflightTranscript,
|
|
148
|
+
language: this.lastLanguage
|
|
149
|
+
},
|
|
150
|
+
"received user preflight transcript"
|
|
151
|
+
);
|
|
152
|
+
this.lastFinalTranscriptTime = Date.now();
|
|
153
|
+
this.audioPreflightTranscript = `${this.audioTranscript} ${preflightTranscript}`.trimStart();
|
|
154
|
+
this.audioInterimTranscript = preflightTranscript;
|
|
155
|
+
if (!this.vad || this.lastSpeakingTime === void 0) {
|
|
156
|
+
this.lastSpeakingTime = Date.now();
|
|
157
|
+
}
|
|
158
|
+
if (this.turnDetectionMode !== "manual" || this.userTurnCommitted) {
|
|
159
|
+
const confidenceVals = [...this.finalTranscriptConfidence, preflightConfidence];
|
|
160
|
+
this.logger.debug(
|
|
161
|
+
{
|
|
162
|
+
transcript: this.audioPreflightTranscript.length > 100 ? this.audioPreflightTranscript.slice(0, 100) + "..." : this.audioPreflightTranscript
|
|
163
|
+
},
|
|
164
|
+
"triggering preemptive generation (PREFLIGHT_TRANSCRIPT)"
|
|
165
|
+
);
|
|
166
|
+
this.hooks.onPreemptiveGeneration({
|
|
167
|
+
newTranscript: this.audioPreflightTranscript,
|
|
168
|
+
transcriptConfidence: confidenceVals.length > 0 ? confidenceVals.reduce((a, b) => a + b, 0) / confidenceVals.length : 0
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
break;
|
|
116
172
|
case SpeechEventType.INTERIM_TRANSCRIPT:
|
|
117
|
-
this.logger.debug({ transcript: (
|
|
173
|
+
this.logger.debug({ transcript: (_o = (_n = ev.alternatives) == null ? void 0 : _n[0]) == null ? void 0 : _o.text }, "interim transcript");
|
|
118
174
|
this.hooks.onInterimTranscript(ev);
|
|
119
|
-
this.audioInterimTranscript = ((
|
|
175
|
+
this.audioInterimTranscript = ((_q = (_p = ev.alternatives) == null ? void 0 : _p[0]) == null ? void 0 : _q.text) ?? "";
|
|
176
|
+
break;
|
|
177
|
+
case SpeechEventType.START_OF_SPEECH:
|
|
178
|
+
if (this.turnDetectionMode !== "stt") break;
|
|
179
|
+
this.hooks.onStartOfSpeech({
|
|
180
|
+
type: VADEventType.START_OF_SPEECH,
|
|
181
|
+
samplesIndex: 0,
|
|
182
|
+
timestamp: Date.now(),
|
|
183
|
+
speechDuration: 0,
|
|
184
|
+
silenceDuration: 0,
|
|
185
|
+
frames: [],
|
|
186
|
+
probability: 0,
|
|
187
|
+
inferenceDuration: 0,
|
|
188
|
+
speaking: true,
|
|
189
|
+
rawAccumulatedSilence: 0,
|
|
190
|
+
rawAccumulatedSpeech: 0
|
|
191
|
+
});
|
|
192
|
+
this.speaking = true;
|
|
193
|
+
this.lastSpeakingTime = Date.now();
|
|
194
|
+
(_r = this.bounceEOUTask) == null ? void 0 : _r.cancel();
|
|
120
195
|
break;
|
|
121
196
|
case SpeechEventType.END_OF_SPEECH:
|
|
122
197
|
if (this.turnDetectionMode !== "stt") break;
|
|
198
|
+
this.hooks.onEndOfSpeech({
|
|
199
|
+
type: VADEventType.END_OF_SPEECH,
|
|
200
|
+
samplesIndex: 0,
|
|
201
|
+
timestamp: Date.now(),
|
|
202
|
+
speechDuration: 0,
|
|
203
|
+
silenceDuration: 0,
|
|
204
|
+
frames: [],
|
|
205
|
+
probability: 0,
|
|
206
|
+
inferenceDuration: 0,
|
|
207
|
+
speaking: false,
|
|
208
|
+
rawAccumulatedSilence: 0,
|
|
209
|
+
rawAccumulatedSpeech: 0
|
|
210
|
+
});
|
|
211
|
+
this.speaking = false;
|
|
123
212
|
this.userTurnCommitted = true;
|
|
213
|
+
this.lastSpeakingTime = Date.now();
|
|
124
214
|
if (!this.speaking) {
|
|
125
215
|
const chatCtx = this.hooks.retrieveChatCtx();
|
|
126
216
|
this.logger.debug("running EOU detection on stt END_OF_SPEECH");
|
|
@@ -148,7 +238,7 @@ class AudioRecognition {
|
|
|
148
238
|
// disable EOU model if manual turn detection enabled
|
|
149
239
|
this.audioTranscript && this.turnDetectionMode !== "manual" ? this.turnDetector : void 0
|
|
150
240
|
);
|
|
151
|
-
const bounceEOUTask = (lastSpeakingTime) => async (controller) => {
|
|
241
|
+
const bounceEOUTask = (lastSpeakingTime, lastFinalTranscriptTime, speechStartTime) => async (controller) => {
|
|
152
242
|
let endpointingDelay = this.minEndpointingDelay;
|
|
153
243
|
if (turnDetector) {
|
|
154
244
|
this.logger.debug("Running turn detector model");
|
|
@@ -175,21 +265,46 @@ class AudioRecognition {
|
|
|
175
265
|
}
|
|
176
266
|
}
|
|
177
267
|
}
|
|
178
|
-
|
|
179
|
-
|
|
268
|
+
let extraSleep = endpointingDelay;
|
|
269
|
+
if (lastSpeakingTime !== void 0) {
|
|
270
|
+
extraSleep += lastSpeakingTime - Date.now();
|
|
271
|
+
}
|
|
272
|
+
if (extraSleep > 0) {
|
|
273
|
+
await delay(Math.max(extraSleep, 0), { signal: controller.signal });
|
|
274
|
+
}
|
|
180
275
|
this.logger.debug({ transcript: this.audioTranscript }, "end of user turn");
|
|
276
|
+
const confidenceAvg = this.finalTranscriptConfidence.length > 0 ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) / this.finalTranscriptConfidence.length : 0;
|
|
277
|
+
let startedSpeakingAt;
|
|
278
|
+
let stoppedSpeakingAt;
|
|
279
|
+
let transcriptionDelay;
|
|
280
|
+
let endOfUtteranceDelay;
|
|
281
|
+
if (lastFinalTranscriptTime !== 0 && lastSpeakingTime !== void 0 && speechStartTime !== void 0) {
|
|
282
|
+
startedSpeakingAt = speechStartTime;
|
|
283
|
+
stoppedSpeakingAt = lastSpeakingTime;
|
|
284
|
+
transcriptionDelay = Math.max(lastFinalTranscriptTime - lastSpeakingTime, 0);
|
|
285
|
+
endOfUtteranceDelay = Date.now() - lastSpeakingTime;
|
|
286
|
+
}
|
|
181
287
|
const committed = await this.hooks.onEndOfTurn({
|
|
182
288
|
newTranscript: this.audioTranscript,
|
|
183
|
-
|
|
184
|
-
|
|
289
|
+
transcriptConfidence: confidenceAvg,
|
|
290
|
+
transcriptionDelay: transcriptionDelay ?? 0,
|
|
291
|
+
endOfUtteranceDelay: endOfUtteranceDelay ?? 0,
|
|
292
|
+
startedSpeakingAt,
|
|
293
|
+
stoppedSpeakingAt
|
|
185
294
|
});
|
|
186
295
|
if (committed) {
|
|
187
296
|
this.audioTranscript = "";
|
|
297
|
+
this.finalTranscriptConfidence = [];
|
|
298
|
+
this.lastSpeakingTime = void 0;
|
|
299
|
+
this.lastFinalTranscriptTime = 0;
|
|
300
|
+
this.speechStartTime = void 0;
|
|
188
301
|
}
|
|
189
302
|
this.userTurnCommitted = false;
|
|
190
303
|
};
|
|
191
304
|
(_a = this.bounceEOUTask) == null ? void 0 : _a.cancel();
|
|
192
|
-
this.bounceEOUTask = Task.from(
|
|
305
|
+
this.bounceEOUTask = Task.from(
|
|
306
|
+
bounceEOUTask(this.lastSpeakingTime, this.lastFinalTranscriptTime, this.speechStartTime)
|
|
307
|
+
);
|
|
193
308
|
this.bounceEOUTask.result.then(() => {
|
|
194
309
|
this.logger.debug("EOU detection task completed");
|
|
195
310
|
}).catch((err) => {
|
|
@@ -269,12 +384,17 @@ class AudioRecognition {
|
|
|
269
384
|
break;
|
|
270
385
|
case VADEventType.INFERENCE_DONE:
|
|
271
386
|
this.hooks.onVADInferenceDone(ev);
|
|
387
|
+
if (ev.rawAccumulatedSpeech > 0) {
|
|
388
|
+
this.lastSpeakingTime = Date.now();
|
|
389
|
+
if (this.speechStartTime === void 0) {
|
|
390
|
+
this.speechStartTime = Date.now();
|
|
391
|
+
}
|
|
392
|
+
}
|
|
272
393
|
break;
|
|
273
394
|
case VADEventType.END_OF_SPEECH:
|
|
274
395
|
this.logger.debug("VAD task: END_OF_SPEECH");
|
|
275
396
|
this.hooks.onEndOfSpeech(ev);
|
|
276
397
|
this.speaking = false;
|
|
277
|
-
this.lastSpeakingTime = Date.now() - ev.silenceDuration;
|
|
278
398
|
if (this.vadBaseTurnDetection || this.turnDetectionMode === "stt" && this.userTurnCommitted) {
|
|
279
399
|
const chatCtx = this.hooks.retrieveChatCtx();
|
|
280
400
|
this.runEOUDetection(chatCtx);
|
|
@@ -298,6 +418,8 @@ class AudioRecognition {
|
|
|
298
418
|
var _a;
|
|
299
419
|
this.audioTranscript = "";
|
|
300
420
|
this.audioInterimTranscript = "";
|
|
421
|
+
this.audioPreflightTranscript = "";
|
|
422
|
+
this.finalTranscriptConfidence = [];
|
|
301
423
|
this.userTurnCommitted = false;
|
|
302
424
|
(_a = this.sttTask) == null ? void 0 : _a.cancelAndWait().finally(() => {
|
|
303
425
|
this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\n\nexport interface EndOfTurnInfo {\n newTranscript: string;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n recognitionHooks: RecognitionHooks;\n stt?: STTNode;\n vad?: VAD;\n turnDetector?: _TurnDetector;\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private lastSpeakingTime = 0;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n this.hooks.onFinalTranscript(ev);\n const transcript = ev.alternatives?.[0]?.text;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.audioInterimTranscript = '';\n\n if (!this.speaking) {\n if (!this.vad) {\n // Copied from python agents:\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for _last_speaking_time\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.userTurnCommitted = true;\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask = (lastSpeakingTime: number) => async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n // TODO(AJS-74): need to support actual turn detection model plugins for following code to run\n if (turnDetector) {\n this.logger.debug('Running turn detector model');\n if (!turnDetector.supportsLanguage(this.lastLanguage)) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n this.logger.debug(\n { endOfTurnProbability, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n const unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n this.logger.debug(\n {\n unlikelyThreshold,\n endOfTurnProbability,\n language: this.lastLanguage,\n transcript: this.audioTranscript,\n },\n 'EOU Detection',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n }\n }\n\n const extraSleep = lastSpeakingTime + endpointingDelay - Date.now();\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptionDelay: Math.max(this.lastFinalTranscriptTime - lastSpeakingTime, 0),\n endOfUtteranceDelay: Date.now() - lastSpeakingTime,\n });\n\n if (committed) {\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n this.bounceEOUTask = Task.from(bounceEOUTask(this.lastSpeakingTime));\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n this.hooks.onStartOfSpeech(ev);\n this.speaking = true;\n\n // Capture sample rate from the first VAD event if not already set\n if (ev.frames.length > 0 && ev.frames[0]) {\n this.sampleRate = ev.frames[0].sampleRate;\n }\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n this.hooks.onEndOfSpeech(ev);\n this.speaking = false;\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.lastSpeakingTime = Date.now() - ev.silenceDuration;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAGA,SAAS,kBAAkB;AAE3B,SAAS,sBAAsB;AAC/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AAqC/C,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AAEpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,WAAW,IAAiB;AAxH5C;AAyHI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,aAAK,MAAM,kBAAkB,EAAE;AAC/B,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,UAAU;AAClB,cAAI,CAAC,KAAK,KAAK;AAOb,iBAAK,mBAAmB,KAAK,IAAI;AAAA,UACnC;AAEA,cAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,oBAAoB;AAEzB,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AAzMhD;AA0MI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBAAgB,CAAC,qBAA6B,OAAO,eAAgC;AACzF,UAAI,mBAAmB,KAAK;AAG5B,UAAI,cAAc;AAChB,aAAK,OAAO,MAAM,6BAA6B;AAC/C,YAAI,CAAC,aAAa,iBAAiB,KAAK,YAAY,GAAG;AACrD,eAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,QAClF,OAAO;AACL,gBAAM,uBAAuB,MAAM,aAAa,iBAAiB,OAAO;AACxE,eAAK,OAAO;AAAA,YACV,EAAE,sBAAsB,UAAU,KAAK,aAAa;AAAA,YACpD;AAAA,UACF;AAEA,gBAAM,oBAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAChF,eAAK,OAAO;AAAA,YACV;AAAA,cACE;AAAA,cACA;AAAA,cACA,UAAU,KAAK;AAAA,cACf,YAAY,KAAK;AAAA,YACnB;AAAA,YACA;AAAA,UACF;AAEA,cAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,+BAAmB,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAEA,YAAM,aAAa,mBAAmB,mBAAmB,KAAK,IAAI;AAElE,YAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAElE,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,oBAAoB,KAAK,IAAI,KAAK,0BAA0B,kBAAkB,CAAC;AAAA,QAC/E,qBAAqB,KAAK,IAAI,IAAI;AAAA,MACpC,CAAC;AAED,UAAI,WAAW;AAEb,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGA,eAAK,kBAAL,mBAAoB;AACpB,SAAK,gBAAgB,KAAK,KAAK,cAAc,KAAK,gBAAgB,CAAC;AAEnE,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA1VzE;AA2VI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C,iBAAK,MAAM,gBAAgB,EAAE;AAC7B,iBAAK,WAAW;AAGhB,gBAAI,GAAG,OAAO,SAAS,KAAK,GAAG,OAAO,CAAC,GAAG;AACxC,mBAAK,aAAa,GAAG,OAAO,CAAC,EAAE;AAAA,YACjC;AAEA,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAChC;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C,iBAAK,MAAM,cAAc,EAAE;AAC3B,iBAAK,WAAW;AAEhB,iBAAK,mBAAmB,KAAK,IAAI,IAAI,GAAG;AAExC,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AA3ZlB;AA4ZI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AAxazC;AAyaI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AAldhB;AAmdI,SAAK,uBAAuB;AAC5B,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\n\nexport interface EndOfTurnInfo {\n newTranscript: string;\n transcriptConfidence: number;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n startedSpeakingAt: number | undefined;\n stoppedSpeakingAt: number | undefined;\n}\n\nexport interface PreemptiveGenerationInfo {\n newTranscript: string;\n transcriptConfidence: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n onPreemptiveGeneration: (info: PreemptiveGenerationInfo) => void;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n recognitionHooks: RecognitionHooks;\n stt?: STTNode;\n vad?: VAD;\n turnDetector?: _TurnDetector;\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private audioPreflightTranscript = '';\n private finalTranscriptConfidence: number[] = [];\n private lastSpeakingTime: number | undefined;\n private speechStartTime: number | undefined;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n this.hooks.onFinalTranscript(ev);\n const transcript = ev.alternatives?.[0]?.text;\n const confidence = ev.alternatives?.[0]?.confidence ?? 0;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.finalTranscriptConfidence.push(confidence);\n const transcriptChanged = this.audioTranscript !== this.audioPreflightTranscript;\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for lastSpeakingTime\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n if (transcriptChanged) {\n this.logger.debug(\n { transcript: this.audioTranscript },\n 'triggering preemptive generation (FINAL_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioTranscript,\n transcriptConfidence:\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0,\n });\n }\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.PREFLIGHT_TRANSCRIPT:\n this.hooks.onInterimTranscript(ev);\n const preflightTranscript = ev.alternatives?.[0]?.text ?? '';\n const preflightConfidence = ev.alternatives?.[0]?.confidence ?? 0;\n const preflightLanguage = ev.alternatives?.[0]?.language;\n\n const MIN_LANGUAGE_DETECTION_LENGTH = 5;\n if (\n !this.lastLanguage ||\n (preflightLanguage && preflightTranscript.length > MIN_LANGUAGE_DETECTION_LENGTH)\n ) {\n this.lastLanguage = preflightLanguage;\n }\n\n if (!preflightTranscript) {\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: preflightTranscript,\n language: this.lastLanguage,\n },\n 'received user preflight transcript',\n );\n\n // still need to increment it as it's used for turn detection,\n this.lastFinalTranscriptTime = Date.now();\n // preflight transcript includes all pre-committed transcripts (including final transcript from the previous STT run)\n this.audioPreflightTranscript =\n `${this.audioTranscript} ${preflightTranscript}`.trimStart();\n this.audioInterimTranscript = preflightTranscript;\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.turnDetectionMode !== 'manual' || this.userTurnCommitted) {\n const confidenceVals = [...this.finalTranscriptConfidence, preflightConfidence];\n this.logger.debug(\n {\n transcript:\n this.audioPreflightTranscript.length > 100\n ? this.audioPreflightTranscript.slice(0, 100) + '...'\n : this.audioPreflightTranscript,\n },\n 'triggering preemptive generation (PREFLIGHT_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioPreflightTranscript,\n transcriptConfidence:\n confidenceVals.length > 0\n ? confidenceVals.reduce((a, b) => a + b, 0) / confidenceVals.length\n : 0,\n });\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.START_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.hooks.onStartOfSpeech({\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: true,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n this.speaking = true;\n this.lastSpeakingTime = Date.now();\n\n this.bounceEOUTask?.cancel();\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.hooks.onEndOfSpeech({\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: false,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n this.speaking = false;\n this.userTurnCommitted = true;\n this.lastSpeakingTime = Date.now();\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask =\n (\n lastSpeakingTime: number | undefined,\n lastFinalTranscriptTime: number,\n speechStartTime: number | undefined,\n ) =>\n async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n if (turnDetector) {\n this.logger.debug('Running turn detector model');\n if (!turnDetector.supportsLanguage(this.lastLanguage)) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n this.logger.debug(\n { endOfTurnProbability, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n const unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n this.logger.debug(\n {\n unlikelyThreshold,\n endOfTurnProbability,\n language: this.lastLanguage,\n transcript: this.audioTranscript,\n },\n 'EOU Detection',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n }\n }\n\n let extraSleep = endpointingDelay;\n if (lastSpeakingTime !== undefined) {\n extraSleep += lastSpeakingTime - Date.now();\n }\n\n if (extraSleep > 0) {\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n }\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const confidenceAvg =\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0;\n\n let startedSpeakingAt: number | undefined;\n let stoppedSpeakingAt: number | undefined;\n let transcriptionDelay: number | undefined;\n let endOfUtteranceDelay: number | undefined;\n\n // sometimes, we can't calculate the metrics because VAD was unreliable.\n // in this case, we just ignore the calculation, it's better than providing likely wrong values\n if (\n lastFinalTranscriptTime !== 0 &&\n lastSpeakingTime !== undefined &&\n speechStartTime !== undefined\n ) {\n startedSpeakingAt = speechStartTime;\n stoppedSpeakingAt = lastSpeakingTime;\n transcriptionDelay = Math.max(lastFinalTranscriptTime - lastSpeakingTime, 0);\n endOfUtteranceDelay = Date.now() - lastSpeakingTime;\n }\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptConfidence: confidenceAvg,\n transcriptionDelay: transcriptionDelay ?? 0,\n endOfUtteranceDelay: endOfUtteranceDelay ?? 0,\n startedSpeakingAt,\n stoppedSpeakingAt,\n });\n\n if (committed) {\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n this.finalTranscriptConfidence = [];\n this.lastSpeakingTime = undefined;\n this.lastFinalTranscriptTime = 0;\n this.speechStartTime = undefined;\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n // copy the values before awaiting (the values can change)\n this.bounceEOUTask = Task.from(\n bounceEOUTask(this.lastSpeakingTime, this.lastFinalTranscriptTime, this.speechStartTime),\n );\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n this.hooks.onStartOfSpeech(ev);\n this.speaking = true;\n\n // Capture sample rate from the first VAD event if not already set\n if (ev.frames.length > 0 && ev.frames[0]) {\n this.sampleRate = ev.frames[0].sampleRate;\n }\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n // for metrics, get the \"earliest\" signal of speech as possible\n if (ev.rawAccumulatedSpeech > 0.0) {\n this.lastSpeakingTime = Date.now();\n\n if (this.speechStartTime === undefined) {\n this.speechStartTime = Date.now();\n }\n }\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n this.hooks.onEndOfSpeech(ev);\n\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.speaking = false;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n this.finalTranscriptConfidence = [];\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAGA,SAAS,kBAAkB;AAE3B,SAAS,sBAAsB;AAC/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AA8C/C,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,2BAA2B;AAAA,EAC3B,4BAAsC,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AAEpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,WAAW,IAAiB;AApI5C;AAqII,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,aAAK,MAAM,kBAAkB,EAAE;AAC/B,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,cAAM,eAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AACvD,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,0BAA0B,KAAK,UAAU;AAC9C,cAAM,oBAAoB,KAAK,oBAAoB,KAAK;AACxD,aAAK,yBAAyB;AAC9B,aAAK,2BAA2B;AAEhC,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAMpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,cAAI,mBAAmB;AACrB,iBAAK,OAAO;AAAA,cACV,EAAE,YAAY,KAAK,gBAAgB;AAAA,cACnC;AAAA,YACF;AACA,iBAAK,MAAM,uBAAuB;AAAA,cAChC,eAAe,KAAK;AAAA,cACpB,sBACE,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAAA,YACR,CAAC;AAAA,UACH;AAEA,cAAI,CAAC,KAAK,UAAU;AAClB,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,MAAM,oBAAoB,EAAE;AACjC,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC1D,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AAChE,cAAM,qBAAoB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAEhD,cAAM,gCAAgC;AACtC,YACE,CAAC,KAAK,gBACL,qBAAqB,oBAAoB,SAAS,+BACnD;AACA,eAAK,eAAe;AAAA,QACtB;AAEA,YAAI,CAAC,qBAAqB;AACxB;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAGA,aAAK,0BAA0B,KAAK,IAAI;AAExC,aAAK,2BACH,GAAG,KAAK,eAAe,IAAI,mBAAmB,GAAG,UAAU;AAC7D,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAEpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,sBAAsB,YAAY,KAAK,mBAAmB;AACjE,gBAAM,iBAAiB,CAAC,GAAG,KAAK,2BAA2B,mBAAmB;AAC9E,eAAK,OAAO;AAAA,YACV;AAAA,cACE,YACE,KAAK,yBAAyB,SAAS,MACnC,KAAK,yBAAyB,MAAM,GAAG,GAAG,IAAI,QAC9C,KAAK;AAAA,YACb;AAAA,YACA;AAAA,UACF;AACA,eAAK,MAAM,uBAAuB;AAAA,YAChC,eAAe,KAAK;AAAA,YACpB,sBACE,eAAe,SAAS,IACpB,eAAe,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,eAAe,SAC3D;AAAA,UACR,CAAC;AAAA,QACH;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,MAAM,gBAAgB;AAAA,UACzB,MAAM,aAAa;AAAA,UACnB,cAAc;AAAA,UACd,WAAW,KAAK,IAAI;AAAA,UACpB,gBAAgB;AAAA,UAChB,iBAAiB;AAAA,UACjB,QAAQ,CAAC;AAAA,UACT,aAAa;AAAA,UACb,mBAAmB;AAAA,UACnB,UAAU;AAAA,UACV,uBAAuB;AAAA,UACvB,sBAAsB;AAAA,QACxB,CAAC;AACD,aAAK,WAAW;AAChB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,mBAAK,kBAAL,mBAAoB;AACpB;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,MAAM,cAAc;AAAA,UACvB,MAAM,aAAa;AAAA,UACnB,cAAc;AAAA,UACd,WAAW,KAAK,IAAI;AAAA,UACpB,gBAAgB;AAAA,UAChB,iBAAiB;AAAA,UACjB,QAAQ,CAAC;AAAA,UACT,aAAa;AAAA,UACb,mBAAmB;AAAA,UACnB,UAAU;AAAA,UACV,uBAAuB;AAAA,UACvB,sBAAsB;AAAA,QACxB,CAAC;AACD,aAAK,WAAW;AAChB,aAAK,oBAAoB;AACzB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AApUhD;AAqUI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBACJ,CACE,kBACA,yBACA,oBAEF,OAAO,eAAgC;AACrC,UAAI,mBAAmB,KAAK;AAE5B,UAAI,cAAc;AAChB,aAAK,OAAO,MAAM,6BAA6B;AAC/C,YAAI,CAAC,aAAa,iBAAiB,KAAK,YAAY,GAAG;AACrD,eAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,QAClF,OAAO;AACL,gBAAM,uBAAuB,MAAM,aAAa,iBAAiB,OAAO;AACxE,eAAK,OAAO;AAAA,YACV,EAAE,sBAAsB,UAAU,KAAK,aAAa;AAAA,YACpD;AAAA,UACF;AAEA,gBAAM,oBAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAChF,eAAK,OAAO;AAAA,YACV;AAAA,cACE;AAAA,cACA;AAAA,cACA,UAAU,KAAK;AAAA,cACf,YAAY,KAAK;AAAA,YACnB;AAAA,YACA;AAAA,UACF;AAEA,cAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,+BAAmB,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAEA,UAAI,aAAa;AACjB,UAAI,qBAAqB,QAAW;AAClC,sBAAc,mBAAmB,KAAK,IAAI;AAAA,MAC5C;AAEA,UAAI,aAAa,GAAG;AAElB,cAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MACpE;AAEA,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,gBACJ,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAEN,UAAI;AACJ,UAAI;AACJ,UAAI;AACJ,UAAI;AAIJ,UACE,4BAA4B,KAC5B,qBAAqB,UACrB,oBAAoB,QACpB;AACA,4BAAoB;AACpB,4BAAoB;AACpB,6BAAqB,KAAK,IAAI,0BAA0B,kBAAkB,CAAC;AAC3E,8BAAsB,KAAK,IAAI,IAAI;AAAA,MACrC;AAEA,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,sBAAsB;AAAA,QACtB,oBAAoB,sBAAsB;AAAA,QAC1C,qBAAqB,uBAAuB;AAAA,QAC5C;AAAA,QACA;AAAA,MACF,CAAC;AAED,UAAI,WAAW;AAEb,aAAK,kBAAkB;AACvB,aAAK,4BAA4B,CAAC;AAClC,aAAK,mBAAmB;AACxB,aAAK,0BAA0B;AAC/B,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,kBAAL,mBAAoB;AAEpB,SAAK,gBAAgB,KAAK;AAAA,MACxB,cAAc,KAAK,kBAAkB,KAAK,yBAAyB,KAAK,eAAe;AAAA,IACzF;AAEA,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AAlgBzE;AAmgBI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C,iBAAK,MAAM,gBAAgB,EAAE;AAC7B,iBAAK,WAAW;AAGhB,gBAAI,GAAG,OAAO,SAAS,KAAK,GAAG,OAAO,CAAC,GAAG;AACxC,mBAAK,aAAa,GAAG,OAAO,CAAC,EAAE;AAAA,YACjC;AAEA,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAEhC,gBAAI,GAAG,uBAAuB,GAAK;AACjC,mBAAK,mBAAmB,KAAK,IAAI;AAEjC,kBAAI,KAAK,oBAAoB,QAAW;AACtC,qBAAK,kBAAkB,KAAK,IAAI;AAAA,cAClC;AAAA,YACF;AACA;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C,iBAAK,MAAM,cAAc,EAAE;AAG3B,iBAAK,WAAW;AAEhB,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AA3kBlB;AA4kBI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,2BAA2B;AAChC,SAAK,4BAA4B,CAAC;AAClC,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AA1lBzC;AA2lBI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AApoBhB;AAqoBI,SAAK,uBAAuB;AAC5B,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/voice/room_io/_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n
|
|
1
|
+
{"version":3,"sources":["../../../src/voice/room_io/_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport {\n AudioStream,\n type NoiseCancellationOptions,\n RemoteParticipant,\n type RemoteTrack,\n type RemoteTrackPublication,\n type Room,\n RoomEvent,\n TrackSource,\n} from '@livekit/rtc-node';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from '../../log.js';\nimport { resampleStream } from '../../utils.js';\nimport { AudioInput } from '../io.js';\n\nexport class ParticipantAudioInputStream extends AudioInput {\n private room: Room;\n private sampleRate: number;\n private numChannels: number;\n private noiseCancellation?: NoiseCancellationOptions;\n private publication: RemoteTrackPublication | null = null;\n private participantIdentity: string | null = null;\n private logger = log();\n constructor({\n room,\n sampleRate,\n numChannels,\n noiseCancellation,\n }: {\n room: Room;\n sampleRate: number;\n numChannels: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.room = room;\n this.sampleRate = sampleRate;\n this.numChannels = numChannels;\n this.noiseCancellation = noiseCancellation;\n\n this.room.on(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.on(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n }\n\n setParticipant(participant: RemoteParticipant | string | null) {\n this.logger.debug({ participant }, 'setting participant audio input');\n const participantIdentity =\n participant instanceof RemoteParticipant ? participant.identity : participant;\n\n if (this.participantIdentity === participantIdentity) {\n return;\n }\n this.participantIdentity = participantIdentity;\n this.closeStream();\n\n if (!participantIdentity) {\n return;\n }\n\n const participantValue =\n participant instanceof RemoteParticipant\n ? participant\n : this.room.remoteParticipants.get(participantIdentity);\n\n // Convert Map iterator to array for Pino serialization\n const trackPublicationsArray = Array.from(participantValue?.trackPublications.values() ?? []);\n\n this.logger.info(\n {\n participantValue: participantValue?.identity,\n trackPublications: trackPublicationsArray,\n lengthOfTrackPublications: trackPublicationsArray.length,\n },\n 'participantValue.trackPublications',\n );\n // We need to check if the participant has a microphone track and subscribe to it\n // in case we miss the tracksubscribed event\n if (participantValue) {\n for (const publication of participantValue.trackPublications.values()) {\n if (publication.track && publication.source === TrackSource.SOURCE_MICROPHONE) {\n this.onTrackSubscribed(publication.track, publication, participantValue);\n break;\n }\n }\n }\n }\n\n private onTrackUnpublished = (\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) => {\n if (\n this.publication?.sid !== publication.sid ||\n participant.identity !== this.participantIdentity\n ) {\n return;\n }\n this.closeStream();\n\n // subscribe to the first available track\n for (const publication of participant.trackPublications.values()) {\n if (\n publication.track &&\n this.onTrackSubscribed(publication.track, publication, participant)\n ) {\n return;\n }\n }\n };\n\n private closeStream() {\n if (this.deferredStream.isSourceSet) {\n this.deferredStream.detachSource();\n }\n this.publication = null;\n }\n\n private onTrackSubscribed = (\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ): boolean => {\n this.logger.debug({ participant: participant.identity }, 'onTrackSubscribed in _input');\n if (\n this.participantIdentity !== participant.identity ||\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n (this.publication && this.publication.sid === publication.sid)\n ) {\n return false;\n }\n this.closeStream();\n this.publication = publication;\n this.deferredStream.setSource(\n resampleStream({\n stream: this.createStream(track),\n outputRate: this.sampleRate,\n }),\n );\n return true;\n };\n\n private createStream(track: RemoteTrack): ReadableStream<AudioFrame> {\n return new AudioStream(track, {\n sampleRate: this.sampleRate,\n numChannels: this.numChannels,\n noiseCancellation: this.noiseCancellation,\n // TODO(AJS-269): resolve compatibility issue with node-sdk to remove the forced type casting\n }) as unknown as ReadableStream<AudioFrame>;\n }\n\n async close() {\n this.room.off(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.off(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n this.closeStream();\n this.deferredStream.stream.cancel();\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,sBASO;AAEP,iBAAoB;AACpB,mBAA+B;AAC/B,gBAA2B;AAEpB,MAAM,oCAAoC,qBAAW;AAAA,EAClD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAA6C;AAAA,EAC7C,sBAAqC;AAAA,EACrC,aAAS,gBAAI;AAAA,EACrB,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKG;AACD,UAAM;AACN,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,oBAAoB;AAEzB,SAAK,KAAK,GAAG,0BAAU,iBAAiB,KAAK,iBAAiB;AAC9D,SAAK,KAAK,GAAG,0BAAU,kBAAkB,KAAK,kBAAkB;AAAA,EAClE;AAAA,EAEA,eAAe,aAAgD;AAC7D,SAAK,OAAO,MAAM,EAAE,YAAY,GAAG,iCAAiC;AACpE,UAAM,sBACJ,uBAAuB,oCAAoB,YAAY,WAAW;AAEpE,QAAI,KAAK,wBAAwB,qBAAqB;AACpD;AAAA,IACF;AACA,SAAK,sBAAsB;AAC3B,SAAK,YAAY;AAEjB,QAAI,CAAC,qBAAqB;AACxB;AAAA,IACF;AAEA,UAAM,mBACJ,uBAAuB,oCACnB,cACA,KAAK,KAAK,mBAAmB,IAAI,mBAAmB;AAG1D,UAAM,yBAAyB,MAAM,MAAK,qDAAkB,kBAAkB,aAAY,CAAC,CAAC;AAE5F,SAAK,OAAO;AAAA,MACV;AAAA,QACE,kBAAkB,qDAAkB;AAAA,QACpC,mBAAmB;AAAA,QACnB,2BAA2B,uBAAuB;AAAA,MACpD;AAAA,MACA;AAAA,IACF;AAGA,QAAI,kBAAkB;AACpB,iBAAW,eAAe,iBAAiB,kBAAkB,OAAO,GAAG;AACrE,YAAI,YAAY,SAAS,YAAY,WAAW,4BAAY,mBAAmB;AAC7E,eAAK,kBAAkB,YAAY,OAAO,aAAa,gBAAgB;AACvE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,qBAAqB,CAC3B,aACA,gBACG;AA9FP;AA+FI,UACE,UAAK,gBAAL,mBAAkB,SAAQ,YAAY,OACtC,YAAY,aAAa,KAAK,qBAC9B;AACA;AAAA,IACF;AACA,SAAK,YAAY;AAGjB,eAAWA,gBAAe,YAAY,kBAAkB,OAAO,GAAG;AAChE,UACEA,aAAY,SACZ,KAAK,kBAAkBA,aAAY,OAAOA,cAAa,WAAW,GAClE;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,QAAI,KAAK,eAAe,aAAa;AACnC,WAAK,eAAe,aAAa;AAAA,IACnC;AACA,SAAK,cAAc;AAAA,EACrB;AAAA,EAEQ,oBAAoB,CAC1B,OACA,aACA,gBACY;AACZ,SAAK,OAAO,MAAM,EAAE,aAAa,YAAY,SAAS,GAAG,6BAA6B;AACtF,QACE,KAAK,wBAAwB,YAAY,YACzC,YAAY,WAAW,4BAAY,qBAClC,KAAK,eAAe,KAAK,YAAY,QAAQ,YAAY,KAC1D;AACA,aAAO;AAAA,IACT;AACA,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,UAClB,6BAAe;AAAA,QACb,QAAQ,KAAK,aAAa,KAAK;AAAA,QAC/B,YAAY,KAAK;AAAA,MACnB,CAAC;AAAA,IACH;AACA,WAAO;AAAA,EACT;AAAA,EAEQ,aAAa,OAAgD;AACnE,WAAO,IAAI,4BAAY,OAAO;AAAA,MAC5B,YAAY,KAAK;AAAA,MACjB,aAAa,KAAK;AAAA,MAClB,mBAAmB,KAAK;AAAA;AAAA,IAE1B,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,KAAK,IAAI,0BAAU,iBAAiB,KAAK,iBAAiB;AAC/D,SAAK,KAAK,IAAI,0BAAU,kBAAkB,KAAK,kBAAkB;AACjE,SAAK,YAAY;AACjB,SAAK,eAAe,OAAO,OAAO;AAAA,EACpC;AACF;","names":["publication"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"_input.d.ts","sourceRoot":"","sources":["../../../src/voice/room_io/_input.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"_input.d.ts","sourceRoot":"","sources":["../../../src/voice/room_io/_input.ts"],"names":[],"mappings":"AAIA,OAAO,EAEL,KAAK,wBAAwB,EAC7B,iBAAiB,EAGjB,KAAK,IAAI,EAGV,MAAM,mBAAmB,CAAC;AAI3B,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAEtC,qBAAa,2BAA4B,SAAQ,UAAU;IACzD,OAAO,CAAC,IAAI,CAAO;IACnB,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,iBAAiB,CAAC,CAA2B;IACrD,OAAO,CAAC,WAAW,CAAuC;IAC1D,OAAO,CAAC,mBAAmB,CAAuB;IAClD,OAAO,CAAC,MAAM,CAAS;gBACX,EACV,IAAI,EACJ,UAAU,EACV,WAAW,EACX,iBAAiB,GAClB,EAAE;QACD,IAAI,EAAE,IAAI,CAAC;QACX,UAAU,EAAE,MAAM,CAAC;QACnB,WAAW,EAAE,MAAM,CAAC;QACpB,iBAAiB,CAAC,EAAE,wBAAwB,CAAC;KAC9C;IAWD,cAAc,CAAC,WAAW,EAAE,iBAAiB,GAAG,MAAM,GAAG,IAAI;IA2C7D,OAAO,CAAC,kBAAkB,CAqBxB;IAEF,OAAO,CAAC,WAAW;IAOnB,OAAO,CAAC,iBAAiB,CAsBvB;IAEF,OAAO,CAAC,YAAY;IASd,KAAK;CAMZ"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/voice/room_io/_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n
|
|
1
|
+
{"version":3,"sources":["../../../src/voice/room_io/_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport {\n AudioStream,\n type NoiseCancellationOptions,\n RemoteParticipant,\n type RemoteTrack,\n type RemoteTrackPublication,\n type Room,\n RoomEvent,\n TrackSource,\n} from '@livekit/rtc-node';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from '../../log.js';\nimport { resampleStream } from '../../utils.js';\nimport { AudioInput } from '../io.js';\n\nexport class ParticipantAudioInputStream extends AudioInput {\n private room: Room;\n private sampleRate: number;\n private numChannels: number;\n private noiseCancellation?: NoiseCancellationOptions;\n private publication: RemoteTrackPublication | null = null;\n private participantIdentity: string | null = null;\n private logger = log();\n constructor({\n room,\n sampleRate,\n numChannels,\n noiseCancellation,\n }: {\n room: Room;\n sampleRate: number;\n numChannels: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.room = room;\n this.sampleRate = sampleRate;\n this.numChannels = numChannels;\n this.noiseCancellation = noiseCancellation;\n\n this.room.on(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.on(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n }\n\n setParticipant(participant: RemoteParticipant | string | null) {\n this.logger.debug({ participant }, 'setting participant audio input');\n const participantIdentity =\n participant instanceof RemoteParticipant ? participant.identity : participant;\n\n if (this.participantIdentity === participantIdentity) {\n return;\n }\n this.participantIdentity = participantIdentity;\n this.closeStream();\n\n if (!participantIdentity) {\n return;\n }\n\n const participantValue =\n participant instanceof RemoteParticipant\n ? participant\n : this.room.remoteParticipants.get(participantIdentity);\n\n // Convert Map iterator to array for Pino serialization\n const trackPublicationsArray = Array.from(participantValue?.trackPublications.values() ?? []);\n\n this.logger.info(\n {\n participantValue: participantValue?.identity,\n trackPublications: trackPublicationsArray,\n lengthOfTrackPublications: trackPublicationsArray.length,\n },\n 'participantValue.trackPublications',\n );\n // We need to check if the participant has a microphone track and subscribe to it\n // in case we miss the tracksubscribed event\n if (participantValue) {\n for (const publication of participantValue.trackPublications.values()) {\n if (publication.track && publication.source === TrackSource.SOURCE_MICROPHONE) {\n this.onTrackSubscribed(publication.track, publication, participantValue);\n break;\n }\n }\n }\n }\n\n private onTrackUnpublished = (\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) => {\n if (\n this.publication?.sid !== publication.sid ||\n participant.identity !== this.participantIdentity\n ) {\n return;\n }\n this.closeStream();\n\n // subscribe to the first available track\n for (const publication of participant.trackPublications.values()) {\n if (\n publication.track &&\n this.onTrackSubscribed(publication.track, publication, participant)\n ) {\n return;\n }\n }\n };\n\n private closeStream() {\n if (this.deferredStream.isSourceSet) {\n this.deferredStream.detachSource();\n }\n this.publication = null;\n }\n\n private onTrackSubscribed = (\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ): boolean => {\n this.logger.debug({ participant: participant.identity }, 'onTrackSubscribed in _input');\n if (\n this.participantIdentity !== participant.identity ||\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n (this.publication && this.publication.sid === publication.sid)\n ) {\n return false;\n }\n this.closeStream();\n this.publication = publication;\n this.deferredStream.setSource(\n resampleStream({\n stream: this.createStream(track),\n outputRate: this.sampleRate,\n }),\n );\n return true;\n };\n\n private createStream(track: RemoteTrack): ReadableStream<AudioFrame> {\n return new AudioStream(track, {\n sampleRate: this.sampleRate,\n numChannels: this.numChannels,\n noiseCancellation: this.noiseCancellation,\n // TODO(AJS-269): resolve compatibility issue with node-sdk to remove the forced type casting\n }) as unknown as ReadableStream<AudioFrame>;\n }\n\n async close() {\n this.room.off(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.off(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n this.closeStream();\n this.deferredStream.stream.cancel();\n }\n}\n"],"mappings":"AAIA;AAAA,EACE;AAAA,EAEA;AAAA,EAIA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,WAAW;AACpB,SAAS,sBAAsB;AAC/B,SAAS,kBAAkB;AAEpB,MAAM,oCAAoC,WAAW;AAAA,EAClD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAA6C;AAAA,EAC7C,sBAAqC;AAAA,EACrC,SAAS,IAAI;AAAA,EACrB,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKG;AACD,UAAM;AACN,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,oBAAoB;AAEzB,SAAK,KAAK,GAAG,UAAU,iBAAiB,KAAK,iBAAiB;AAC9D,SAAK,KAAK,GAAG,UAAU,kBAAkB,KAAK,kBAAkB;AAAA,EAClE;AAAA,EAEA,eAAe,aAAgD;AAC7D,SAAK,OAAO,MAAM,EAAE,YAAY,GAAG,iCAAiC;AACpE,UAAM,sBACJ,uBAAuB,oBAAoB,YAAY,WAAW;AAEpE,QAAI,KAAK,wBAAwB,qBAAqB;AACpD;AAAA,IACF;AACA,SAAK,sBAAsB;AAC3B,SAAK,YAAY;AAEjB,QAAI,CAAC,qBAAqB;AACxB;AAAA,IACF;AAEA,UAAM,mBACJ,uBAAuB,oBACnB,cACA,KAAK,KAAK,mBAAmB,IAAI,mBAAmB;AAG1D,UAAM,yBAAyB,MAAM,MAAK,qDAAkB,kBAAkB,aAAY,CAAC,CAAC;AAE5F,SAAK,OAAO;AAAA,MACV;AAAA,QACE,kBAAkB,qDAAkB;AAAA,QACpC,mBAAmB;AAAA,QACnB,2BAA2B,uBAAuB;AAAA,MACpD;AAAA,MACA;AAAA,IACF;AAGA,QAAI,kBAAkB;AACpB,iBAAW,eAAe,iBAAiB,kBAAkB,OAAO,GAAG;AACrE,YAAI,YAAY,SAAS,YAAY,WAAW,YAAY,mBAAmB;AAC7E,eAAK,kBAAkB,YAAY,OAAO,aAAa,gBAAgB;AACvE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,qBAAqB,CAC3B,aACA,gBACG;AA9FP;AA+FI,UACE,UAAK,gBAAL,mBAAkB,SAAQ,YAAY,OACtC,YAAY,aAAa,KAAK,qBAC9B;AACA;AAAA,IACF;AACA,SAAK,YAAY;AAGjB,eAAWA,gBAAe,YAAY,kBAAkB,OAAO,GAAG;AAChE,UACEA,aAAY,SACZ,KAAK,kBAAkBA,aAAY,OAAOA,cAAa,WAAW,GAClE;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,QAAI,KAAK,eAAe,aAAa;AACnC,WAAK,eAAe,aAAa;AAAA,IACnC;AACA,SAAK,cAAc;AAAA,EACrB;AAAA,EAEQ,oBAAoB,CAC1B,OACA,aACA,gBACY;AACZ,SAAK,OAAO,MAAM,EAAE,aAAa,YAAY,SAAS,GAAG,6BAA6B;AACtF,QACE,KAAK,wBAAwB,YAAY,YACzC,YAAY,WAAW,YAAY,qBAClC,KAAK,eAAe,KAAK,YAAY,QAAQ,YAAY,KAC1D;AACA,aAAO;AAAA,IACT;AACA,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,MAClB,eAAe;AAAA,QACb,QAAQ,KAAK,aAAa,KAAK;AAAA,QAC/B,YAAY,KAAK;AAAA,MACnB,CAAC;AAAA,IACH;AACA,WAAO;AAAA,EACT;AAAA,EAEQ,aAAa,OAAgD;AACnE,WAAO,IAAI,YAAY,OAAO;AAAA,MAC5B,YAAY,KAAK;AAAA,MACjB,aAAa,KAAK;AAAA,MAClB,mBAAmB,KAAK;AAAA;AAAA,IAE1B,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,KAAK,IAAI,UAAU,iBAAiB,KAAK,iBAAiB;AAC/D,SAAK,KAAK,IAAI,UAAU,kBAAkB,KAAK,kBAAkB;AACjE,SAAK,YAAY;AACjB,SAAK,eAAe,OAAO,OAAO;AAAA,EACpC;AACF;","names":["publication"]}
|
package/dist/worker.cjs
CHANGED
|
@@ -28,7 +28,9 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
28
28
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
29
|
var worker_exports = {};
|
|
30
30
|
__export(worker_exports, {
|
|
31
|
+
AgentServer: () => AgentServer,
|
|
31
32
|
MissingCredentialsError: () => MissingCredentialsError,
|
|
33
|
+
ServerOptions: () => ServerOptions,
|
|
32
34
|
Worker: () => Worker,
|
|
33
35
|
WorkerError: () => WorkerError,
|
|
34
36
|
WorkerOptions: () => WorkerOptions,
|
|
@@ -127,7 +129,7 @@ class WorkerPermissions {
|
|
|
127
129
|
this.hidden = hidden;
|
|
128
130
|
}
|
|
129
131
|
}
|
|
130
|
-
class
|
|
132
|
+
class ServerOptions {
|
|
131
133
|
agent;
|
|
132
134
|
requestFunc;
|
|
133
135
|
loadFunc;
|
|
@@ -137,7 +139,7 @@ class WorkerOptions {
|
|
|
137
139
|
initializeProcessTimeout;
|
|
138
140
|
permissions;
|
|
139
141
|
agentName;
|
|
140
|
-
|
|
142
|
+
serverType;
|
|
141
143
|
maxRetry;
|
|
142
144
|
wsURL;
|
|
143
145
|
apiKey;
|
|
@@ -160,7 +162,7 @@ class WorkerOptions {
|
|
|
160
162
|
initializeProcessTimeout = 10 * 1e3,
|
|
161
163
|
permissions = new WorkerPermissions(),
|
|
162
164
|
agentName = "",
|
|
163
|
-
|
|
165
|
+
serverType = import_protocol.JobType.JT_ROOM,
|
|
164
166
|
maxRetry = MAX_RECONNECT_ATTEMPTS,
|
|
165
167
|
wsURL = "ws://localhost:7880",
|
|
166
168
|
apiKey = void 0,
|
|
@@ -185,7 +187,7 @@ class WorkerOptions {
|
|
|
185
187
|
this.initializeProcessTimeout = initializeProcessTimeout;
|
|
186
188
|
this.permissions = permissions;
|
|
187
189
|
this.agentName = agentName;
|
|
188
|
-
this.
|
|
190
|
+
this.serverType = serverType;
|
|
189
191
|
this.maxRetry = maxRetry;
|
|
190
192
|
this.wsURL = wsURL;
|
|
191
193
|
this.apiKey = apiKey;
|
|
@@ -207,7 +209,7 @@ class PendingAssignment {
|
|
|
207
209
|
arg;
|
|
208
210
|
}
|
|
209
211
|
}
|
|
210
|
-
class
|
|
212
|
+
class AgentServer {
|
|
211
213
|
#opts;
|
|
212
214
|
#procPool;
|
|
213
215
|
#id = "unregistered";
|
|
@@ -222,22 +224,22 @@ class Worker {
|
|
|
222
224
|
#httpServer;
|
|
223
225
|
#logger = (0, import_log.log)().child({ version: import_version.version });
|
|
224
226
|
#inferenceExecutor;
|
|
225
|
-
|
|
227
|
+
/* @throws {@link MissingCredentialsError} if URL, API key or API secret are missing */
|
|
226
228
|
constructor(opts) {
|
|
227
229
|
opts.wsURL = opts.wsURL || process.env.LIVEKIT_URL || "";
|
|
228
230
|
opts.apiKey = opts.apiKey || process.env.LIVEKIT_API_KEY || "";
|
|
229
231
|
opts.apiSecret = opts.apiSecret || process.env.LIVEKIT_API_SECRET || "";
|
|
230
232
|
if (opts.wsURL === "")
|
|
231
233
|
throw new MissingCredentialsError(
|
|
232
|
-
"URL is required: Set LIVEKIT_URL, run with --url, or pass wsURL in
|
|
234
|
+
"URL is required: Set LIVEKIT_URL, run with --url, or pass wsURL in ServerOptions"
|
|
233
235
|
);
|
|
234
236
|
if (opts.apiKey === "")
|
|
235
237
|
throw new MissingCredentialsError(
|
|
236
|
-
"API Key is required: Set LIVEKIT_API_KEY, run with --api-key, or pass apiKey in
|
|
238
|
+
"API Key is required: Set LIVEKIT_API_KEY, run with --api-key, or pass apiKey in ServerOptions"
|
|
237
239
|
);
|
|
238
240
|
if (opts.apiSecret === "")
|
|
239
241
|
throw new MissingCredentialsError(
|
|
240
|
-
"API Secret is required: Set LIVEKIT_API_SECRET, run with --api-secret, or pass apiSecret in
|
|
242
|
+
"API Secret is required: Set LIVEKIT_API_SECRET, run with --api-secret, or pass apiSecret in ServerOptions"
|
|
241
243
|
);
|
|
242
244
|
if (opts.workerToken) {
|
|
243
245
|
if (opts.loadFunc !== defaultCpuLoad) {
|
|
@@ -278,7 +280,7 @@ class Worker {
|
|
|
278
280
|
this.#opts = opts;
|
|
279
281
|
this.#httpServer = new import_http_server.HTTPServer(opts.host, opts.port, () => ({
|
|
280
282
|
agent_name: opts.agentName,
|
|
281
|
-
worker_type: import_protocol.JobType[opts.
|
|
283
|
+
worker_type: import_protocol.JobType[opts.serverType],
|
|
282
284
|
active_jobs: this.activeJobs.length,
|
|
283
285
|
sdk_version: import_version.version,
|
|
284
286
|
project_type: PROJECT_TYPE
|
|
@@ -511,7 +513,7 @@ class Worker {
|
|
|
511
513
|
message: {
|
|
512
514
|
case: "register",
|
|
513
515
|
value: {
|
|
514
|
-
type: this.#opts.
|
|
516
|
+
type: this.#opts.serverType,
|
|
515
517
|
agentName: this.#opts.agentName,
|
|
516
518
|
allowedPermissions: new import_protocol.ParticipantPermission({
|
|
517
519
|
canPublish: this.#opts.permissions.canPublish,
|
|
@@ -663,9 +665,13 @@ class Worker {
|
|
|
663
665
|
await this.#close.await;
|
|
664
666
|
}
|
|
665
667
|
}
|
|
668
|
+
const Worker = AgentServer;
|
|
669
|
+
const WorkerOptions = ServerOptions;
|
|
666
670
|
// Annotate the CommonJS export names for ESM import in node:
|
|
667
671
|
0 && (module.exports = {
|
|
672
|
+
AgentServer,
|
|
668
673
|
MissingCredentialsError,
|
|
674
|
+
ServerOptions,
|
|
669
675
|
Worker,
|
|
670
676
|
WorkerError,
|
|
671
677
|
WorkerOptions,
|