@livekit/agents 1.0.15 → 1.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/dist/cli.cjs +12 -12
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.cts +3 -3
  4. package/dist/cli.d.ts +3 -3
  5. package/dist/cli.d.ts.map +1 -1
  6. package/dist/cli.js +13 -13
  7. package/dist/cli.js.map +1 -1
  8. package/dist/inference/stt.cjs.map +1 -1
  9. package/dist/inference/stt.d.ts.map +1 -1
  10. package/dist/inference/stt.js +1 -1
  11. package/dist/inference/stt.js.map +1 -1
  12. package/dist/inference/tts.cjs.map +1 -1
  13. package/dist/inference/tts.d.cts +2 -1
  14. package/dist/inference/tts.d.ts +2 -1
  15. package/dist/inference/tts.d.ts.map +1 -1
  16. package/dist/inference/tts.js +1 -5
  17. package/dist/inference/tts.js.map +1 -1
  18. package/dist/llm/chat_context.cjs +78 -0
  19. package/dist/llm/chat_context.cjs.map +1 -1
  20. package/dist/llm/chat_context.d.cts +16 -0
  21. package/dist/llm/chat_context.d.ts +16 -0
  22. package/dist/llm/chat_context.d.ts.map +1 -1
  23. package/dist/llm/chat_context.js +78 -0
  24. package/dist/llm/chat_context.js.map +1 -1
  25. package/dist/llm/chat_context.test.cjs +531 -0
  26. package/dist/llm/chat_context.test.cjs.map +1 -1
  27. package/dist/llm/chat_context.test.js +531 -0
  28. package/dist/llm/chat_context.test.js.map +1 -1
  29. package/dist/llm/tool_context.cjs +40 -0
  30. package/dist/llm/tool_context.cjs.map +1 -1
  31. package/dist/llm/tool_context.d.cts +2 -0
  32. package/dist/llm/tool_context.d.ts +2 -0
  33. package/dist/llm/tool_context.d.ts.map +1 -1
  34. package/dist/llm/tool_context.js +38 -0
  35. package/dist/llm/tool_context.js.map +1 -1
  36. package/dist/metrics/base.cjs.map +1 -1
  37. package/dist/metrics/base.d.cts +7 -0
  38. package/dist/metrics/base.d.ts +7 -0
  39. package/dist/metrics/base.d.ts.map +1 -1
  40. package/dist/stt/stt.cjs +1 -1
  41. package/dist/stt/stt.cjs.map +1 -1
  42. package/dist/stt/stt.d.cts +7 -1
  43. package/dist/stt/stt.d.ts +7 -1
  44. package/dist/stt/stt.d.ts.map +1 -1
  45. package/dist/stt/stt.js +1 -1
  46. package/dist/stt/stt.js.map +1 -1
  47. package/dist/tts/tts.cjs +2 -4
  48. package/dist/tts/tts.cjs.map +1 -1
  49. package/dist/tts/tts.d.ts.map +1 -1
  50. package/dist/tts/tts.js +3 -5
  51. package/dist/tts/tts.js.map +1 -1
  52. package/dist/voice/agent_activity.cjs +83 -8
  53. package/dist/voice/agent_activity.cjs.map +1 -1
  54. package/dist/voice/agent_activity.d.cts +6 -2
  55. package/dist/voice/agent_activity.d.ts +6 -2
  56. package/dist/voice/agent_activity.d.ts.map +1 -1
  57. package/dist/voice/agent_activity.js +83 -8
  58. package/dist/voice/agent_activity.js.map +1 -1
  59. package/dist/voice/agent_session.cjs +3 -2
  60. package/dist/voice/agent_session.cjs.map +1 -1
  61. package/dist/voice/agent_session.d.cts +2 -1
  62. package/dist/voice/agent_session.d.ts +2 -1
  63. package/dist/voice/agent_session.d.ts.map +1 -1
  64. package/dist/voice/agent_session.js +3 -2
  65. package/dist/voice/agent_session.js.map +1 -1
  66. package/dist/voice/audio_recognition.cjs +138 -16
  67. package/dist/voice/audio_recognition.cjs.map +1 -1
  68. package/dist/voice/audio_recognition.d.cts +11 -0
  69. package/dist/voice/audio_recognition.d.ts +11 -0
  70. package/dist/voice/audio_recognition.d.ts.map +1 -1
  71. package/dist/voice/audio_recognition.js +138 -16
  72. package/dist/voice/audio_recognition.js.map +1 -1
  73. package/dist/voice/room_io/_input.cjs.map +1 -1
  74. package/dist/voice/room_io/_input.d.ts.map +1 -1
  75. package/dist/voice/room_io/_input.js +0 -1
  76. package/dist/voice/room_io/_input.js.map +1 -1
  77. package/dist/worker.cjs +17 -11
  78. package/dist/worker.cjs.map +1 -1
  79. package/dist/worker.d.cts +16 -9
  80. package/dist/worker.d.ts +16 -9
  81. package/dist/worker.d.ts.map +1 -1
  82. package/dist/worker.js +16 -12
  83. package/dist/worker.js.map +1 -1
  84. package/package.json +1 -1
  85. package/src/cli.ts +17 -17
  86. package/src/inference/stt.ts +2 -1
  87. package/src/inference/tts.ts +2 -5
  88. package/src/llm/chat_context.test.ts +607 -0
  89. package/src/llm/chat_context.ts +106 -0
  90. package/src/llm/tool_context.ts +44 -0
  91. package/src/metrics/base.ts +7 -0
  92. package/src/stt/stt.ts +8 -1
  93. package/src/tts/tts.ts +7 -5
  94. package/src/voice/agent_activity.ts +119 -9
  95. package/src/voice/agent_session.ts +3 -1
  96. package/src/voice/audio_recognition.ts +235 -57
  97. package/src/voice/room_io/_input.ts +1 -1
  98. package/src/worker.ts +29 -18
@@ -1 +1 @@
1
- {"version":3,"file":"audio_recognition.d.ts","sourceRoot":"","sources":["../../src/voice/audio_recognition.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAK1D,OAAO,EAAE,KAAK,WAAW,EAAmB,MAAM,eAAe,CAAC;AAElE,OAAO,EAAE,KAAK,GAAG,EAAE,KAAK,QAAQ,EAAgB,MAAM,WAAW,CAAC;AAClE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAEvC,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAC;IACtB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,WAAW,gBAAgB;IAC/B,eAAe,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IACxC,kBAAkB,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC3C,aAAa,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IACtC,mBAAmB,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,CAAC;IAC/C,iBAAiB,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,CAAC;IAC7C,WAAW,EAAE,CAAC,IAAI,EAAE,aAAa,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAEvD,eAAe,EAAE,MAAM,WAAW,CAAC;CACpC;AAED,MAAM,WAAW,aAAa;IAC5B,iBAAiB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;IACtE,gBAAgB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAC1D,gBAAgB,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACzD;AAED,MAAM,WAAW,uBAAuB;IACtC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,YAAY,CAAC,EAAE,aAAa,CAAC;IAC7B,iBAAiB,CAAC,EAAE,OAAO,CAAC,iBAAiB,EAAE,aAAa,CAAC,CAAC;IAC9D,mBAAmB,EAAE,MAAM,CAAC;IAC5B,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,KAAK,CAAmB;IAChC,OAAO,CAAC,GAAG,CAAC,CAAU;IACtB,OAAO,CAAC,GAAG,CAAC,CAAM;IAClB,OAAO,CAAC,YAAY,CAAC,CAAgB;IACrC,OAAO,CAAC,iBAAiB,CAAC,CAA4C;IACtE,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,YAAY,CAAC,CAAS;IAE9B,OAAO,CAAC,mBAAmB,CAAqC;IAChE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,uBAAuB,CAAK;IACpC,OAAO,CAAC,eAAe,CAAM;IAC7B,OAAO,CAAC,sBAAsB,CAAM;IACpC,OAAO,CAAC,gBAAgB,CAAK;IAC7B,OAAO,CAAC,iBAAiB,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,UAAU,CAAC,CAAS;IAE5B,OAAO,CAAC,cAAc,CAA6B;IACnD,OAAO,CAAC,cAAc,CAA6B;IACnD,OAAO,CAAC,qBAAqB,CAAuC;IACpE,OAAO,CAAC,kBAAkB,CAA0C;IAGpE,OAAO,CAAC,aAAa,CAAC,CAAa;IACnC,OAAO,CAAC,kBAAkB,CAAC,CAAa;IACxC,OAAO,CAAC,OAAO,CAAC,CAAa;IAC7B,OAAO,CAAC,OAAO,CAAC,CAAa;gBAEjB,IAAI,EAAE,uBAAuB;IAiBzC;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAK9B;IAEK,KAAK;YAYG,UAAU;IAiFxB,OAAO,CAAC,eAAe;YA4FT,aAAa;YAqDb,aAAa;IAyD3B,mBAAmB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAI3D,sBAAsB;IAItB,aAAa;IAab,cAAc,CAAC,aAAa,EAAE,OAAO;IA0C/B,KAAK;IAQX,OAAO,KAAK,oBAAoB,GAE/B;CACF"}
1
+ {"version":3,"file":"audio_recognition.d.ts","sourceRoot":"","sources":["../../src/voice/audio_recognition.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAK1D,OAAO,EAAE,KAAK,WAAW,EAAmB,MAAM,eAAe,CAAC;AAElE,OAAO,EAAE,KAAK,GAAG,EAAE,KAAK,QAAQ,EAAgB,MAAM,WAAW,CAAC;AAClE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAEvC,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAC;IACtB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,iBAAiB,EAAE,MAAM,GAAG,SAAS,CAAC;IACtC,iBAAiB,EAAE,MAAM,GAAG,SAAS,CAAC;CACvC;AAED,MAAM,WAAW,wBAAwB;IACvC,aAAa,EAAE,MAAM,CAAC;IACtB,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,gBAAgB;IAC/B,eAAe,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IACxC,kBAAkB,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC3C,aAAa,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IACtC,mBAAmB,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,CAAC;IAC/C,iBAAiB,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,CAAC;IAC7C,WAAW,EAAE,CAAC,IAAI,EAAE,aAAa,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IACvD,sBAAsB,EAAE,CAAC,IAAI,EAAE,wBAAwB,KAAK,IAAI,CAAC;IAEjE,eAAe,EAAE,MAAM,WAAW,CAAC;CACpC;AAED,MAAM,WAAW,aAAa;IAC5B,iBAAiB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;IACtE,gBAAgB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAC1D,gBAAgB,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACzD;AAED,MAAM,WAAW,uBAAuB;IACtC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,YAAY,CAAC,EAAE,aAAa,CAAC;IAC7B,iBAAiB,CAAC,EAAE,OAAO,CAAC,iBAAiB,EAAE,aAAa,CAAC,CAAC;IAC9D,mBAAmB,EAAE,MAAM,CAAC;IAC5B,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,KAAK,CAAmB;IAChC,OAAO,CAAC,GAAG,CAAC,CAAU;IACtB,OAAO,CAAC,GAAG,CAAC,CAAM;IAClB,OAAO,CAAC,YAAY,CAAC,CAAgB;IACrC,OAAO,CAAC,iBAAiB,CAAC,CAA4C;IACtE,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,YAAY,CAAC,CAAS;IAE9B,OAAO,CAAC,mBAAmB,CAAqC;IAChE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,uBAAuB,CAAK;IACpC,OAAO,CAAC,eAAe,CAAM;IAC7B,OAAO,CAAC,sBAAsB,CAAM;IACpC,OAAO,CAAC,wBAAwB,CAAM;IACtC,OAAO,CAAC,yBAAyB,CAAgB;IACjD,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,eAAe,CAAqB;IAC5C,OAAO,CAAC,iBAAiB,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,UAAU,CAAC,CAAS;IAE5B,OAAO,CAAC,cAAc,CAA6B;IACnD,OAAO,CAAC,cAAc,CAA6B;IACnD,OAAO,CAAC,qBAAqB,CAAuC;IACpE,OAAO,CAAC,kBAAkB,CAA0C;IAGpE,OAAO,CAAC,aAAa,CAAC,CAAa;IACnC,OAAO,CAAC,kBAAkB,CAAC,CAAa;IACxC,OAAO,CAAC,OAAO,CAAC,CAAa;IAC7B,OAAO,CAAC,OAAO,CAAC,CAAa;gBAEjB,IAAI,EAAE,uBAAuB;IAiBzC;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAK9B;IAEK,KAAK;YAYG,UAAU;IAgMxB,OAAO,CAAC,eAAe;YAyIT,aAAa;YAqDb,aAAa;IAiE3B,mBAAmB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAI3D,sBAAsB;IAItB,aAAa;IAeb,cAAc,CAAC,aAAa,EAAE,OAAO;IA0C/B,KAAK;IAQX,OAAO,KAAK,oBAAoB,GAE/B;CACF"}
@@ -22,7 +22,10 @@ class AudioRecognition {
22
22
  lastFinalTranscriptTime = 0;
23
23
  audioTranscript = "";
24
24
  audioInterimTranscript = "";
25
- lastSpeakingTime = 0;
25
+ audioPreflightTranscript = "";
26
+ finalTranscriptConfidence = [];
27
+ lastSpeakingTime;
28
+ speechStartTime;
26
29
  userTurnCommitted = false;
27
30
  speaking = false;
28
31
  sampleRate;
@@ -70,7 +73,7 @@ class AudioRecognition {
70
73
  });
71
74
  }
72
75
  async onSTTEvent(ev) {
73
- var _a, _b, _c, _d, _e, _f, _g, _h, _i;
76
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r;
74
77
  if (this.turnDetectionMode === "manual" && this.userTurnCommitted && (this.bounceEOUTask === void 0 || this.bounceEOUTask.done || ev.type == SpeechEventType.INTERIM_TRANSCRIPT)) {
75
78
  this.logger.debug(
76
79
  {
@@ -87,7 +90,8 @@ class AudioRecognition {
87
90
  case SpeechEventType.FINAL_TRANSCRIPT:
88
91
  this.hooks.onFinalTranscript(ev);
89
92
  const transcript = (_c = (_b = ev.alternatives) == null ? void 0 : _b[0]) == null ? void 0 : _c.text;
90
- this.lastLanguage = (_e = (_d = ev.alternatives) == null ? void 0 : _d[0]) == null ? void 0 : _e.language;
93
+ const confidence = ((_e = (_d = ev.alternatives) == null ? void 0 : _d[0]) == null ? void 0 : _e.confidence) ?? 0;
94
+ this.lastLanguage = (_g = (_f = ev.alternatives) == null ? void 0 : _f[0]) == null ? void 0 : _g.language;
91
95
  if (!transcript) {
92
96
  return;
93
97
  }
@@ -101,26 +105,112 @@ class AudioRecognition {
101
105
  this.lastFinalTranscriptTime = Date.now();
102
106
  this.audioTranscript += ` ${transcript}`;
103
107
  this.audioTranscript = this.audioTranscript.trimStart();
108
+ this.finalTranscriptConfidence.push(confidence);
109
+ const transcriptChanged = this.audioTranscript !== this.audioPreflightTranscript;
104
110
  this.audioInterimTranscript = "";
105
- if (!this.speaking) {
106
- if (!this.vad) {
107
- this.lastSpeakingTime = Date.now();
111
+ this.audioPreflightTranscript = "";
112
+ if (!this.vad || this.lastSpeakingTime === void 0) {
113
+ this.lastSpeakingTime = Date.now();
114
+ }
115
+ if (this.vadBaseTurnDetection || this.userTurnCommitted) {
116
+ if (transcriptChanged) {
117
+ this.logger.debug(
118
+ { transcript: this.audioTranscript },
119
+ "triggering preemptive generation (FINAL_TRANSCRIPT)"
120
+ );
121
+ this.hooks.onPreemptiveGeneration({
122
+ newTranscript: this.audioTranscript,
123
+ transcriptConfidence: this.finalTranscriptConfidence.length > 0 ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) / this.finalTranscriptConfidence.length : 0
124
+ });
108
125
  }
109
- if (this.vadBaseTurnDetection || this.userTurnCommitted) {
126
+ if (!this.speaking) {
110
127
  const chatCtx = this.hooks.retrieveChatCtx();
111
128
  this.logger.debug("running EOU detection on stt FINAL_TRANSCRIPT");
112
129
  this.runEOUDetection(chatCtx);
113
130
  }
114
131
  }
115
132
  break;
133
+ case SpeechEventType.PREFLIGHT_TRANSCRIPT:
134
+ this.hooks.onInterimTranscript(ev);
135
+ const preflightTranscript = ((_i = (_h = ev.alternatives) == null ? void 0 : _h[0]) == null ? void 0 : _i.text) ?? "";
136
+ const preflightConfidence = ((_k = (_j = ev.alternatives) == null ? void 0 : _j[0]) == null ? void 0 : _k.confidence) ?? 0;
137
+ const preflightLanguage = (_m = (_l = ev.alternatives) == null ? void 0 : _l[0]) == null ? void 0 : _m.language;
138
+ const MIN_LANGUAGE_DETECTION_LENGTH = 5;
139
+ if (!this.lastLanguage || preflightLanguage && preflightTranscript.length > MIN_LANGUAGE_DETECTION_LENGTH) {
140
+ this.lastLanguage = preflightLanguage;
141
+ }
142
+ if (!preflightTranscript) {
143
+ return;
144
+ }
145
+ this.logger.debug(
146
+ {
147
+ user_transcript: preflightTranscript,
148
+ language: this.lastLanguage
149
+ },
150
+ "received user preflight transcript"
151
+ );
152
+ this.lastFinalTranscriptTime = Date.now();
153
+ this.audioPreflightTranscript = `${this.audioTranscript} ${preflightTranscript}`.trimStart();
154
+ this.audioInterimTranscript = preflightTranscript;
155
+ if (!this.vad || this.lastSpeakingTime === void 0) {
156
+ this.lastSpeakingTime = Date.now();
157
+ }
158
+ if (this.turnDetectionMode !== "manual" || this.userTurnCommitted) {
159
+ const confidenceVals = [...this.finalTranscriptConfidence, preflightConfidence];
160
+ this.logger.debug(
161
+ {
162
+ transcript: this.audioPreflightTranscript.length > 100 ? this.audioPreflightTranscript.slice(0, 100) + "..." : this.audioPreflightTranscript
163
+ },
164
+ "triggering preemptive generation (PREFLIGHT_TRANSCRIPT)"
165
+ );
166
+ this.hooks.onPreemptiveGeneration({
167
+ newTranscript: this.audioPreflightTranscript,
168
+ transcriptConfidence: confidenceVals.length > 0 ? confidenceVals.reduce((a, b) => a + b, 0) / confidenceVals.length : 0
169
+ });
170
+ }
171
+ break;
116
172
  case SpeechEventType.INTERIM_TRANSCRIPT:
117
- this.logger.debug({ transcript: (_g = (_f = ev.alternatives) == null ? void 0 : _f[0]) == null ? void 0 : _g.text }, "interim transcript");
173
+ this.logger.debug({ transcript: (_o = (_n = ev.alternatives) == null ? void 0 : _n[0]) == null ? void 0 : _o.text }, "interim transcript");
118
174
  this.hooks.onInterimTranscript(ev);
119
- this.audioInterimTranscript = ((_i = (_h = ev.alternatives) == null ? void 0 : _h[0]) == null ? void 0 : _i.text) ?? "";
175
+ this.audioInterimTranscript = ((_q = (_p = ev.alternatives) == null ? void 0 : _p[0]) == null ? void 0 : _q.text) ?? "";
176
+ break;
177
+ case SpeechEventType.START_OF_SPEECH:
178
+ if (this.turnDetectionMode !== "stt") break;
179
+ this.hooks.onStartOfSpeech({
180
+ type: VADEventType.START_OF_SPEECH,
181
+ samplesIndex: 0,
182
+ timestamp: Date.now(),
183
+ speechDuration: 0,
184
+ silenceDuration: 0,
185
+ frames: [],
186
+ probability: 0,
187
+ inferenceDuration: 0,
188
+ speaking: true,
189
+ rawAccumulatedSilence: 0,
190
+ rawAccumulatedSpeech: 0
191
+ });
192
+ this.speaking = true;
193
+ this.lastSpeakingTime = Date.now();
194
+ (_r = this.bounceEOUTask) == null ? void 0 : _r.cancel();
120
195
  break;
121
196
  case SpeechEventType.END_OF_SPEECH:
122
197
  if (this.turnDetectionMode !== "stt") break;
198
+ this.hooks.onEndOfSpeech({
199
+ type: VADEventType.END_OF_SPEECH,
200
+ samplesIndex: 0,
201
+ timestamp: Date.now(),
202
+ speechDuration: 0,
203
+ silenceDuration: 0,
204
+ frames: [],
205
+ probability: 0,
206
+ inferenceDuration: 0,
207
+ speaking: false,
208
+ rawAccumulatedSilence: 0,
209
+ rawAccumulatedSpeech: 0
210
+ });
211
+ this.speaking = false;
123
212
  this.userTurnCommitted = true;
213
+ this.lastSpeakingTime = Date.now();
124
214
  if (!this.speaking) {
125
215
  const chatCtx = this.hooks.retrieveChatCtx();
126
216
  this.logger.debug("running EOU detection on stt END_OF_SPEECH");
@@ -148,7 +238,7 @@ class AudioRecognition {
148
238
  // disable EOU model if manual turn detection enabled
149
239
  this.audioTranscript && this.turnDetectionMode !== "manual" ? this.turnDetector : void 0
150
240
  );
151
- const bounceEOUTask = (lastSpeakingTime) => async (controller) => {
241
+ const bounceEOUTask = (lastSpeakingTime, lastFinalTranscriptTime, speechStartTime) => async (controller) => {
152
242
  let endpointingDelay = this.minEndpointingDelay;
153
243
  if (turnDetector) {
154
244
  this.logger.debug("Running turn detector model");
@@ -175,21 +265,46 @@ class AudioRecognition {
175
265
  }
176
266
  }
177
267
  }
178
- const extraSleep = lastSpeakingTime + endpointingDelay - Date.now();
179
- await delay(Math.max(extraSleep, 0), { signal: controller.signal });
268
+ let extraSleep = endpointingDelay;
269
+ if (lastSpeakingTime !== void 0) {
270
+ extraSleep += lastSpeakingTime - Date.now();
271
+ }
272
+ if (extraSleep > 0) {
273
+ await delay(Math.max(extraSleep, 0), { signal: controller.signal });
274
+ }
180
275
  this.logger.debug({ transcript: this.audioTranscript }, "end of user turn");
276
+ const confidenceAvg = this.finalTranscriptConfidence.length > 0 ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) / this.finalTranscriptConfidence.length : 0;
277
+ let startedSpeakingAt;
278
+ let stoppedSpeakingAt;
279
+ let transcriptionDelay;
280
+ let endOfUtteranceDelay;
281
+ if (lastFinalTranscriptTime !== 0 && lastSpeakingTime !== void 0 && speechStartTime !== void 0) {
282
+ startedSpeakingAt = speechStartTime;
283
+ stoppedSpeakingAt = lastSpeakingTime;
284
+ transcriptionDelay = Math.max(lastFinalTranscriptTime - lastSpeakingTime, 0);
285
+ endOfUtteranceDelay = Date.now() - lastSpeakingTime;
286
+ }
181
287
  const committed = await this.hooks.onEndOfTurn({
182
288
  newTranscript: this.audioTranscript,
183
- transcriptionDelay: Math.max(this.lastFinalTranscriptTime - lastSpeakingTime, 0),
184
- endOfUtteranceDelay: Date.now() - lastSpeakingTime
289
+ transcriptConfidence: confidenceAvg,
290
+ transcriptionDelay: transcriptionDelay ?? 0,
291
+ endOfUtteranceDelay: endOfUtteranceDelay ?? 0,
292
+ startedSpeakingAt,
293
+ stoppedSpeakingAt
185
294
  });
186
295
  if (committed) {
187
296
  this.audioTranscript = "";
297
+ this.finalTranscriptConfidence = [];
298
+ this.lastSpeakingTime = void 0;
299
+ this.lastFinalTranscriptTime = 0;
300
+ this.speechStartTime = void 0;
188
301
  }
189
302
  this.userTurnCommitted = false;
190
303
  };
191
304
  (_a = this.bounceEOUTask) == null ? void 0 : _a.cancel();
192
- this.bounceEOUTask = Task.from(bounceEOUTask(this.lastSpeakingTime));
305
+ this.bounceEOUTask = Task.from(
306
+ bounceEOUTask(this.lastSpeakingTime, this.lastFinalTranscriptTime, this.speechStartTime)
307
+ );
193
308
  this.bounceEOUTask.result.then(() => {
194
309
  this.logger.debug("EOU detection task completed");
195
310
  }).catch((err) => {
@@ -269,12 +384,17 @@ class AudioRecognition {
269
384
  break;
270
385
  case VADEventType.INFERENCE_DONE:
271
386
  this.hooks.onVADInferenceDone(ev);
387
+ if (ev.rawAccumulatedSpeech > 0) {
388
+ this.lastSpeakingTime = Date.now();
389
+ if (this.speechStartTime === void 0) {
390
+ this.speechStartTime = Date.now();
391
+ }
392
+ }
272
393
  break;
273
394
  case VADEventType.END_OF_SPEECH:
274
395
  this.logger.debug("VAD task: END_OF_SPEECH");
275
396
  this.hooks.onEndOfSpeech(ev);
276
397
  this.speaking = false;
277
- this.lastSpeakingTime = Date.now() - ev.silenceDuration;
278
398
  if (this.vadBaseTurnDetection || this.turnDetectionMode === "stt" && this.userTurnCommitted) {
279
399
  const chatCtx = this.hooks.retrieveChatCtx();
280
400
  this.runEOUDetection(chatCtx);
@@ -298,6 +418,8 @@ class AudioRecognition {
298
418
  var _a;
299
419
  this.audioTranscript = "";
300
420
  this.audioInterimTranscript = "";
421
+ this.audioPreflightTranscript = "";
422
+ this.finalTranscriptConfidence = [];
301
423
  this.userTurnCommitted = false;
302
424
  (_a = this.sttTask) == null ? void 0 : _a.cancelAndWait().finally(() => {
303
425
  this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\n\nexport interface EndOfTurnInfo {\n newTranscript: string;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n recognitionHooks: RecognitionHooks;\n stt?: STTNode;\n vad?: VAD;\n turnDetector?: _TurnDetector;\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private lastSpeakingTime = 0;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n this.hooks.onFinalTranscript(ev);\n const transcript = ev.alternatives?.[0]?.text;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.audioInterimTranscript = '';\n\n if (!this.speaking) {\n if (!this.vad) {\n // Copied from python agents:\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for _last_speaking_time\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.userTurnCommitted = true;\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask = (lastSpeakingTime: number) => async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n // TODO(AJS-74): need to support actual turn detection model plugins for following code to run\n if (turnDetector) {\n this.logger.debug('Running turn detector model');\n if (!turnDetector.supportsLanguage(this.lastLanguage)) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n this.logger.debug(\n { endOfTurnProbability, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n const unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n this.logger.debug(\n {\n unlikelyThreshold,\n endOfTurnProbability,\n language: this.lastLanguage,\n transcript: this.audioTranscript,\n },\n 'EOU Detection',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n }\n }\n\n const extraSleep = lastSpeakingTime + endpointingDelay - Date.now();\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptionDelay: Math.max(this.lastFinalTranscriptTime - lastSpeakingTime, 0),\n endOfUtteranceDelay: Date.now() - lastSpeakingTime,\n });\n\n if (committed) {\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n this.bounceEOUTask = Task.from(bounceEOUTask(this.lastSpeakingTime));\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n this.hooks.onStartOfSpeech(ev);\n this.speaking = true;\n\n // Capture sample rate from the first VAD event if not already set\n if (ev.frames.length > 0 && ev.frames[0]) {\n this.sampleRate = ev.frames[0].sampleRate;\n }\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n this.hooks.onEndOfSpeech(ev);\n this.speaking = false;\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.lastSpeakingTime = Date.now() - ev.silenceDuration;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAGA,SAAS,kBAAkB;AAE3B,SAAS,sBAAsB;AAC/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AAqC/C,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AAEpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,WAAW,IAAiB;AAxH5C;AAyHI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,aAAK,MAAM,kBAAkB,EAAE;AAC/B,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,UAAU;AAClB,cAAI,CAAC,KAAK,KAAK;AAOb,iBAAK,mBAAmB,KAAK,IAAI;AAAA,UACnC;AAEA,cAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,oBAAoB;AAEzB,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AAzMhD;AA0MI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBAAgB,CAAC,qBAA6B,OAAO,eAAgC;AACzF,UAAI,mBAAmB,KAAK;AAG5B,UAAI,cAAc;AAChB,aAAK,OAAO,MAAM,6BAA6B;AAC/C,YAAI,CAAC,aAAa,iBAAiB,KAAK,YAAY,GAAG;AACrD,eAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,QAClF,OAAO;AACL,gBAAM,uBAAuB,MAAM,aAAa,iBAAiB,OAAO;AACxE,eAAK,OAAO;AAAA,YACV,EAAE,sBAAsB,UAAU,KAAK,aAAa;AAAA,YACpD;AAAA,UACF;AAEA,gBAAM,oBAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAChF,eAAK,OAAO;AAAA,YACV;AAAA,cACE;AAAA,cACA;AAAA,cACA,UAAU,KAAK;AAAA,cACf,YAAY,KAAK;AAAA,YACnB;AAAA,YACA;AAAA,UACF;AAEA,cAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,+BAAmB,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAEA,YAAM,aAAa,mBAAmB,mBAAmB,KAAK,IAAI;AAElE,YAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAElE,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,oBAAoB,KAAK,IAAI,KAAK,0BAA0B,kBAAkB,CAAC;AAAA,QAC/E,qBAAqB,KAAK,IAAI,IAAI;AAAA,MACpC,CAAC;AAED,UAAI,WAAW;AAEb,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGA,eAAK,kBAAL,mBAAoB;AACpB,SAAK,gBAAgB,KAAK,KAAK,cAAc,KAAK,gBAAgB,CAAC;AAEnE,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA1VzE;AA2VI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C,iBAAK,MAAM,gBAAgB,EAAE;AAC7B,iBAAK,WAAW;AAGhB,gBAAI,GAAG,OAAO,SAAS,KAAK,GAAG,OAAO,CAAC,GAAG;AACxC,mBAAK,aAAa,GAAG,OAAO,CAAC,EAAE;AAAA,YACjC;AAEA,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAChC;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C,iBAAK,MAAM,cAAc,EAAE;AAC3B,iBAAK,WAAW;AAEhB,iBAAK,mBAAmB,KAAK,IAAI,IAAI,GAAG;AAExC,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AA3ZlB;AA4ZI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AAxazC;AAyaI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AAldhB;AAmdI,SAAK,uBAAuB;AAC5B,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\n\nexport interface EndOfTurnInfo {\n newTranscript: string;\n transcriptConfidence: number;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n startedSpeakingAt: number | undefined;\n stoppedSpeakingAt: number | undefined;\n}\n\nexport interface PreemptiveGenerationInfo {\n newTranscript: string;\n transcriptConfidence: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n onPreemptiveGeneration: (info: PreemptiveGenerationInfo) => void;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n recognitionHooks: RecognitionHooks;\n stt?: STTNode;\n vad?: VAD;\n turnDetector?: _TurnDetector;\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private audioPreflightTranscript = '';\n private finalTranscriptConfidence: number[] = [];\n private lastSpeakingTime: number | undefined;\n private speechStartTime: number | undefined;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n this.hooks.onFinalTranscript(ev);\n const transcript = ev.alternatives?.[0]?.text;\n const confidence = ev.alternatives?.[0]?.confidence ?? 0;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.finalTranscriptConfidence.push(confidence);\n const transcriptChanged = this.audioTranscript !== this.audioPreflightTranscript;\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for lastSpeakingTime\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n if (transcriptChanged) {\n this.logger.debug(\n { transcript: this.audioTranscript },\n 'triggering preemptive generation (FINAL_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioTranscript,\n transcriptConfidence:\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0,\n });\n }\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.PREFLIGHT_TRANSCRIPT:\n this.hooks.onInterimTranscript(ev);\n const preflightTranscript = ev.alternatives?.[0]?.text ?? '';\n const preflightConfidence = ev.alternatives?.[0]?.confidence ?? 0;\n const preflightLanguage = ev.alternatives?.[0]?.language;\n\n const MIN_LANGUAGE_DETECTION_LENGTH = 5;\n if (\n !this.lastLanguage ||\n (preflightLanguage && preflightTranscript.length > MIN_LANGUAGE_DETECTION_LENGTH)\n ) {\n this.lastLanguage = preflightLanguage;\n }\n\n if (!preflightTranscript) {\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: preflightTranscript,\n language: this.lastLanguage,\n },\n 'received user preflight transcript',\n );\n\n // still need to increment it as it's used for turn detection,\n this.lastFinalTranscriptTime = Date.now();\n // preflight transcript includes all pre-committed transcripts (including final transcript from the previous STT run)\n this.audioPreflightTranscript =\n `${this.audioTranscript} ${preflightTranscript}`.trimStart();\n this.audioInterimTranscript = preflightTranscript;\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.turnDetectionMode !== 'manual' || this.userTurnCommitted) {\n const confidenceVals = [...this.finalTranscriptConfidence, preflightConfidence];\n this.logger.debug(\n {\n transcript:\n this.audioPreflightTranscript.length > 100\n ? this.audioPreflightTranscript.slice(0, 100) + '...'\n : this.audioPreflightTranscript,\n },\n 'triggering preemptive generation (PREFLIGHT_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioPreflightTranscript,\n transcriptConfidence:\n confidenceVals.length > 0\n ? confidenceVals.reduce((a, b) => a + b, 0) / confidenceVals.length\n : 0,\n });\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.START_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.hooks.onStartOfSpeech({\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: true,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n this.speaking = true;\n this.lastSpeakingTime = Date.now();\n\n this.bounceEOUTask?.cancel();\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.hooks.onEndOfSpeech({\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: false,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n this.speaking = false;\n this.userTurnCommitted = true;\n this.lastSpeakingTime = Date.now();\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask =\n (\n lastSpeakingTime: number | undefined,\n lastFinalTranscriptTime: number,\n speechStartTime: number | undefined,\n ) =>\n async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n if (turnDetector) {\n this.logger.debug('Running turn detector model');\n if (!turnDetector.supportsLanguage(this.lastLanguage)) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n this.logger.debug(\n { endOfTurnProbability, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n const unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n this.logger.debug(\n {\n unlikelyThreshold,\n endOfTurnProbability,\n language: this.lastLanguage,\n transcript: this.audioTranscript,\n },\n 'EOU Detection',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n }\n }\n\n let extraSleep = endpointingDelay;\n if (lastSpeakingTime !== undefined) {\n extraSleep += lastSpeakingTime - Date.now();\n }\n\n if (extraSleep > 0) {\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n }\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const confidenceAvg =\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0;\n\n let startedSpeakingAt: number | undefined;\n let stoppedSpeakingAt: number | undefined;\n let transcriptionDelay: number | undefined;\n let endOfUtteranceDelay: number | undefined;\n\n // sometimes, we can't calculate the metrics because VAD was unreliable.\n // in this case, we just ignore the calculation, it's better than providing likely wrong values\n if (\n lastFinalTranscriptTime !== 0 &&\n lastSpeakingTime !== undefined &&\n speechStartTime !== undefined\n ) {\n startedSpeakingAt = speechStartTime;\n stoppedSpeakingAt = lastSpeakingTime;\n transcriptionDelay = Math.max(lastFinalTranscriptTime - lastSpeakingTime, 0);\n endOfUtteranceDelay = Date.now() - lastSpeakingTime;\n }\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptConfidence: confidenceAvg,\n transcriptionDelay: transcriptionDelay ?? 0,\n endOfUtteranceDelay: endOfUtteranceDelay ?? 0,\n startedSpeakingAt,\n stoppedSpeakingAt,\n });\n\n if (committed) {\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n this.finalTranscriptConfidence = [];\n this.lastSpeakingTime = undefined;\n this.lastFinalTranscriptTime = 0;\n this.speechStartTime = undefined;\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n // copy the values before awaiting (the values can change)\n this.bounceEOUTask = Task.from(\n bounceEOUTask(this.lastSpeakingTime, this.lastFinalTranscriptTime, this.speechStartTime),\n );\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n this.hooks.onStartOfSpeech(ev);\n this.speaking = true;\n\n // Capture sample rate from the first VAD event if not already set\n if (ev.frames.length > 0 && ev.frames[0]) {\n this.sampleRate = ev.frames[0].sampleRate;\n }\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n // for metrics, get the \"earliest\" signal of speech as possible\n if (ev.rawAccumulatedSpeech > 0.0) {\n this.lastSpeakingTime = Date.now();\n\n if (this.speechStartTime === undefined) {\n this.speechStartTime = Date.now();\n }\n }\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n this.hooks.onEndOfSpeech(ev);\n\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.speaking = false;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n this.finalTranscriptConfidence = [];\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAGA,SAAS,kBAAkB;AAE3B,SAAS,sBAAsB;AAC/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AA8C/C,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,2BAA2B;AAAA,EAC3B,4BAAsC,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AAEpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,WAAW,IAAiB;AApI5C;AAqII,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,aAAK,MAAM,kBAAkB,EAAE;AAC/B,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,cAAM,eAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AACvD,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,0BAA0B,KAAK,UAAU;AAC9C,cAAM,oBAAoB,KAAK,oBAAoB,KAAK;AACxD,aAAK,yBAAyB;AAC9B,aAAK,2BAA2B;AAEhC,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAMpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,cAAI,mBAAmB;AACrB,iBAAK,OAAO;AAAA,cACV,EAAE,YAAY,KAAK,gBAAgB;AAAA,cACnC;AAAA,YACF;AACA,iBAAK,MAAM,uBAAuB;AAAA,cAChC,eAAe,KAAK;AAAA,cACpB,sBACE,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAAA,YACR,CAAC;AAAA,UACH;AAEA,cAAI,CAAC,KAAK,UAAU;AAClB,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,MAAM,oBAAoB,EAAE;AACjC,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC1D,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AAChE,cAAM,qBAAoB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAEhD,cAAM,gCAAgC;AACtC,YACE,CAAC,KAAK,gBACL,qBAAqB,oBAAoB,SAAS,+BACnD;AACA,eAAK,eAAe;AAAA,QACtB;AAEA,YAAI,CAAC,qBAAqB;AACxB;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAGA,aAAK,0BAA0B,KAAK,IAAI;AAExC,aAAK,2BACH,GAAG,KAAK,eAAe,IAAI,mBAAmB,GAAG,UAAU;AAC7D,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAEpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,sBAAsB,YAAY,KAAK,mBAAmB;AACjE,gBAAM,iBAAiB,CAAC,GAAG,KAAK,2BAA2B,mBAAmB;AAC9E,eAAK,OAAO;AAAA,YACV;AAAA,cACE,YACE,KAAK,yBAAyB,SAAS,MACnC,KAAK,yBAAyB,MAAM,GAAG,GAAG,IAAI,QAC9C,KAAK;AAAA,YACb;AAAA,YACA;AAAA,UACF;AACA,eAAK,MAAM,uBAAuB;AAAA,YAChC,eAAe,KAAK;AAAA,YACpB,sBACE,eAAe,SAAS,IACpB,eAAe,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,eAAe,SAC3D;AAAA,UACR,CAAC;AAAA,QACH;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,MAAM,gBAAgB;AAAA,UACzB,MAAM,aAAa;AAAA,UACnB,cAAc;AAAA,UACd,WAAW,KAAK,IAAI;AAAA,UACpB,gBAAgB;AAAA,UAChB,iBAAiB;AAAA,UACjB,QAAQ,CAAC;AAAA,UACT,aAAa;AAAA,UACb,mBAAmB;AAAA,UACnB,UAAU;AAAA,UACV,uBAAuB;AAAA,UACvB,sBAAsB;AAAA,QACxB,CAAC;AACD,aAAK,WAAW;AAChB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,mBAAK,kBAAL,mBAAoB;AACpB;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,MAAM,cAAc;AAAA,UACvB,MAAM,aAAa;AAAA,UACnB,cAAc;AAAA,UACd,WAAW,KAAK,IAAI;AAAA,UACpB,gBAAgB;AAAA,UAChB,iBAAiB;AAAA,UACjB,QAAQ,CAAC;AAAA,UACT,aAAa;AAAA,UACb,mBAAmB;AAAA,UACnB,UAAU;AAAA,UACV,uBAAuB;AAAA,UACvB,sBAAsB;AAAA,QACxB,CAAC;AACD,aAAK,WAAW;AAChB,aAAK,oBAAoB;AACzB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AApUhD;AAqUI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBACJ,CACE,kBACA,yBACA,oBAEF,OAAO,eAAgC;AACrC,UAAI,mBAAmB,KAAK;AAE5B,UAAI,cAAc;AAChB,aAAK,OAAO,MAAM,6BAA6B;AAC/C,YAAI,CAAC,aAAa,iBAAiB,KAAK,YAAY,GAAG;AACrD,eAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,QAClF,OAAO;AACL,gBAAM,uBAAuB,MAAM,aAAa,iBAAiB,OAAO;AACxE,eAAK,OAAO;AAAA,YACV,EAAE,sBAAsB,UAAU,KAAK,aAAa;AAAA,YACpD;AAAA,UACF;AAEA,gBAAM,oBAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAChF,eAAK,OAAO;AAAA,YACV;AAAA,cACE;AAAA,cACA;AAAA,cACA,UAAU,KAAK;AAAA,cACf,YAAY,KAAK;AAAA,YACnB;AAAA,YACA;AAAA,UACF;AAEA,cAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,+BAAmB,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAEA,UAAI,aAAa;AACjB,UAAI,qBAAqB,QAAW;AAClC,sBAAc,mBAAmB,KAAK,IAAI;AAAA,MAC5C;AAEA,UAAI,aAAa,GAAG;AAElB,cAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MACpE;AAEA,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,gBACJ,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAEN,UAAI;AACJ,UAAI;AACJ,UAAI;AACJ,UAAI;AAIJ,UACE,4BAA4B,KAC5B,qBAAqB,UACrB,oBAAoB,QACpB;AACA,4BAAoB;AACpB,4BAAoB;AACpB,6BAAqB,KAAK,IAAI,0BAA0B,kBAAkB,CAAC;AAC3E,8BAAsB,KAAK,IAAI,IAAI;AAAA,MACrC;AAEA,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,sBAAsB;AAAA,QACtB,oBAAoB,sBAAsB;AAAA,QAC1C,qBAAqB,uBAAuB;AAAA,QAC5C;AAAA,QACA;AAAA,MACF,CAAC;AAED,UAAI,WAAW;AAEb,aAAK,kBAAkB;AACvB,aAAK,4BAA4B,CAAC;AAClC,aAAK,mBAAmB;AACxB,aAAK,0BAA0B;AAC/B,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,kBAAL,mBAAoB;AAEpB,SAAK,gBAAgB,KAAK;AAAA,MACxB,cAAc,KAAK,kBAAkB,KAAK,yBAAyB,KAAK,eAAe;AAAA,IACzF;AAEA,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AAlgBzE;AAmgBI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C,iBAAK,MAAM,gBAAgB,EAAE;AAC7B,iBAAK,WAAW;AAGhB,gBAAI,GAAG,OAAO,SAAS,KAAK,GAAG,OAAO,CAAC,GAAG;AACxC,mBAAK,aAAa,GAAG,OAAO,CAAC,EAAE;AAAA,YACjC;AAEA,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAEhC,gBAAI,GAAG,uBAAuB,GAAK;AACjC,mBAAK,mBAAmB,KAAK,IAAI;AAEjC,kBAAI,KAAK,oBAAoB,QAAW;AACtC,qBAAK,kBAAkB,KAAK,IAAI;AAAA,cAClC;AAAA,YACF;AACA;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C,iBAAK,MAAM,cAAc,EAAE;AAG3B,iBAAK,WAAW;AAEhB,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AA3kBlB;AA4kBI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,2BAA2B;AAChC,SAAK,4BAA4B,CAAC;AAClC,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AA1lBzC;AA2lBI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AApoBhB;AAqoBI,SAAK,uBAAuB;AAC5B,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../src/voice/room_io/_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n AudioFrame,\n AudioStream,\n type NoiseCancellationOptions,\n RemoteParticipant,\n type RemoteTrack,\n type RemoteTrackPublication,\n type Room,\n RoomEvent,\n TrackSource,\n} from '@livekit/rtc-node';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from '../../log.js';\nimport { resampleStream } from '../../utils.js';\nimport { AudioInput } from '../io.js';\n\nexport class ParticipantAudioInputStream extends AudioInput {\n private room: Room;\n private sampleRate: number;\n private numChannels: number;\n private noiseCancellation?: NoiseCancellationOptions;\n private publication: RemoteTrackPublication | null = null;\n private participantIdentity: string | null = null;\n private logger = log();\n constructor({\n room,\n sampleRate,\n numChannels,\n noiseCancellation,\n }: {\n room: Room;\n sampleRate: number;\n numChannels: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.room = room;\n this.sampleRate = sampleRate;\n this.numChannels = numChannels;\n this.noiseCancellation = noiseCancellation;\n\n this.room.on(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.on(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n }\n\n setParticipant(participant: RemoteParticipant | string | null) {\n this.logger.debug({ participant }, 'setting participant audio input');\n const participantIdentity =\n participant instanceof RemoteParticipant ? participant.identity : participant;\n\n if (this.participantIdentity === participantIdentity) {\n return;\n }\n this.participantIdentity = participantIdentity;\n this.closeStream();\n\n if (!participantIdentity) {\n return;\n }\n\n const participantValue =\n participant instanceof RemoteParticipant\n ? participant\n : this.room.remoteParticipants.get(participantIdentity);\n\n // Convert Map iterator to array for Pino serialization\n const trackPublicationsArray = Array.from(participantValue?.trackPublications.values() ?? []);\n\n this.logger.info(\n {\n participantValue: participantValue?.identity,\n trackPublications: trackPublicationsArray,\n lengthOfTrackPublications: trackPublicationsArray.length,\n },\n 'participantValue.trackPublications',\n );\n // We need to check if the participant has a microphone track and subscribe to it\n // in case we miss the tracksubscribed event\n if (participantValue) {\n for (const publication of participantValue.trackPublications.values()) {\n if (publication.track && publication.source === TrackSource.SOURCE_MICROPHONE) {\n this.onTrackSubscribed(publication.track, publication, participantValue);\n break;\n }\n }\n }\n }\n\n private onTrackUnpublished = (\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) => {\n if (\n this.publication?.sid !== publication.sid ||\n participant.identity !== this.participantIdentity\n ) {\n return;\n }\n this.closeStream();\n\n // subscribe to the first available track\n for (const publication of participant.trackPublications.values()) {\n if (\n publication.track &&\n this.onTrackSubscribed(publication.track, publication, participant)\n ) {\n return;\n }\n }\n };\n\n private closeStream() {\n if (this.deferredStream.isSourceSet) {\n this.deferredStream.detachSource();\n }\n this.publication = null;\n }\n\n private onTrackSubscribed = (\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ): boolean => {\n this.logger.debug({ participant: participant.identity }, 'onTrackSubscribed in _input');\n if (\n this.participantIdentity !== participant.identity ||\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n (this.publication && this.publication.sid === publication.sid)\n ) {\n return false;\n }\n this.closeStream();\n this.publication = publication;\n this.deferredStream.setSource(\n resampleStream({\n stream: this.createStream(track),\n outputRate: this.sampleRate,\n }),\n );\n return true;\n };\n\n private createStream(track: RemoteTrack): ReadableStream<AudioFrame> {\n return new AudioStream(track, {\n sampleRate: this.sampleRate,\n numChannels: this.numChannels,\n noiseCancellation: this.noiseCancellation,\n // TODO(AJS-269): resolve compatibility issue with node-sdk to remove the forced type casting\n }) as unknown as ReadableStream<AudioFrame>;\n }\n\n async close() {\n this.room.off(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.off(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n this.closeStream();\n this.deferredStream.stream.cancel();\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAUO;AAEP,iBAAoB;AACpB,mBAA+B;AAC/B,gBAA2B;AAEpB,MAAM,oCAAoC,qBAAW;AAAA,EAClD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAA6C;AAAA,EAC7C,sBAAqC;AAAA,EACrC,aAAS,gBAAI;AAAA,EACrB,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKG;AACD,UAAM;AACN,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,oBAAoB;AAEzB,SAAK,KAAK,GAAG,0BAAU,iBAAiB,KAAK,iBAAiB;AAC9D,SAAK,KAAK,GAAG,0BAAU,kBAAkB,KAAK,kBAAkB;AAAA,EAClE;AAAA,EAEA,eAAe,aAAgD;AAC7D,SAAK,OAAO,MAAM,EAAE,YAAY,GAAG,iCAAiC;AACpE,UAAM,sBACJ,uBAAuB,oCAAoB,YAAY,WAAW;AAEpE,QAAI,KAAK,wBAAwB,qBAAqB;AACpD;AAAA,IACF;AACA,SAAK,sBAAsB;AAC3B,SAAK,YAAY;AAEjB,QAAI,CAAC,qBAAqB;AACxB;AAAA,IACF;AAEA,UAAM,mBACJ,uBAAuB,oCACnB,cACA,KAAK,KAAK,mBAAmB,IAAI,mBAAmB;AAG1D,UAAM,yBAAyB,MAAM,MAAK,qDAAkB,kBAAkB,aAAY,CAAC,CAAC;AAE5F,SAAK,OAAO;AAAA,MACV;AAAA,QACE,kBAAkB,qDAAkB;AAAA,QACpC,mBAAmB;AAAA,QACnB,2BAA2B,uBAAuB;AAAA,MACpD;AAAA,MACA;AAAA,IACF;AAGA,QAAI,kBAAkB;AACpB,iBAAW,eAAe,iBAAiB,kBAAkB,OAAO,GAAG;AACrE,YAAI,YAAY,SAAS,YAAY,WAAW,4BAAY,mBAAmB;AAC7E,eAAK,kBAAkB,YAAY,OAAO,aAAa,gBAAgB;AACvE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,qBAAqB,CAC3B,aACA,gBACG;AA9FP;AA+FI,UACE,UAAK,gBAAL,mBAAkB,SAAQ,YAAY,OACtC,YAAY,aAAa,KAAK,qBAC9B;AACA;AAAA,IACF;AACA,SAAK,YAAY;AAGjB,eAAWA,gBAAe,YAAY,kBAAkB,OAAO,GAAG;AAChE,UACEA,aAAY,SACZ,KAAK,kBAAkBA,aAAY,OAAOA,cAAa,WAAW,GAClE;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,QAAI,KAAK,eAAe,aAAa;AACnC,WAAK,eAAe,aAAa;AAAA,IACnC;AACA,SAAK,cAAc;AAAA,EACrB;AAAA,EAEQ,oBAAoB,CAC1B,OACA,aACA,gBACY;AACZ,SAAK,OAAO,MAAM,EAAE,aAAa,YAAY,SAAS,GAAG,6BAA6B;AACtF,QACE,KAAK,wBAAwB,YAAY,YACzC,YAAY,WAAW,4BAAY,qBAClC,KAAK,eAAe,KAAK,YAAY,QAAQ,YAAY,KAC1D;AACA,aAAO;AAAA,IACT;AACA,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,UAClB,6BAAe;AAAA,QACb,QAAQ,KAAK,aAAa,KAAK;AAAA,QAC/B,YAAY,KAAK;AAAA,MACnB,CAAC;AAAA,IACH;AACA,WAAO;AAAA,EACT;AAAA,EAEQ,aAAa,OAAgD;AACnE,WAAO,IAAI,4BAAY,OAAO;AAAA,MAC5B,YAAY,KAAK;AAAA,MACjB,aAAa,KAAK;AAAA,MAClB,mBAAmB,KAAK;AAAA;AAAA,IAE1B,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,KAAK,IAAI,0BAAU,iBAAiB,KAAK,iBAAiB;AAC/D,SAAK,KAAK,IAAI,0BAAU,kBAAkB,KAAK,kBAAkB;AACjE,SAAK,YAAY;AACjB,SAAK,eAAe,OAAO,OAAO;AAAA,EACpC;AACF;","names":["publication"]}
1
+ {"version":3,"sources":["../../../src/voice/room_io/_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport {\n AudioStream,\n type NoiseCancellationOptions,\n RemoteParticipant,\n type RemoteTrack,\n type RemoteTrackPublication,\n type Room,\n RoomEvent,\n TrackSource,\n} from '@livekit/rtc-node';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from '../../log.js';\nimport { resampleStream } from '../../utils.js';\nimport { AudioInput } from '../io.js';\n\nexport class ParticipantAudioInputStream extends AudioInput {\n private room: Room;\n private sampleRate: number;\n private numChannels: number;\n private noiseCancellation?: NoiseCancellationOptions;\n private publication: RemoteTrackPublication | null = null;\n private participantIdentity: string | null = null;\n private logger = log();\n constructor({\n room,\n sampleRate,\n numChannels,\n noiseCancellation,\n }: {\n room: Room;\n sampleRate: number;\n numChannels: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.room = room;\n this.sampleRate = sampleRate;\n this.numChannels = numChannels;\n this.noiseCancellation = noiseCancellation;\n\n this.room.on(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.on(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n }\n\n setParticipant(participant: RemoteParticipant | string | null) {\n this.logger.debug({ participant }, 'setting participant audio input');\n const participantIdentity =\n participant instanceof RemoteParticipant ? participant.identity : participant;\n\n if (this.participantIdentity === participantIdentity) {\n return;\n }\n this.participantIdentity = participantIdentity;\n this.closeStream();\n\n if (!participantIdentity) {\n return;\n }\n\n const participantValue =\n participant instanceof RemoteParticipant\n ? participant\n : this.room.remoteParticipants.get(participantIdentity);\n\n // Convert Map iterator to array for Pino serialization\n const trackPublicationsArray = Array.from(participantValue?.trackPublications.values() ?? []);\n\n this.logger.info(\n {\n participantValue: participantValue?.identity,\n trackPublications: trackPublicationsArray,\n lengthOfTrackPublications: trackPublicationsArray.length,\n },\n 'participantValue.trackPublications',\n );\n // We need to check if the participant has a microphone track and subscribe to it\n // in case we miss the tracksubscribed event\n if (participantValue) {\n for (const publication of participantValue.trackPublications.values()) {\n if (publication.track && publication.source === TrackSource.SOURCE_MICROPHONE) {\n this.onTrackSubscribed(publication.track, publication, participantValue);\n break;\n }\n }\n }\n }\n\n private onTrackUnpublished = (\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) => {\n if (\n this.publication?.sid !== publication.sid ||\n participant.identity !== this.participantIdentity\n ) {\n return;\n }\n this.closeStream();\n\n // subscribe to the first available track\n for (const publication of participant.trackPublications.values()) {\n if (\n publication.track &&\n this.onTrackSubscribed(publication.track, publication, participant)\n ) {\n return;\n }\n }\n };\n\n private closeStream() {\n if (this.deferredStream.isSourceSet) {\n this.deferredStream.detachSource();\n }\n this.publication = null;\n }\n\n private onTrackSubscribed = (\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ): boolean => {\n this.logger.debug({ participant: participant.identity }, 'onTrackSubscribed in _input');\n if (\n this.participantIdentity !== participant.identity ||\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n (this.publication && this.publication.sid === publication.sid)\n ) {\n return false;\n }\n this.closeStream();\n this.publication = publication;\n this.deferredStream.setSource(\n resampleStream({\n stream: this.createStream(track),\n outputRate: this.sampleRate,\n }),\n );\n return true;\n };\n\n private createStream(track: RemoteTrack): ReadableStream<AudioFrame> {\n return new AudioStream(track, {\n sampleRate: this.sampleRate,\n numChannels: this.numChannels,\n noiseCancellation: this.noiseCancellation,\n // TODO(AJS-269): resolve compatibility issue with node-sdk to remove the forced type casting\n }) as unknown as ReadableStream<AudioFrame>;\n }\n\n async close() {\n this.room.off(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.off(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n this.closeStream();\n this.deferredStream.stream.cancel();\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,sBASO;AAEP,iBAAoB;AACpB,mBAA+B;AAC/B,gBAA2B;AAEpB,MAAM,oCAAoC,qBAAW;AAAA,EAClD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAA6C;AAAA,EAC7C,sBAAqC;AAAA,EACrC,aAAS,gBAAI;AAAA,EACrB,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKG;AACD,UAAM;AACN,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,oBAAoB;AAEzB,SAAK,KAAK,GAAG,0BAAU,iBAAiB,KAAK,iBAAiB;AAC9D,SAAK,KAAK,GAAG,0BAAU,kBAAkB,KAAK,kBAAkB;AAAA,EAClE;AAAA,EAEA,eAAe,aAAgD;AAC7D,SAAK,OAAO,MAAM,EAAE,YAAY,GAAG,iCAAiC;AACpE,UAAM,sBACJ,uBAAuB,oCAAoB,YAAY,WAAW;AAEpE,QAAI,KAAK,wBAAwB,qBAAqB;AACpD;AAAA,IACF;AACA,SAAK,sBAAsB;AAC3B,SAAK,YAAY;AAEjB,QAAI,CAAC,qBAAqB;AACxB;AAAA,IACF;AAEA,UAAM,mBACJ,uBAAuB,oCACnB,cACA,KAAK,KAAK,mBAAmB,IAAI,mBAAmB;AAG1D,UAAM,yBAAyB,MAAM,MAAK,qDAAkB,kBAAkB,aAAY,CAAC,CAAC;AAE5F,SAAK,OAAO;AAAA,MACV;AAAA,QACE,kBAAkB,qDAAkB;AAAA,QACpC,mBAAmB;AAAA,QACnB,2BAA2B,uBAAuB;AAAA,MACpD;AAAA,MACA;AAAA,IACF;AAGA,QAAI,kBAAkB;AACpB,iBAAW,eAAe,iBAAiB,kBAAkB,OAAO,GAAG;AACrE,YAAI,YAAY,SAAS,YAAY,WAAW,4BAAY,mBAAmB;AAC7E,eAAK,kBAAkB,YAAY,OAAO,aAAa,gBAAgB;AACvE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,qBAAqB,CAC3B,aACA,gBACG;AA9FP;AA+FI,UACE,UAAK,gBAAL,mBAAkB,SAAQ,YAAY,OACtC,YAAY,aAAa,KAAK,qBAC9B;AACA;AAAA,IACF;AACA,SAAK,YAAY;AAGjB,eAAWA,gBAAe,YAAY,kBAAkB,OAAO,GAAG;AAChE,UACEA,aAAY,SACZ,KAAK,kBAAkBA,aAAY,OAAOA,cAAa,WAAW,GAClE;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,QAAI,KAAK,eAAe,aAAa;AACnC,WAAK,eAAe,aAAa;AAAA,IACnC;AACA,SAAK,cAAc;AAAA,EACrB;AAAA,EAEQ,oBAAoB,CAC1B,OACA,aACA,gBACY;AACZ,SAAK,OAAO,MAAM,EAAE,aAAa,YAAY,SAAS,GAAG,6BAA6B;AACtF,QACE,KAAK,wBAAwB,YAAY,YACzC,YAAY,WAAW,4BAAY,qBAClC,KAAK,eAAe,KAAK,YAAY,QAAQ,YAAY,KAC1D;AACA,aAAO;AAAA,IACT;AACA,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,UAClB,6BAAe;AAAA,QACb,QAAQ,KAAK,aAAa,KAAK;AAAA,QAC/B,YAAY,KAAK;AAAA,MACnB,CAAC;AAAA,IACH;AACA,WAAO;AAAA,EACT;AAAA,EAEQ,aAAa,OAAgD;AACnE,WAAO,IAAI,4BAAY,OAAO;AAAA,MAC5B,YAAY,KAAK;AAAA,MACjB,aAAa,KAAK;AAAA,MAClB,mBAAmB,KAAK;AAAA;AAAA,IAE1B,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,KAAK,IAAI,0BAAU,iBAAiB,KAAK,iBAAiB;AAC/D,SAAK,KAAK,IAAI,0BAAU,kBAAkB,KAAK,kBAAkB;AACjE,SAAK,YAAY;AACjB,SAAK,eAAe,OAAO,OAAO;AAAA,EACpC;AACF;","names":["publication"]}
@@ -1 +1 @@
1
- {"version":3,"file":"_input.d.ts","sourceRoot":"","sources":["../../../src/voice/room_io/_input.ts"],"names":[],"mappings":"AAGA,OAAO,EAGL,KAAK,wBAAwB,EAC7B,iBAAiB,EAGjB,KAAK,IAAI,EAGV,MAAM,mBAAmB,CAAC;AAI3B,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAEtC,qBAAa,2BAA4B,SAAQ,UAAU;IACzD,OAAO,CAAC,IAAI,CAAO;IACnB,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,iBAAiB,CAAC,CAA2B;IACrD,OAAO,CAAC,WAAW,CAAuC;IAC1D,OAAO,CAAC,mBAAmB,CAAuB;IAClD,OAAO,CAAC,MAAM,CAAS;gBACX,EACV,IAAI,EACJ,UAAU,EACV,WAAW,EACX,iBAAiB,GAClB,EAAE;QACD,IAAI,EAAE,IAAI,CAAC;QACX,UAAU,EAAE,MAAM,CAAC;QACnB,WAAW,EAAE,MAAM,CAAC;QACpB,iBAAiB,CAAC,EAAE,wBAAwB,CAAC;KAC9C;IAWD,cAAc,CAAC,WAAW,EAAE,iBAAiB,GAAG,MAAM,GAAG,IAAI;IA2C7D,OAAO,CAAC,kBAAkB,CAqBxB;IAEF,OAAO,CAAC,WAAW;IAOnB,OAAO,CAAC,iBAAiB,CAsBvB;IAEF,OAAO,CAAC,YAAY;IASd,KAAK;CAMZ"}
1
+ {"version":3,"file":"_input.d.ts","sourceRoot":"","sources":["../../../src/voice/room_io/_input.ts"],"names":[],"mappings":"AAIA,OAAO,EAEL,KAAK,wBAAwB,EAC7B,iBAAiB,EAGjB,KAAK,IAAI,EAGV,MAAM,mBAAmB,CAAC;AAI3B,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAEtC,qBAAa,2BAA4B,SAAQ,UAAU;IACzD,OAAO,CAAC,IAAI,CAAO;IACnB,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,iBAAiB,CAAC,CAA2B;IACrD,OAAO,CAAC,WAAW,CAAuC;IAC1D,OAAO,CAAC,mBAAmB,CAAuB;IAClD,OAAO,CAAC,MAAM,CAAS;gBACX,EACV,IAAI,EACJ,UAAU,EACV,WAAW,EACX,iBAAiB,GAClB,EAAE;QACD,IAAI,EAAE,IAAI,CAAC;QACX,UAAU,EAAE,MAAM,CAAC;QACnB,WAAW,EAAE,MAAM,CAAC;QACpB,iBAAiB,CAAC,EAAE,wBAAwB,CAAC;KAC9C;IAWD,cAAc,CAAC,WAAW,EAAE,iBAAiB,GAAG,MAAM,GAAG,IAAI;IA2C7D,OAAO,CAAC,kBAAkB,CAqBxB;IAEF,OAAO,CAAC,WAAW;IAOnB,OAAO,CAAC,iBAAiB,CAsBvB;IAEF,OAAO,CAAC,YAAY;IASd,KAAK;CAMZ"}
@@ -1,5 +1,4 @@
1
1
  import {
2
- AudioFrame,
3
2
  AudioStream,
4
3
  RemoteParticipant,
5
4
  RoomEvent,
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../src/voice/room_io/_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n AudioFrame,\n AudioStream,\n type NoiseCancellationOptions,\n RemoteParticipant,\n type RemoteTrack,\n type RemoteTrackPublication,\n type Room,\n RoomEvent,\n TrackSource,\n} from '@livekit/rtc-node';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from '../../log.js';\nimport { resampleStream } from '../../utils.js';\nimport { AudioInput } from '../io.js';\n\nexport class ParticipantAudioInputStream extends AudioInput {\n private room: Room;\n private sampleRate: number;\n private numChannels: number;\n private noiseCancellation?: NoiseCancellationOptions;\n private publication: RemoteTrackPublication | null = null;\n private participantIdentity: string | null = null;\n private logger = log();\n constructor({\n room,\n sampleRate,\n numChannels,\n noiseCancellation,\n }: {\n room: Room;\n sampleRate: number;\n numChannels: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.room = room;\n this.sampleRate = sampleRate;\n this.numChannels = numChannels;\n this.noiseCancellation = noiseCancellation;\n\n this.room.on(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.on(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n }\n\n setParticipant(participant: RemoteParticipant | string | null) {\n this.logger.debug({ participant }, 'setting participant audio input');\n const participantIdentity =\n participant instanceof RemoteParticipant ? participant.identity : participant;\n\n if (this.participantIdentity === participantIdentity) {\n return;\n }\n this.participantIdentity = participantIdentity;\n this.closeStream();\n\n if (!participantIdentity) {\n return;\n }\n\n const participantValue =\n participant instanceof RemoteParticipant\n ? participant\n : this.room.remoteParticipants.get(participantIdentity);\n\n // Convert Map iterator to array for Pino serialization\n const trackPublicationsArray = Array.from(participantValue?.trackPublications.values() ?? []);\n\n this.logger.info(\n {\n participantValue: participantValue?.identity,\n trackPublications: trackPublicationsArray,\n lengthOfTrackPublications: trackPublicationsArray.length,\n },\n 'participantValue.trackPublications',\n );\n // We need to check if the participant has a microphone track and subscribe to it\n // in case we miss the tracksubscribed event\n if (participantValue) {\n for (const publication of participantValue.trackPublications.values()) {\n if (publication.track && publication.source === TrackSource.SOURCE_MICROPHONE) {\n this.onTrackSubscribed(publication.track, publication, participantValue);\n break;\n }\n }\n }\n }\n\n private onTrackUnpublished = (\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) => {\n if (\n this.publication?.sid !== publication.sid ||\n participant.identity !== this.participantIdentity\n ) {\n return;\n }\n this.closeStream();\n\n // subscribe to the first available track\n for (const publication of participant.trackPublications.values()) {\n if (\n publication.track &&\n this.onTrackSubscribed(publication.track, publication, participant)\n ) {\n return;\n }\n }\n };\n\n private closeStream() {\n if (this.deferredStream.isSourceSet) {\n this.deferredStream.detachSource();\n }\n this.publication = null;\n }\n\n private onTrackSubscribed = (\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ): boolean => {\n this.logger.debug({ participant: participant.identity }, 'onTrackSubscribed in _input');\n if (\n this.participantIdentity !== participant.identity ||\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n (this.publication && this.publication.sid === publication.sid)\n ) {\n return false;\n }\n this.closeStream();\n this.publication = publication;\n this.deferredStream.setSource(\n resampleStream({\n stream: this.createStream(track),\n outputRate: this.sampleRate,\n }),\n );\n return true;\n };\n\n private createStream(track: RemoteTrack): ReadableStream<AudioFrame> {\n return new AudioStream(track, {\n sampleRate: this.sampleRate,\n numChannels: this.numChannels,\n noiseCancellation: this.noiseCancellation,\n // TODO(AJS-269): resolve compatibility issue with node-sdk to remove the forced type casting\n }) as unknown as ReadableStream<AudioFrame>;\n }\n\n async close() {\n this.room.off(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.off(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n this.closeStream();\n this.deferredStream.stream.cancel();\n }\n}\n"],"mappings":"AAGA;AAAA,EACE;AAAA,EACA;AAAA,EAEA;AAAA,EAIA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,WAAW;AACpB,SAAS,sBAAsB;AAC/B,SAAS,kBAAkB;AAEpB,MAAM,oCAAoC,WAAW;AAAA,EAClD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAA6C;AAAA,EAC7C,sBAAqC;AAAA,EACrC,SAAS,IAAI;AAAA,EACrB,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKG;AACD,UAAM;AACN,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,oBAAoB;AAEzB,SAAK,KAAK,GAAG,UAAU,iBAAiB,KAAK,iBAAiB;AAC9D,SAAK,KAAK,GAAG,UAAU,kBAAkB,KAAK,kBAAkB;AAAA,EAClE;AAAA,EAEA,eAAe,aAAgD;AAC7D,SAAK,OAAO,MAAM,EAAE,YAAY,GAAG,iCAAiC;AACpE,UAAM,sBACJ,uBAAuB,oBAAoB,YAAY,WAAW;AAEpE,QAAI,KAAK,wBAAwB,qBAAqB;AACpD;AAAA,IACF;AACA,SAAK,sBAAsB;AAC3B,SAAK,YAAY;AAEjB,QAAI,CAAC,qBAAqB;AACxB;AAAA,IACF;AAEA,UAAM,mBACJ,uBAAuB,oBACnB,cACA,KAAK,KAAK,mBAAmB,IAAI,mBAAmB;AAG1D,UAAM,yBAAyB,MAAM,MAAK,qDAAkB,kBAAkB,aAAY,CAAC,CAAC;AAE5F,SAAK,OAAO;AAAA,MACV;AAAA,QACE,kBAAkB,qDAAkB;AAAA,QACpC,mBAAmB;AAAA,QACnB,2BAA2B,uBAAuB;AAAA,MACpD;AAAA,MACA;AAAA,IACF;AAGA,QAAI,kBAAkB;AACpB,iBAAW,eAAe,iBAAiB,kBAAkB,OAAO,GAAG;AACrE,YAAI,YAAY,SAAS,YAAY,WAAW,YAAY,mBAAmB;AAC7E,eAAK,kBAAkB,YAAY,OAAO,aAAa,gBAAgB;AACvE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,qBAAqB,CAC3B,aACA,gBACG;AA9FP;AA+FI,UACE,UAAK,gBAAL,mBAAkB,SAAQ,YAAY,OACtC,YAAY,aAAa,KAAK,qBAC9B;AACA;AAAA,IACF;AACA,SAAK,YAAY;AAGjB,eAAWA,gBAAe,YAAY,kBAAkB,OAAO,GAAG;AAChE,UACEA,aAAY,SACZ,KAAK,kBAAkBA,aAAY,OAAOA,cAAa,WAAW,GAClE;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,QAAI,KAAK,eAAe,aAAa;AACnC,WAAK,eAAe,aAAa;AAAA,IACnC;AACA,SAAK,cAAc;AAAA,EACrB;AAAA,EAEQ,oBAAoB,CAC1B,OACA,aACA,gBACY;AACZ,SAAK,OAAO,MAAM,EAAE,aAAa,YAAY,SAAS,GAAG,6BAA6B;AACtF,QACE,KAAK,wBAAwB,YAAY,YACzC,YAAY,WAAW,YAAY,qBAClC,KAAK,eAAe,KAAK,YAAY,QAAQ,YAAY,KAC1D;AACA,aAAO;AAAA,IACT;AACA,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,MAClB,eAAe;AAAA,QACb,QAAQ,KAAK,aAAa,KAAK;AAAA,QAC/B,YAAY,KAAK;AAAA,MACnB,CAAC;AAAA,IACH;AACA,WAAO;AAAA,EACT;AAAA,EAEQ,aAAa,OAAgD;AACnE,WAAO,IAAI,YAAY,OAAO;AAAA,MAC5B,YAAY,KAAK;AAAA,MACjB,aAAa,KAAK;AAAA,MAClB,mBAAmB,KAAK;AAAA;AAAA,IAE1B,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,KAAK,IAAI,UAAU,iBAAiB,KAAK,iBAAiB;AAC/D,SAAK,KAAK,IAAI,UAAU,kBAAkB,KAAK,kBAAkB;AACjE,SAAK,YAAY;AACjB,SAAK,eAAe,OAAO,OAAO;AAAA,EACpC;AACF;","names":["publication"]}
1
+ {"version":3,"sources":["../../../src/voice/room_io/_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport {\n AudioStream,\n type NoiseCancellationOptions,\n RemoteParticipant,\n type RemoteTrack,\n type RemoteTrackPublication,\n type Room,\n RoomEvent,\n TrackSource,\n} from '@livekit/rtc-node';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from '../../log.js';\nimport { resampleStream } from '../../utils.js';\nimport { AudioInput } from '../io.js';\n\nexport class ParticipantAudioInputStream extends AudioInput {\n private room: Room;\n private sampleRate: number;\n private numChannels: number;\n private noiseCancellation?: NoiseCancellationOptions;\n private publication: RemoteTrackPublication | null = null;\n private participantIdentity: string | null = null;\n private logger = log();\n constructor({\n room,\n sampleRate,\n numChannels,\n noiseCancellation,\n }: {\n room: Room;\n sampleRate: number;\n numChannels: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.room = room;\n this.sampleRate = sampleRate;\n this.numChannels = numChannels;\n this.noiseCancellation = noiseCancellation;\n\n this.room.on(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.on(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n }\n\n setParticipant(participant: RemoteParticipant | string | null) {\n this.logger.debug({ participant }, 'setting participant audio input');\n const participantIdentity =\n participant instanceof RemoteParticipant ? participant.identity : participant;\n\n if (this.participantIdentity === participantIdentity) {\n return;\n }\n this.participantIdentity = participantIdentity;\n this.closeStream();\n\n if (!participantIdentity) {\n return;\n }\n\n const participantValue =\n participant instanceof RemoteParticipant\n ? participant\n : this.room.remoteParticipants.get(participantIdentity);\n\n // Convert Map iterator to array for Pino serialization\n const trackPublicationsArray = Array.from(participantValue?.trackPublications.values() ?? []);\n\n this.logger.info(\n {\n participantValue: participantValue?.identity,\n trackPublications: trackPublicationsArray,\n lengthOfTrackPublications: trackPublicationsArray.length,\n },\n 'participantValue.trackPublications',\n );\n // We need to check if the participant has a microphone track and subscribe to it\n // in case we miss the tracksubscribed event\n if (participantValue) {\n for (const publication of participantValue.trackPublications.values()) {\n if (publication.track && publication.source === TrackSource.SOURCE_MICROPHONE) {\n this.onTrackSubscribed(publication.track, publication, participantValue);\n break;\n }\n }\n }\n }\n\n private onTrackUnpublished = (\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) => {\n if (\n this.publication?.sid !== publication.sid ||\n participant.identity !== this.participantIdentity\n ) {\n return;\n }\n this.closeStream();\n\n // subscribe to the first available track\n for (const publication of participant.trackPublications.values()) {\n if (\n publication.track &&\n this.onTrackSubscribed(publication.track, publication, participant)\n ) {\n return;\n }\n }\n };\n\n private closeStream() {\n if (this.deferredStream.isSourceSet) {\n this.deferredStream.detachSource();\n }\n this.publication = null;\n }\n\n private onTrackSubscribed = (\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ): boolean => {\n this.logger.debug({ participant: participant.identity }, 'onTrackSubscribed in _input');\n if (\n this.participantIdentity !== participant.identity ||\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n (this.publication && this.publication.sid === publication.sid)\n ) {\n return false;\n }\n this.closeStream();\n this.publication = publication;\n this.deferredStream.setSource(\n resampleStream({\n stream: this.createStream(track),\n outputRate: this.sampleRate,\n }),\n );\n return true;\n };\n\n private createStream(track: RemoteTrack): ReadableStream<AudioFrame> {\n return new AudioStream(track, {\n sampleRate: this.sampleRate,\n numChannels: this.numChannels,\n noiseCancellation: this.noiseCancellation,\n // TODO(AJS-269): resolve compatibility issue with node-sdk to remove the forced type casting\n }) as unknown as ReadableStream<AudioFrame>;\n }\n\n async close() {\n this.room.off(RoomEvent.TrackSubscribed, this.onTrackSubscribed);\n this.room.off(RoomEvent.TrackUnpublished, this.onTrackUnpublished);\n this.closeStream();\n this.deferredStream.stream.cancel();\n }\n}\n"],"mappings":"AAIA;AAAA,EACE;AAAA,EAEA;AAAA,EAIA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,WAAW;AACpB,SAAS,sBAAsB;AAC/B,SAAS,kBAAkB;AAEpB,MAAM,oCAAoC,WAAW;AAAA,EAClD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAA6C;AAAA,EAC7C,sBAAqC;AAAA,EACrC,SAAS,IAAI;AAAA,EACrB,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKG;AACD,UAAM;AACN,SAAK,OAAO;AACZ,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,oBAAoB;AAEzB,SAAK,KAAK,GAAG,UAAU,iBAAiB,KAAK,iBAAiB;AAC9D,SAAK,KAAK,GAAG,UAAU,kBAAkB,KAAK,kBAAkB;AAAA,EAClE;AAAA,EAEA,eAAe,aAAgD;AAC7D,SAAK,OAAO,MAAM,EAAE,YAAY,GAAG,iCAAiC;AACpE,UAAM,sBACJ,uBAAuB,oBAAoB,YAAY,WAAW;AAEpE,QAAI,KAAK,wBAAwB,qBAAqB;AACpD;AAAA,IACF;AACA,SAAK,sBAAsB;AAC3B,SAAK,YAAY;AAEjB,QAAI,CAAC,qBAAqB;AACxB;AAAA,IACF;AAEA,UAAM,mBACJ,uBAAuB,oBACnB,cACA,KAAK,KAAK,mBAAmB,IAAI,mBAAmB;AAG1D,UAAM,yBAAyB,MAAM,MAAK,qDAAkB,kBAAkB,aAAY,CAAC,CAAC;AAE5F,SAAK,OAAO;AAAA,MACV;AAAA,QACE,kBAAkB,qDAAkB;AAAA,QACpC,mBAAmB;AAAA,QACnB,2BAA2B,uBAAuB;AAAA,MACpD;AAAA,MACA;AAAA,IACF;AAGA,QAAI,kBAAkB;AACpB,iBAAW,eAAe,iBAAiB,kBAAkB,OAAO,GAAG;AACrE,YAAI,YAAY,SAAS,YAAY,WAAW,YAAY,mBAAmB;AAC7E,eAAK,kBAAkB,YAAY,OAAO,aAAa,gBAAgB;AACvE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,qBAAqB,CAC3B,aACA,gBACG;AA9FP;AA+FI,UACE,UAAK,gBAAL,mBAAkB,SAAQ,YAAY,OACtC,YAAY,aAAa,KAAK,qBAC9B;AACA;AAAA,IACF;AACA,SAAK,YAAY;AAGjB,eAAWA,gBAAe,YAAY,kBAAkB,OAAO,GAAG;AAChE,UACEA,aAAY,SACZ,KAAK,kBAAkBA,aAAY,OAAOA,cAAa,WAAW,GAClE;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,QAAI,KAAK,eAAe,aAAa;AACnC,WAAK,eAAe,aAAa;AAAA,IACnC;AACA,SAAK,cAAc;AAAA,EACrB;AAAA,EAEQ,oBAAoB,CAC1B,OACA,aACA,gBACY;AACZ,SAAK,OAAO,MAAM,EAAE,aAAa,YAAY,SAAS,GAAG,6BAA6B;AACtF,QACE,KAAK,wBAAwB,YAAY,YACzC,YAAY,WAAW,YAAY,qBAClC,KAAK,eAAe,KAAK,YAAY,QAAQ,YAAY,KAC1D;AACA,aAAO;AAAA,IACT;AACA,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,MAClB,eAAe;AAAA,QACb,QAAQ,KAAK,aAAa,KAAK;AAAA,QAC/B,YAAY,KAAK;AAAA,MACnB,CAAC;AAAA,IACH;AACA,WAAO;AAAA,EACT;AAAA,EAEQ,aAAa,OAAgD;AACnE,WAAO,IAAI,YAAY,OAAO;AAAA,MAC5B,YAAY,KAAK;AAAA,MACjB,aAAa,KAAK;AAAA,MAClB,mBAAmB,KAAK;AAAA;AAAA,IAE1B,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,KAAK,IAAI,UAAU,iBAAiB,KAAK,iBAAiB;AAC/D,SAAK,KAAK,IAAI,UAAU,kBAAkB,KAAK,kBAAkB;AACjE,SAAK,YAAY;AACjB,SAAK,eAAe,OAAO,OAAO;AAAA,EACpC;AACF;","names":["publication"]}
package/dist/worker.cjs CHANGED
@@ -28,7 +28,9 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
28
28
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
29
  var worker_exports = {};
30
30
  __export(worker_exports, {
31
+ AgentServer: () => AgentServer,
31
32
  MissingCredentialsError: () => MissingCredentialsError,
33
+ ServerOptions: () => ServerOptions,
32
34
  Worker: () => Worker,
33
35
  WorkerError: () => WorkerError,
34
36
  WorkerOptions: () => WorkerOptions,
@@ -127,7 +129,7 @@ class WorkerPermissions {
127
129
  this.hidden = hidden;
128
130
  }
129
131
  }
130
- class WorkerOptions {
132
+ class ServerOptions {
131
133
  agent;
132
134
  requestFunc;
133
135
  loadFunc;
@@ -137,7 +139,7 @@ class WorkerOptions {
137
139
  initializeProcessTimeout;
138
140
  permissions;
139
141
  agentName;
140
- workerType;
142
+ serverType;
141
143
  maxRetry;
142
144
  wsURL;
143
145
  apiKey;
@@ -160,7 +162,7 @@ class WorkerOptions {
160
162
  initializeProcessTimeout = 10 * 1e3,
161
163
  permissions = new WorkerPermissions(),
162
164
  agentName = "",
163
- workerType = import_protocol.JobType.JT_ROOM,
165
+ serverType = import_protocol.JobType.JT_ROOM,
164
166
  maxRetry = MAX_RECONNECT_ATTEMPTS,
165
167
  wsURL = "ws://localhost:7880",
166
168
  apiKey = void 0,
@@ -185,7 +187,7 @@ class WorkerOptions {
185
187
  this.initializeProcessTimeout = initializeProcessTimeout;
186
188
  this.permissions = permissions;
187
189
  this.agentName = agentName;
188
- this.workerType = workerType;
190
+ this.serverType = serverType;
189
191
  this.maxRetry = maxRetry;
190
192
  this.wsURL = wsURL;
191
193
  this.apiKey = apiKey;
@@ -207,7 +209,7 @@ class PendingAssignment {
207
209
  arg;
208
210
  }
209
211
  }
210
- class Worker {
212
+ class AgentServer {
211
213
  #opts;
212
214
  #procPool;
213
215
  #id = "unregistered";
@@ -222,22 +224,22 @@ class Worker {
222
224
  #httpServer;
223
225
  #logger = (0, import_log.log)().child({ version: import_version.version });
224
226
  #inferenceExecutor;
225
- /** @throws {@link MissingCredentialsError} if URL, API key or API secret are missing */
227
+ /* @throws {@link MissingCredentialsError} if URL, API key or API secret are missing */
226
228
  constructor(opts) {
227
229
  opts.wsURL = opts.wsURL || process.env.LIVEKIT_URL || "";
228
230
  opts.apiKey = opts.apiKey || process.env.LIVEKIT_API_KEY || "";
229
231
  opts.apiSecret = opts.apiSecret || process.env.LIVEKIT_API_SECRET || "";
230
232
  if (opts.wsURL === "")
231
233
  throw new MissingCredentialsError(
232
- "URL is required: Set LIVEKIT_URL, run with --url, or pass wsURL in WorkerOptions"
234
+ "URL is required: Set LIVEKIT_URL, run with --url, or pass wsURL in ServerOptions"
233
235
  );
234
236
  if (opts.apiKey === "")
235
237
  throw new MissingCredentialsError(
236
- "API Key is required: Set LIVEKIT_API_KEY, run with --api-key, or pass apiKey in WorkerOptions"
238
+ "API Key is required: Set LIVEKIT_API_KEY, run with --api-key, or pass apiKey in ServerOptions"
237
239
  );
238
240
  if (opts.apiSecret === "")
239
241
  throw new MissingCredentialsError(
240
- "API Secret is required: Set LIVEKIT_API_SECRET, run with --api-secret, or pass apiSecret in WorkerOptions"
242
+ "API Secret is required: Set LIVEKIT_API_SECRET, run with --api-secret, or pass apiSecret in ServerOptions"
241
243
  );
242
244
  if (opts.workerToken) {
243
245
  if (opts.loadFunc !== defaultCpuLoad) {
@@ -278,7 +280,7 @@ class Worker {
278
280
  this.#opts = opts;
279
281
  this.#httpServer = new import_http_server.HTTPServer(opts.host, opts.port, () => ({
280
282
  agent_name: opts.agentName,
281
- worker_type: import_protocol.JobType[opts.workerType],
283
+ worker_type: import_protocol.JobType[opts.serverType],
282
284
  active_jobs: this.activeJobs.length,
283
285
  sdk_version: import_version.version,
284
286
  project_type: PROJECT_TYPE
@@ -511,7 +513,7 @@ class Worker {
511
513
  message: {
512
514
  case: "register",
513
515
  value: {
514
- type: this.#opts.workerType,
516
+ type: this.#opts.serverType,
515
517
  agentName: this.#opts.agentName,
516
518
  allowedPermissions: new import_protocol.ParticipantPermission({
517
519
  canPublish: this.#opts.permissions.canPublish,
@@ -663,9 +665,13 @@ class Worker {
663
665
  await this.#close.await;
664
666
  }
665
667
  }
668
+ const Worker = AgentServer;
669
+ const WorkerOptions = ServerOptions;
666
670
  // Annotate the CommonJS export names for ESM import in node:
667
671
  0 && (module.exports = {
672
+ AgentServer,
668
673
  MissingCredentialsError,
674
+ ServerOptions,
669
675
  Worker,
670
676
  WorkerError,
671
677
  WorkerOptions,