@livekit/agents 1.0.36-dev.0 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/dist/index.cjs +1 -3
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +0 -1
  4. package/dist/index.d.ts +0 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +0 -1
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/utils.cjs +2 -15
  9. package/dist/inference/utils.cjs.map +1 -1
  10. package/dist/inference/utils.d.cts +0 -1
  11. package/dist/inference/utils.d.ts +0 -1
  12. package/dist/inference/utils.d.ts.map +1 -1
  13. package/dist/inference/utils.js +1 -13
  14. package/dist/inference/utils.js.map +1 -1
  15. package/dist/stream/stream_channel.cjs +0 -3
  16. package/dist/stream/stream_channel.cjs.map +1 -1
  17. package/dist/stream/stream_channel.d.cts +2 -3
  18. package/dist/stream/stream_channel.d.ts +2 -3
  19. package/dist/stream/stream_channel.d.ts.map +1 -1
  20. package/dist/stream/stream_channel.js +0 -3
  21. package/dist/stream/stream_channel.js.map +1 -1
  22. package/dist/telemetry/trace_types.cjs +0 -15
  23. package/dist/telemetry/trace_types.cjs.map +1 -1
  24. package/dist/telemetry/trace_types.d.cts +0 -5
  25. package/dist/telemetry/trace_types.d.ts +0 -5
  26. package/dist/telemetry/trace_types.d.ts.map +1 -1
  27. package/dist/telemetry/trace_types.js +0 -10
  28. package/dist/telemetry/trace_types.js.map +1 -1
  29. package/dist/voice/agent_activity.cjs +19 -68
  30. package/dist/voice/agent_activity.cjs.map +1 -1
  31. package/dist/voice/agent_activity.d.cts +0 -14
  32. package/dist/voice/agent_activity.d.ts +0 -14
  33. package/dist/voice/agent_activity.d.ts.map +1 -1
  34. package/dist/voice/agent_activity.js +19 -68
  35. package/dist/voice/agent_activity.js.map +1 -1
  36. package/dist/voice/agent_session.cjs +65 -37
  37. package/dist/voice/agent_session.cjs.map +1 -1
  38. package/dist/voice/agent_session.d.cts +25 -4
  39. package/dist/voice/agent_session.d.ts +25 -4
  40. package/dist/voice/agent_session.d.ts.map +1 -1
  41. package/dist/voice/agent_session.js +65 -37
  42. package/dist/voice/agent_session.js.map +1 -1
  43. package/dist/voice/audio_recognition.cjs +2 -124
  44. package/dist/voice/audio_recognition.cjs.map +1 -1
  45. package/dist/voice/audio_recognition.d.cts +1 -32
  46. package/dist/voice/audio_recognition.d.ts +1 -32
  47. package/dist/voice/audio_recognition.d.ts.map +1 -1
  48. package/dist/voice/audio_recognition.js +2 -127
  49. package/dist/voice/audio_recognition.js.map +1 -1
  50. package/dist/voice/index.cjs +14 -1
  51. package/dist/voice/index.cjs.map +1 -1
  52. package/dist/voice/index.d.cts +1 -0
  53. package/dist/voice/index.d.ts +1 -0
  54. package/dist/voice/index.d.ts.map +1 -1
  55. package/dist/voice/index.js +3 -1
  56. package/dist/voice/index.js.map +1 -1
  57. package/dist/voice/room_io/room_io.cjs +1 -0
  58. package/dist/voice/room_io/room_io.cjs.map +1 -1
  59. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  60. package/dist/voice/room_io/room_io.js +1 -0
  61. package/dist/voice/room_io/room_io.js.map +1 -1
  62. package/dist/voice/speech_handle.cjs +12 -3
  63. package/dist/voice/speech_handle.cjs.map +1 -1
  64. package/dist/voice/speech_handle.d.cts +12 -2
  65. package/dist/voice/speech_handle.d.ts +12 -2
  66. package/dist/voice/speech_handle.d.ts.map +1 -1
  67. package/dist/voice/speech_handle.js +10 -2
  68. package/dist/voice/speech_handle.js.map +1 -1
  69. package/dist/voice/testing/index.cjs +54 -0
  70. package/dist/voice/testing/index.cjs.map +1 -0
  71. package/dist/voice/testing/index.d.cts +20 -0
  72. package/dist/voice/testing/index.d.ts +20 -0
  73. package/dist/voice/testing/index.d.ts.map +1 -0
  74. package/dist/voice/testing/index.js +33 -0
  75. package/dist/voice/testing/index.js.map +1 -0
  76. package/dist/voice/testing/run_result.cjs +766 -0
  77. package/dist/voice/testing/run_result.cjs.map +1 -0
  78. package/dist/voice/testing/run_result.d.cts +374 -0
  79. package/dist/voice/testing/run_result.d.ts +374 -0
  80. package/dist/voice/testing/run_result.d.ts.map +1 -0
  81. package/dist/voice/testing/run_result.js +739 -0
  82. package/dist/voice/testing/run_result.js.map +1 -0
  83. package/dist/{inference/interruption/index.cjs → voice/testing/types.cjs} +24 -12
  84. package/dist/voice/testing/types.cjs.map +1 -0
  85. package/dist/voice/testing/types.d.cts +83 -0
  86. package/dist/voice/testing/types.d.ts +83 -0
  87. package/dist/voice/testing/types.d.ts.map +1 -0
  88. package/dist/voice/testing/types.js +19 -0
  89. package/dist/voice/testing/types.js.map +1 -0
  90. package/package.json +3 -4
  91. package/src/index.ts +0 -2
  92. package/src/inference/utils.ts +0 -15
  93. package/src/stream/stream_channel.ts +2 -6
  94. package/src/telemetry/trace_types.ts +0 -7
  95. package/src/voice/agent_activity.ts +24 -83
  96. package/src/voice/agent_session.ts +74 -49
  97. package/src/voice/audio_recognition.ts +1 -161
  98. package/src/voice/index.ts +1 -0
  99. package/src/voice/room_io/room_io.ts +1 -0
  100. package/src/voice/speech_handle.ts +24 -4
  101. package/src/voice/testing/index.ts +50 -0
  102. package/src/voice/testing/run_result.ts +937 -0
  103. package/src/voice/testing/types.ts +118 -0
  104. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs +0 -152
  105. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs.map +0 -1
  106. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.cts +0 -50
  107. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts +0 -50
  108. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts.map +0 -1
  109. package/dist/inference/interruption/AdaptiveInterruptionDetector.js +0 -125
  110. package/dist/inference/interruption/AdaptiveInterruptionDetector.js.map +0 -1
  111. package/dist/inference/interruption/InterruptionStream.cjs +0 -310
  112. package/dist/inference/interruption/InterruptionStream.cjs.map +0 -1
  113. package/dist/inference/interruption/InterruptionStream.d.cts +0 -57
  114. package/dist/inference/interruption/InterruptionStream.d.ts +0 -57
  115. package/dist/inference/interruption/InterruptionStream.d.ts.map +0 -1
  116. package/dist/inference/interruption/InterruptionStream.js +0 -288
  117. package/dist/inference/interruption/InterruptionStream.js.map +0 -1
  118. package/dist/inference/interruption/defaults.cjs +0 -76
  119. package/dist/inference/interruption/defaults.cjs.map +0 -1
  120. package/dist/inference/interruption/defaults.d.cts +0 -14
  121. package/dist/inference/interruption/defaults.d.ts +0 -14
  122. package/dist/inference/interruption/defaults.d.ts.map +0 -1
  123. package/dist/inference/interruption/defaults.js +0 -42
  124. package/dist/inference/interruption/defaults.js.map +0 -1
  125. package/dist/inference/interruption/errors.cjs +0 -2
  126. package/dist/inference/interruption/errors.cjs.map +0 -1
  127. package/dist/inference/interruption/errors.d.cts +0 -2
  128. package/dist/inference/interruption/errors.d.ts +0 -2
  129. package/dist/inference/interruption/errors.d.ts.map +0 -1
  130. package/dist/inference/interruption/errors.js +0 -1
  131. package/dist/inference/interruption/errors.js.map +0 -1
  132. package/dist/inference/interruption/http_transport.cjs +0 -57
  133. package/dist/inference/interruption/http_transport.cjs.map +0 -1
  134. package/dist/inference/interruption/http_transport.d.cts +0 -23
  135. package/dist/inference/interruption/http_transport.d.ts +0 -23
  136. package/dist/inference/interruption/http_transport.d.ts.map +0 -1
  137. package/dist/inference/interruption/http_transport.js +0 -33
  138. package/dist/inference/interruption/http_transport.js.map +0 -1
  139. package/dist/inference/interruption/index.cjs.map +0 -1
  140. package/dist/inference/interruption/index.d.cts +0 -5
  141. package/dist/inference/interruption/index.d.ts +0 -5
  142. package/dist/inference/interruption/index.d.ts.map +0 -1
  143. package/dist/inference/interruption/index.js +0 -7
  144. package/dist/inference/interruption/index.js.map +0 -1
  145. package/dist/inference/interruption/interruption.cjs +0 -85
  146. package/dist/inference/interruption/interruption.cjs.map +0 -1
  147. package/dist/inference/interruption/interruption.d.cts +0 -48
  148. package/dist/inference/interruption/interruption.d.ts +0 -48
  149. package/dist/inference/interruption/interruption.d.ts.map +0 -1
  150. package/dist/inference/interruption/interruption.js +0 -59
  151. package/dist/inference/interruption/interruption.js.map +0 -1
  152. package/dist/inference/utils.test.cjs +0 -20
  153. package/dist/inference/utils.test.cjs.map +0 -1
  154. package/dist/inference/utils.test.js +0 -19
  155. package/dist/inference/utils.test.js.map +0 -1
  156. package/dist/utils/ws_transport.cjs +0 -51
  157. package/dist/utils/ws_transport.cjs.map +0 -1
  158. package/dist/utils/ws_transport.d.cts +0 -9
  159. package/dist/utils/ws_transport.d.ts +0 -9
  160. package/dist/utils/ws_transport.d.ts.map +0 -1
  161. package/dist/utils/ws_transport.js +0 -17
  162. package/dist/utils/ws_transport.js.map +0 -1
  163. package/dist/utils/ws_transport.test.cjs +0 -212
  164. package/dist/utils/ws_transport.test.cjs.map +0 -1
  165. package/dist/utils/ws_transport.test.js +0 -211
  166. package/dist/utils/ws_transport.test.js.map +0 -1
  167. package/src/inference/interruption/AdaptiveInterruptionDetector.ts +0 -166
  168. package/src/inference/interruption/InterruptionStream.ts +0 -397
  169. package/src/inference/interruption/defaults.ts +0 -33
  170. package/src/inference/interruption/errors.ts +0 -0
  171. package/src/inference/interruption/http_transport.ts +0 -61
  172. package/src/inference/interruption/index.ts +0 -4
  173. package/src/inference/interruption/interruption.ts +0 -88
  174. package/src/inference/utils.test.ts +0 -31
  175. package/src/utils/ws_transport.test.ts +0 -282
  176. package/src/utils/ws_transport.ts +0 -22
@@ -1,397 +0,0 @@
1
- import { AudioFrame, AudioResampler } from '@livekit/rtc-node';
2
- import type { Span } from '@opentelemetry/api';
3
- import { traceTypes } from '../../telemetry/index.js';
4
- import { type ReadableStream, TransformStream, WritableStream } from 'stream/web';
5
- import { log } from '../../log.js';
6
- import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js';
7
- import { createAccessToken } from '../utils.js';
8
- import type {
9
- AdaptiveInterruptionDetector,
10
- InterruptionOptions,
11
- } from './AdaptiveInterruptionDetector.js';
12
- import { apiConnectDefaults } from './defaults.js';
13
- import { predictHTTP } from './http_transport.js';
14
- import {
15
- InterruptionCacheEntry,
16
- type InterruptionDetectionError,
17
- type InterruptionEvent,
18
- InterruptionEventType,
19
- } from './interruption.js';
20
-
21
- export interface AgentSpeechStarted {
22
- type: 'agent-speech-started';
23
- }
24
-
25
- export interface AgentSpeechEnded {
26
- type: 'agent-speech-ended';
27
- }
28
-
29
- export interface OverlapSpeechStarted {
30
- type: 'overlap-speech-started';
31
- speechDuration: number;
32
- userSpeakingSpan: Span;
33
- }
34
-
35
- export interface OverlapSpeechEnded {
36
- type: 'overlap-speech-ended';
37
- }
38
-
39
- export interface Flush {
40
- type: 'flush';
41
- }
42
-
43
- export type InterruptionSentinel =
44
- | AgentSpeechStarted
45
- | AgentSpeechEnded
46
- | OverlapSpeechStarted
47
- | OverlapSpeechEnded
48
- | Flush;
49
-
50
- export class InterruptionStreamSentinel {
51
- static speechStarted(): AgentSpeechStarted {
52
- return { type: 'agent-speech-started' };
53
- }
54
-
55
- static speechEnded(): AgentSpeechEnded {
56
- return { type: 'agent-speech-ended' };
57
- }
58
-
59
- static overlapSpeechStarted(
60
- speechDuration: number,
61
- userSpeakingSpan: Span,
62
- ): OverlapSpeechStarted {
63
- return { type: 'overlap-speech-started', speechDuration, userSpeakingSpan };
64
- }
65
-
66
- static overlapSpeechEnded(): OverlapSpeechEnded {
67
- return { type: 'overlap-speech-ended' };
68
- }
69
-
70
- static flush(): Flush {
71
- return { type: 'flush' };
72
- }
73
- }
74
-
75
- export interface ApiConnectOptions {
76
- maxRetries: number;
77
- retryInterval: number;
78
- timeout: number;
79
- }
80
-
81
- function updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) {
82
- span.setAttribute(
83
- traceTypes.ATTR_IS_INTERRUPTION,
84
- (entry.isInterruption ?? false).toString().toLowerCase(),
85
- );
86
- span.setAttribute(traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability);
87
- span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDuration);
88
- span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDuration);
89
- span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelay);
90
- }
91
-
92
- export class InterruptionStreamBase {
93
- private inputStream: StreamChannel<InterruptionSentinel | AudioFrame, InterruptionDetectionError>;
94
-
95
- private eventStream: StreamChannel<InterruptionEvent, InterruptionDetectionError>;
96
-
97
- private resampler?: AudioResampler;
98
-
99
- private userSpeakingSpan: Span | undefined;
100
-
101
- private overlapSpeechStartedAt: number | undefined;
102
-
103
- private options: InterruptionOptions;
104
-
105
- private apiOptions: ApiConnectOptions;
106
-
107
- private model: AdaptiveInterruptionDetector;
108
-
109
- constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial<ApiConnectOptions>) {
110
- this.inputStream = createStreamChannel<
111
- InterruptionSentinel | AudioFrame,
112
- InterruptionDetectionError
113
- >();
114
-
115
- this.eventStream = createStreamChannel<InterruptionEvent, InterruptionDetectionError>();
116
-
117
- this.model = model;
118
- this.options = model.options;
119
- this.apiOptions = { ...apiConnectDefaults, ...apiOptions };
120
-
121
- this.setupTransform();
122
- }
123
-
124
- private setupTransform() {
125
- let agentSpeechStarted = false;
126
- let startIdx = 0;
127
- let accumulatedSamples = 0;
128
- let overlapSpeechStarted = false;
129
- const cache = new Map<number, InterruptionCacheEntry>(); // TODO limit cache size
130
- const inferenceS16Data = new Int16Array(
131
- Math.ceil(this.options.maxAudioDuration * this.options.sampleRate),
132
- ).fill(0);
133
-
134
- const transformer = new TransformStream<InterruptionSentinel | AudioFrame, Int16Array>(
135
- {
136
- transform: (chunk, controller) => {
137
- if (chunk instanceof AudioFrame) {
138
- if (!agentSpeechStarted) {
139
- return;
140
- }
141
- if (this.options.sampleRate !== chunk.sampleRate) {
142
- controller.error('the sample rate of the input frames must be consistent');
143
- return;
144
- }
145
- const result = writeToInferenceS16Data(
146
- chunk,
147
- startIdx,
148
- inferenceS16Data,
149
- this.options.maxAudioDuration,
150
- );
151
- startIdx = result.startIdx;
152
- accumulatedSamples += result.samplesWritten;
153
-
154
- // Send data for inference when enough samples accumulated during overlap
155
- if (
156
- accumulatedSamples >=
157
- Math.floor(this.options.detectionInterval * this.options.sampleRate) &&
158
- overlapSpeechStarted
159
- ) {
160
- // Send a copy of the audio data up to startIdx for inference
161
- const audioSlice = inferenceS16Data.slice(0, startIdx);
162
- // TODO: send to data channel - dataChan.send(audioSlice);
163
- accumulatedSamples = 0;
164
- controller.enqueue(audioSlice);
165
- }
166
- } else if (chunk.type === 'agent-speech-started') {
167
- log().debug('agent speech started');
168
-
169
- agentSpeechStarted = true;
170
- overlapSpeechStarted = false;
171
- accumulatedSamples = 0;
172
- startIdx = 0;
173
- cache.clear();
174
- } else if (chunk.type === 'agent-speech-ended') {
175
- log().debug('agent speech ended');
176
-
177
- agentSpeechStarted = false;
178
- overlapSpeechStarted = false;
179
- accumulatedSamples = 0;
180
- startIdx = 0;
181
- cache.clear();
182
- } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) {
183
- this.userSpeakingSpan = chunk.userSpeakingSpan;
184
- log().debug('overlap speech started, starting interruption inference');
185
- overlapSpeechStarted = true;
186
- accumulatedSamples = 0;
187
- // Include both speech duration and audio prefix duration for context
188
- const shiftSize = Math.min(
189
- startIdx,
190
- Math.round(chunk.speechDuration * this.options.sampleRate) +
191
- Math.round(this.options.audioPrefixDuration * this.options.sampleRate),
192
- );
193
- // Shift the buffer: copy the last `shiftSize` samples before startIdx
194
- // to the beginning of the buffer. This preserves recent audio context
195
- // (the user's speech that occurred just before overlap was detected).
196
- inferenceS16Data.copyWithin(0, startIdx - shiftSize, startIdx);
197
- startIdx = shiftSize;
198
- cache.clear();
199
- } else if (chunk.type === 'overlap-speech-ended') {
200
- log().debug('overlap speech ended');
201
-
202
- if (overlapSpeechStarted) {
203
- this.userSpeakingSpan = undefined;
204
- let latestEntry = Array.from(cache.values()).at(-1);
205
- if (!latestEntry) {
206
- log().debug('no request made for overlap speech');
207
- latestEntry = InterruptionCacheEntry.default();
208
- } else {
209
- cache.delete(latestEntry.createdAt);
210
- }
211
- const event: InterruptionEvent = {
212
- type: InterruptionEventType.OVERLAP_SPEECH_ENDED,
213
- timestamp: Date.now(),
214
- isInterruption: false,
215
- overlapSpeechStartedAt: this.overlapSpeechStartedAt,
216
- speechInput: latestEntry.speechInput,
217
- probabilities: latestEntry.probabilities,
218
- totalDuration: latestEntry.totalDuration,
219
- detectionDelay: latestEntry.detectionDelay,
220
- predictionDuration: latestEntry.predictionDuration,
221
- probability: latestEntry.probability,
222
- };
223
- this.eventStream.write(event);
224
- }
225
- } else if (chunk.type === 'flush') {
226
- log().debug('flushing');
227
- // do nothing
228
- }
229
- },
230
- },
231
- { highWaterMark: Number.MAX_SAFE_INTEGER },
232
- { highWaterMark: Number.MAX_SAFE_INTEGER },
233
- );
234
-
235
- const httpPostWriter = new WritableStream<Int16Array>(
236
- {
237
- // Implement the sink
238
- write: async (chunk) => {
239
- if (!this.overlapSpeechStartedAt) {
240
- return;
241
- }
242
- const resp = await predictHTTP(
243
- chunk,
244
- { threshold: this.options.threshold, minFrames: this.options.minFrames },
245
- {
246
- baseUrl: this.options.baseUrl,
247
- timeout: this.options.inferenceTimeout,
248
- token: await createAccessToken(this.options.apiKey, this.options.apiSecret),
249
- },
250
- );
251
- console.log('received inference response', resp);
252
- const { createdAt, isBargein, probabilities, predictionDuration } = resp;
253
- const entry = new InterruptionCacheEntry({
254
- createdAt,
255
- probabilities,
256
- isInterruption: isBargein,
257
- speechInput: chunk,
258
- totalDuration: (performance.now() - createdAt) / 1e9,
259
- detectionDelay: Date.now() - this.overlapSpeechStartedAt,
260
- predictionDuration,
261
- });
262
- cache.set(createdAt, entry);
263
- if (overlapSpeechStarted && entry.isInterruption) {
264
- if (this.userSpeakingSpan) {
265
- updateUserSpeakingSpan(this.userSpeakingSpan, entry);
266
- }
267
- const event: InterruptionEvent = {
268
- type: InterruptionEventType.INTERRUPTION,
269
- timestamp: Date.now(),
270
- overlapSpeechStartedAt: this.overlapSpeechStartedAt,
271
- isInterruption: entry.isInterruption,
272
- speechInput: entry.speechInput,
273
- probabilities: entry.probabilities,
274
- totalDuration: entry.totalDuration,
275
- predictionDuration: entry.predictionDuration,
276
- detectionDelay: entry.detectionDelay,
277
- probability: entry.probability,
278
- };
279
- this.eventStream.write(event);
280
- }
281
- },
282
- close() {
283
- console.log('closing http writer');
284
- },
285
- abort(err) {
286
- console.log('Sink error:', err);
287
- },
288
- },
289
- { highWaterMark: Number.MAX_SAFE_INTEGER },
290
- );
291
-
292
- this.inputStream.stream().pipeThrough(transformer).pipeTo(httpPostWriter);
293
- }
294
-
295
- private ensureInputNotEnded() {
296
- if (this.inputStream.closed) {
297
- throw new Error('input stream is closed');
298
- }
299
- }
300
-
301
- private ensureStreamsNotEnded() {
302
- this.ensureInputNotEnded();
303
- }
304
-
305
- private getResamplerFor(inputSampleRate: number): AudioResampler {
306
- if (!this.resampler) {
307
- this.resampler = new AudioResampler(inputSampleRate, this.options.sampleRate);
308
- }
309
- return this.resampler;
310
- }
311
-
312
- get stream(): ReadableStream<InterruptionEvent> {
313
- return this.eventStream.stream();
314
- }
315
-
316
- async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise<void> {
317
- this.ensureStreamsNotEnded();
318
- if (!(frame instanceof AudioFrame)) {
319
- if (frame.type === 'overlap-speech-started') {
320
- this.overlapSpeechStartedAt = Date.now() - frame.speechDuration;
321
- }
322
- return this.inputStream.write(frame);
323
- } else if (this.options.sampleRate !== frame.sampleRate) {
324
- const resampler = this.getResamplerFor(frame.sampleRate);
325
- if (resampler.inputRate !== frame.sampleRate) {
326
- throw new Error('the sample rate of the input frames must be consistent');
327
- }
328
- for (const resampledFrame of resampler.push(frame)) {
329
- await this.inputStream.write(resampledFrame);
330
- }
331
- } else {
332
- await this.inputStream.write(frame);
333
- }
334
- }
335
-
336
- async flush(): Promise<void> {
337
- this.ensureStreamsNotEnded();
338
- this.inputStream.write(InterruptionStreamSentinel.flush());
339
- }
340
-
341
- async endInput(): Promise<void> {
342
- await this.flush();
343
- await this.inputStream.close();
344
- }
345
-
346
- async close(): Promise<void> {
347
- if (!this.inputStream.closed) await this.inputStream.close();
348
- }
349
- }
350
-
351
- /**
352
- * Write the audio frame to the output data array and return the new start index
353
- * and the number of samples written.
354
- */
355
- function writeToInferenceS16Data(
356
- frame: AudioFrame,
357
- startIdx: number,
358
- outData: Int16Array,
359
- maxAudioDuration: number,
360
- ): { startIdx: number; samplesWritten: number } {
361
- const maxWindowSize = Math.floor(maxAudioDuration * frame.sampleRate);
362
-
363
- if (frame.samplesPerChannel > outData.length) {
364
- throw new Error('frame samples are greater than the max window size');
365
- }
366
-
367
- // Shift the data to the left if the window would overflow
368
- const shift = startIdx + frame.samplesPerChannel - maxWindowSize;
369
- if (shift > 0) {
370
- outData.copyWithin(0, shift, startIdx);
371
- startIdx -= shift;
372
- }
373
-
374
- // Get the frame data as Int16Array
375
- const frameData = new Int16Array(
376
- frame.data.buffer,
377
- frame.data.byteOffset,
378
- frame.samplesPerChannel * frame.channels,
379
- );
380
-
381
- if (frame.channels > 1) {
382
- // Mix down multiple channels to mono by averaging
383
- for (let i = 0; i < frame.samplesPerChannel; i++) {
384
- let sum = 0;
385
- for (let ch = 0; ch < frame.channels; ch++) {
386
- sum += frameData[i * frame.channels + ch] ?? 0;
387
- }
388
- outData[startIdx + i] = Math.floor(sum / frame.channels);
389
- }
390
- } else {
391
- // Single channel - copy directly
392
- outData.set(frameData, startIdx);
393
- }
394
-
395
- startIdx += frame.samplesPerChannel;
396
- return { startIdx, samplesWritten: frame.samplesPerChannel };
397
- }
@@ -1,33 +0,0 @@
1
- import type { InterruptionOptions } from './AdaptiveInterruptionDetector.js';
2
- import type { ApiConnectOptions } from './InterruptionStream.js';
3
-
4
- export const MIN_INTERRUPTION_DURATION = 0.025 * 2; // 25ms per frame, 2 consecutive frames
5
- export const THRESHOLD = 0.65;
6
- export const MAX_AUDIO_DURATION = 3.0;
7
- export const AUDIO_PREFIX_DURATION = 0.5;
8
- export const DETECTION_INTERVAL = 0.1;
9
- export const REMOTE_INFERENCE_TIMEOUT = 1.0;
10
- export const SAMPLE_RATE = 16000;
11
- export const FRAMES_PER_SECOND = 40;
12
- export const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
13
-
14
- export const apiConnectDefaults: ApiConnectOptions = {
15
- maxRetries: 3,
16
- retryInterval: 2_000,
17
- timeout: 10_000,
18
- } as const;
19
-
20
- export const interruptionOptionDefaults: InterruptionOptions = {
21
- sampleRate: SAMPLE_RATE,
22
- threshold: THRESHOLD,
23
- minFrames: Math.ceil(MIN_INTERRUPTION_DURATION * FRAMES_PER_SECOND),
24
- maxAudioDuration: MAX_AUDIO_DURATION,
25
- audioPrefixDuration: AUDIO_PREFIX_DURATION,
26
- detectionInterval: DETECTION_INTERVAL,
27
- inferenceTimeout: 10_000,
28
- baseUrl: DEFAULT_BASE_URL,
29
- apiKey: process.env.LIVEKIT_API_KEY || '',
30
- apiSecret: process.env.LIVEKIT_API_SECRET || '',
31
- useProxy: false,
32
- minInterruptionDuration: MIN_INTERRUPTION_DURATION,
33
- } as const;
File without changes
@@ -1,61 +0,0 @@
1
- import { ofetch } from 'ofetch';
2
-
3
- export interface PostOptions {
4
- baseUrl: string;
5
- token: string;
6
- signal?: AbortSignal;
7
- timeout?: number;
8
- }
9
-
10
- export interface PredictOptions {
11
- threshold: number;
12
- minFrames: number;
13
- }
14
-
15
- export interface PredictEndpointResponse {
16
- created_at: number;
17
- is_bargein: boolean;
18
- probabilities: number[];
19
- }
20
-
21
- export interface PredictResponse {
22
- createdAt: number;
23
- isBargein: boolean;
24
- probabilities: Float32Array;
25
- predictionDuration: number;
26
- }
27
-
28
- export async function predictHTTP(
29
- data: Int16Array,
30
- predictOptions: PredictOptions,
31
- options: PostOptions,
32
- ): Promise<PredictResponse> {
33
- const createdAt = performance.now();
34
- const url = new URL(`/bargein`, options.baseUrl);
35
- url.searchParams.append('threshold', predictOptions.threshold.toString());
36
- url.searchParams.append('min_frames', predictOptions.minFrames.toFixed());
37
- url.searchParams.append('created_at', createdAt.toFixed());
38
-
39
- const { created_at, is_bargein, probabilities } = await ofetch<PredictEndpointResponse>(
40
- url.toString(),
41
- {
42
- retry: 1,
43
- retryDelay: 100,
44
- headers: {
45
- 'Content-Type': 'application/octet-stream',
46
- Authorization: `Bearer ${options.token}`,
47
- },
48
- signal: options.signal,
49
- timeout: options.timeout,
50
- method: 'POST',
51
- body: data,
52
- },
53
- );
54
-
55
- return {
56
- createdAt: created_at,
57
- isBargein: is_bargein,
58
- probabilities: new Float32Array(probabilities),
59
- predictionDuration: (performance.now() - createdAt) / 1e9,
60
- };
61
- }
@@ -1,4 +0,0 @@
1
- export * from './AdaptiveInterruptionDetector.js';
2
- export * from './interruption.js';
3
- export { InterruptionStreamSentinel } from './InterruptionStream.js';
4
- export type { InterruptionSentinel } from './InterruptionStream.js';
@@ -1,88 +0,0 @@
1
- import { slidingWindowMinMax } from '../utils.js';
2
- import { MIN_INTERRUPTION_DURATION } from './defaults.js';
3
-
4
- export enum InterruptionEventType {
5
- INTERRUPTION = 'interruption',
6
- OVERLAP_SPEECH_ENDED = 'overlap_speech_ended',
7
- }
8
- export interface InterruptionEvent {
9
- type: InterruptionEventType;
10
- timestamp: number;
11
- isInterruption: boolean;
12
- totalDuration: number;
13
- predictionDuration: number;
14
- detectionDelay: number;
15
- overlapSpeechStartedAt?: number;
16
- speechInput?: Int16Array;
17
- probabilities?: Float32Array;
18
- probability: number;
19
- }
20
-
21
- export class InterruptionDetectionError extends Error {
22
- readonly type = 'InterruptionDetectionError';
23
-
24
- readonly timestamp: number;
25
- readonly label: string;
26
- readonly recoverable: boolean;
27
-
28
- constructor(message: string, timestamp: number, label: string, recoverable: boolean) {
29
- super(message);
30
- this.name = 'InterruptionDetectionError';
31
- this.timestamp = timestamp;
32
- this.label = label;
33
- this.recoverable = recoverable;
34
- }
35
-
36
- toString(): string {
37
- return `${this.name}: ${this.message} (label=${this.label}, timestamp=${this.timestamp}, recoverable=${this.recoverable})`;
38
- }
39
- }
40
-
41
- function estimateProbability(
42
- probabilities: Float32Array,
43
- windowSize: number = MIN_INTERRUPTION_DURATION,
44
- ): number {
45
- const minWindow = Math.ceil(windowSize / 0.025); // 25ms per frame
46
- if (probabilities.length < minWindow) {
47
- return 0;
48
- }
49
-
50
- return slidingWindowMinMax(probabilities, windowSize);
51
- }
52
-
53
- /**
54
- * Typed cache entry for interruption inference results.
55
- */
56
- export class InterruptionCacheEntry {
57
- readonly createdAt: number;
58
- readonly totalDuration: number;
59
- readonly predictionDuration: number;
60
- readonly detectionDelay: number;
61
- readonly speechInput?: Int16Array;
62
- readonly probabilities?: Float32Array;
63
- readonly isInterruption?: boolean;
64
- readonly probability: number;
65
-
66
- constructor(params: {
67
- createdAt: number;
68
- speechInput?: Int16Array;
69
- totalDuration?: number;
70
- predictionDuration?: number;
71
- detectionDelay?: number;
72
- probabilities?: Float32Array;
73
- isInterruption?: boolean;
74
- }) {
75
- this.createdAt = params.createdAt;
76
- this.totalDuration = params.totalDuration ?? 0;
77
- this.predictionDuration = params.predictionDuration ?? 0;
78
- this.detectionDelay = params.detectionDelay ?? 0;
79
- this.speechInput = params.speechInput;
80
- this.probabilities = params.probabilities;
81
- this.isInterruption = params.isInterruption;
82
- this.probability = this.probabilities ? estimateProbability(this.probabilities) : 0;
83
- }
84
-
85
- static default(): InterruptionCacheEntry {
86
- return new InterruptionCacheEntry({ createdAt: 0 });
87
- }
88
- }
@@ -1,31 +0,0 @@
1
- // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
- //
3
- // SPDX-License-Identifier: Apache-2.0
4
- import { describe, expect, it } from 'vitest';
5
- import { slidingWindowMinMax } from './utils.js';
6
-
7
- describe('slidingWindowMinMax', () => {
8
- it('returns -Infinity when array is shorter than window size', () => {
9
- expect(slidingWindowMinMax([0.5, 0.6], 3)).toBe(-Infinity);
10
- expect(slidingWindowMinMax([], 1)).toBe(-Infinity);
11
- });
12
-
13
- it('returns the max value when window size is 1', () => {
14
- // With window size 1, min of each window is the element itself,
15
- // so max of mins is just the max of the array
16
- expect(slidingWindowMinMax([0.1, 0.5, 0.3, 0.8, 0.2], 1)).toBe(0.8);
17
- });
18
-
19
- it('finds the best sustained probability across windows', () => {
20
- // Windows of size 3: [0.2, 0.8, 0.7], [0.8, 0.7, 0.3], [0.7, 0.3, 0.9]
21
- // Mins: 0.2, 0.3, 0.3
22
- // Max of mins: 0.3
23
- expect(slidingWindowMinMax([0.2, 0.8, 0.7, 0.3, 0.9], 3)).toBe(0.3);
24
- });
25
-
26
- it('returns the single element when array length equals window size', () => {
27
- // Only one window covering the entire array, return min of that window
28
- expect(slidingWindowMinMax([0.5, 0.9, 0.7], 3)).toBe(0.5);
29
- expect(slidingWindowMinMax([0.8], 1)).toBe(0.8);
30
- });
31
- });