@livekit/agents 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +47 -0
  3. package/LICENSE +201 -0
  4. package/dist/audio.d.ts +9 -0
  5. package/dist/audio.d.ts.map +1 -0
  6. package/dist/audio.js +54 -0
  7. package/dist/audio.js.map +1 -0
  8. package/dist/cli.d.ts +12 -1
  9. package/dist/cli.d.ts.map +1 -1
  10. package/dist/cli.js +102 -19
  11. package/dist/cli.js.map +1 -1
  12. package/dist/generator.d.ts +17 -6
  13. package/dist/generator.d.ts.map +1 -1
  14. package/dist/generator.js +20 -3
  15. package/dist/generator.js.map +1 -1
  16. package/dist/http_server.d.ts +1 -1
  17. package/dist/http_server.d.ts.map +1 -1
  18. package/dist/http_server.js +5 -3
  19. package/dist/http_server.js.map +1 -1
  20. package/dist/index.d.ts +14 -3
  21. package/dist/index.d.ts.map +1 -1
  22. package/dist/index.js +14 -3
  23. package/dist/index.js.map +1 -1
  24. package/dist/ipc/job_executor.d.ts +19 -0
  25. package/dist/ipc/job_executor.d.ts.map +1 -0
  26. package/dist/ipc/job_executor.js +8 -0
  27. package/dist/ipc/job_executor.js.map +1 -0
  28. package/dist/ipc/job_main.d.ts +7 -4
  29. package/dist/ipc/job_main.d.ts.map +1 -1
  30. package/dist/ipc/job_main.js +102 -59
  31. package/dist/ipc/job_main.js.map +1 -1
  32. package/dist/ipc/message.d.ts +41 -0
  33. package/dist/ipc/message.d.ts.map +1 -0
  34. package/dist/ipc/message.js +2 -0
  35. package/dist/ipc/message.js.map +1 -0
  36. package/dist/ipc/proc_job_executor.d.ts +15 -0
  37. package/dist/ipc/proc_job_executor.d.ts.map +1 -0
  38. package/dist/ipc/proc_job_executor.js +150 -0
  39. package/dist/ipc/proc_job_executor.js.map +1 -0
  40. package/dist/ipc/proc_pool.d.ts +26 -0
  41. package/dist/ipc/proc_pool.d.ts.map +1 -0
  42. package/dist/ipc/proc_pool.js +83 -0
  43. package/dist/ipc/proc_pool.js.map +1 -0
  44. package/dist/job.d.ts +100 -0
  45. package/dist/job.d.ts.map +1 -0
  46. package/dist/job.js +213 -0
  47. package/dist/job.js.map +1 -0
  48. package/dist/llm/function_context.d.ts +20 -0
  49. package/dist/llm/function_context.d.ts.map +1 -0
  50. package/dist/llm/function_context.js +37 -0
  51. package/dist/llm/function_context.js.map +1 -0
  52. package/dist/llm/index.d.ts +3 -0
  53. package/dist/llm/index.d.ts.map +1 -0
  54. package/dist/llm/index.js +6 -0
  55. package/dist/llm/index.js.map +1 -0
  56. package/dist/log.d.ts +12 -1
  57. package/dist/log.d.ts.map +1 -1
  58. package/dist/log.js +28 -11
  59. package/dist/log.js.map +1 -1
  60. package/dist/multimodal/agent_playout.d.ts +34 -0
  61. package/dist/multimodal/agent_playout.d.ts.map +1 -0
  62. package/dist/multimodal/agent_playout.js +221 -0
  63. package/dist/multimodal/agent_playout.js.map +1 -0
  64. package/dist/multimodal/index.d.ts +3 -0
  65. package/dist/multimodal/index.d.ts.map +1 -0
  66. package/dist/multimodal/index.js +6 -0
  67. package/dist/multimodal/index.js.map +1 -0
  68. package/dist/multimodal/multimodal_agent.d.ts +47 -0
  69. package/dist/multimodal/multimodal_agent.d.ts.map +1 -0
  70. package/dist/multimodal/multimodal_agent.js +331 -0
  71. package/dist/multimodal/multimodal_agent.js.map +1 -0
  72. package/dist/plugin.js +20 -7
  73. package/dist/plugin.js.map +1 -1
  74. package/dist/stt/index.d.ts +1 -1
  75. package/dist/stt/index.d.ts.map +1 -1
  76. package/dist/stt/index.js.map +1 -1
  77. package/dist/stt/stream_adapter.d.ts +2 -11
  78. package/dist/stt/stream_adapter.d.ts.map +1 -1
  79. package/dist/stt/stream_adapter.js +47 -33
  80. package/dist/stt/stream_adapter.js.map +1 -1
  81. package/dist/stt/stt.d.ts +27 -0
  82. package/dist/stt/stt.d.ts.map +1 -1
  83. package/dist/stt/stt.js +32 -5
  84. package/dist/stt/stt.js.map +1 -1
  85. package/dist/transcription.d.ts +22 -0
  86. package/dist/transcription.d.ts.map +1 -0
  87. package/dist/transcription.js +111 -0
  88. package/dist/transcription.js.map +1 -0
  89. package/dist/tts/stream_adapter.d.ts +4 -11
  90. package/dist/tts/stream_adapter.d.ts.map +1 -1
  91. package/dist/tts/stream_adapter.js +66 -32
  92. package/dist/tts/stream_adapter.js.map +1 -1
  93. package/dist/tts/tts.d.ts +10 -0
  94. package/dist/tts/tts.d.ts.map +1 -1
  95. package/dist/tts/tts.js +48 -7
  96. package/dist/tts/tts.js.map +1 -1
  97. package/dist/utils.d.ts +59 -0
  98. package/dist/utils.d.ts.map +1 -1
  99. package/dist/utils.js +212 -6
  100. package/dist/utils.js.map +1 -1
  101. package/dist/vad.d.ts +29 -0
  102. package/dist/vad.d.ts.map +1 -1
  103. package/dist/vad.js.map +1 -1
  104. package/dist/worker.d.ts +69 -50
  105. package/dist/worker.d.ts.map +1 -1
  106. package/dist/worker.js +414 -213
  107. package/dist/worker.js.map +1 -1
  108. package/package.json +12 -10
  109. package/src/audio.ts +62 -0
  110. package/src/cli.ts +108 -20
  111. package/src/generator.ts +27 -7
  112. package/src/http_server.ts +5 -0
  113. package/src/index.ts +15 -3
  114. package/src/ipc/job_executor.ts +25 -0
  115. package/src/ipc/job_main.ts +141 -61
  116. package/src/ipc/message.ts +39 -0
  117. package/src/ipc/proc_job_executor.ts +162 -0
  118. package/src/ipc/proc_pool.ts +109 -0
  119. package/src/job.ts +278 -0
  120. package/src/llm/function_context.ts +61 -0
  121. package/src/llm/index.ts +11 -0
  122. package/src/log.ts +40 -8
  123. package/src/multimodal/agent_playout.ts +254 -0
  124. package/src/multimodal/index.ts +5 -0
  125. package/src/multimodal/multimodal_agent.ts +428 -0
  126. package/src/stt/index.ts +1 -1
  127. package/src/stt/stream_adapter.ts +32 -32
  128. package/src/stt/stt.ts +27 -0
  129. package/src/transcription.ts +128 -0
  130. package/src/tts/stream_adapter.ts +32 -31
  131. package/src/tts/tts.ts +10 -0
  132. package/src/utils.ts +257 -3
  133. package/src/vad.ts +29 -0
  134. package/src/worker.ts +465 -172
  135. package/tsconfig.json +7 -1
  136. package/dist/ipc/job_process.d.ts +0 -22
  137. package/dist/ipc/job_process.d.ts.map +0 -1
  138. package/dist/ipc/job_process.js +0 -73
  139. package/dist/ipc/job_process.js.map +0 -1
  140. package/dist/ipc/protocol.d.ts +0 -40
  141. package/dist/ipc/protocol.d.ts.map +0 -1
  142. package/dist/ipc/protocol.js +0 -14
  143. package/dist/ipc/protocol.js.map +0 -1
  144. package/dist/job_context.d.ts +0 -16
  145. package/dist/job_context.d.ts.map +0 -1
  146. package/dist/job_context.js +0 -31
  147. package/dist/job_context.js.map +0 -1
  148. package/dist/job_request.d.ts +0 -42
  149. package/dist/job_request.d.ts.map +0 -1
  150. package/dist/job_request.js +0 -79
  151. package/dist/job_request.js.map +0 -1
  152. package/src/ipc/job_process.ts +0 -96
  153. package/src/ipc/protocol.ts +0 -51
  154. package/src/job_context.ts +0 -49
  155. package/src/job_request.ts +0 -118
@@ -0,0 +1,128 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type { AudioFrame, Room } from '@livekit/rtc-node';
5
+ import { log } from './log.js';
6
+
7
/**
 * Contract for a component that receives the audio and text of an utterance and forwards
 * the corresponding transcription.
 */
export interface TranscriptionForwarder {
  /** Number of characters of the pushed text that have been forwarded so far. */
  currentCharacterIndex: number;

  /** Begin forwarding. */
  start(): void;

  /**
   * Supply a frame of the audio being played out.
   *
   * @param frame Audio frame belonging to the utterance
   */
  pushAudio(frame: AudioFrame): void;

  /**
   * Supply a further piece of the text being transcribed.
   *
   * @param text Text to append
   */
  pushText(text: string): void;

  /** Signal that no more text will be pushed. */
  markTextComplete(): void;

  /** Signal that no more audio will be pushed. */
  markAudioComplete(): void;

  /**
   * Stop forwarding.
   *
   * @param interrupt Whether forwarding is being cut short rather than finishing normally
   */
  close(interrupt: boolean): Promise<void>;
}
16
+
17
/**
 * {@link TranscriptionForwarder} that publishes a progressively growing prefix of the pushed
 * text to the room, paced by an estimated characters-per-second rate.
 *
 * The rate starts at a fixed default and is recomputed from the real text length and audio
 * duration once both text and audio have been marked complete.
 */
export class BasicTranscriptionForwarder implements TranscriptionForwarder {
  #room: Room;
  #participantIdentity: string;
  #trackSid: string;
  #messageId: string;
  #currentText: string = '';
  /** Sum of the durations of all pushed frames (samplesPerChannel / sampleRate). */
  #totalAudioDuration: number = 0;
  /** Simulated playout position, advanced by the sleep interval on every loop iteration. */
  #currentPlayoutTime: number = 0;
  #DEFAULT_CHARS_PER_SECOND = 16;
  #charsPerSecond: number = this.#DEFAULT_CHARS_PER_SECOND;
  #isRunning: boolean = false;
  #textIsComplete: boolean = false;
  #audioIsComplete: boolean = false;
  /** Length of the text prefix that is currently being published. */
  currentCharacterIndex: number = 0;

  /**
   * @param room Room whose local participant publishes the transcription
   * @param participantIdentity Identity attached to the published transcription
   * @param trackSid Track SID attached to the published transcription
   * @param messageId ID used for the single transcription segment
   */
  constructor(room: Room, participantIdentity: string, trackSid: string, messageId: string) {
    this.#room = room;
    this.#participantIdentity = participantIdentity;
    this.#trackSid = trackSid;
    this.#messageId = messageId;
  }

  /** Start the publishing loop. Does nothing if the loop is already running. */
  start(): void {
    if (this.#isRunning) {
      return;
    }
    this.#isRunning = true;
    this.#startPublishingLoop().catch((error) => {
      log().error('Error in publishing loop:', error);
      this.#isRunning = false;
    });
  }

  /**
   * Account for the duration of a frame of audio.
   *
   * @param frame Frame whose duration is added to the running total
   */
  pushAudio(frame: AudioFrame): void {
    this.#totalAudioDuration += frame.samplesPerChannel / frame.sampleRate;
  }

  /**
   * Append text to the transcription.
   *
   * @param text Text to append
   */
  pushText(text: string): void {
    this.#currentText += text;
  }

  /** Mark the text as complete, allowing the publishing loop to finish. */
  markTextComplete(): void {
    this.#textIsComplete = true;
    this.#adjustTimingIfBothFinished();
  }

  /** Mark the audio as complete. */
  markAudioComplete(): void {
    this.#audioIsComplete = true;
    this.#adjustTimingIfBothFinished();
  }

  /**
   * Once both text and audio are complete, replace the estimated rate with the measured one
   * (text length divided by total audio duration). Skipped when either quantity is zero.
   */
  #adjustTimingIfBothFinished(): void {
    if (!this.#textIsComplete || !this.#audioIsComplete) {
      return;
    }
    const actualDuration = this.#totalAudioDuration;
    if (actualDuration > 0 && this.#currentText.length > 0) {
      this.#charsPerSecond = this.#currentText.length / actualDuration;
    }
  }

  /** Time between publishes: one character's worth, clamped to the range [0.0625, 0.5]. */
  #computeSleepInterval(): number {
    return Math.min(Math.max(1 / this.#charsPerSecond, 0.0625), 0.5);
  }

  /**
   * Advance the playout position and publish the matching text prefix until the forwarder is
   * stopped or the complete text has been reached, then publish the final transcription.
   */
  async #startPublishingLoop(): Promise<void> {
    let sleepInterval = this.#computeSleepInterval();
    let isComplete = false;
    while (this.#isRunning && !isComplete) {
      this.#currentPlayoutTime += sleepInterval;
      this.currentCharacterIndex = Math.floor(this.#currentPlayoutTime * this.#charsPerSecond);
      isComplete = this.#textIsComplete && this.currentCharacterIndex >= this.#currentText.length;
      await this.#publishTranscription(false);
      if (this.#isRunning && !isComplete) {
        sleepInterval = this.#computeSleepInterval();
        // the interval is multiplied by 1000 to obtain the setTimeout delay
        await new Promise((resolve) => setTimeout(resolve, sleepInterval * 1000));
      }
    }

    if (this.#isRunning) {
      // Awaited so that a failing final publish rejects this loop and is reported by the
      // handler attached in start(), instead of surfacing as an unhandled rejection.
      await this.close(false);
    }
  }

  /**
   * Publish the first `currentCharacterIndex` characters of the text as a single segment.
   *
   * @param final Value of the segment's `final` flag
   */
  async #publishTranscription(final: boolean): Promise<void> {
    const textToPublish = this.#currentText.slice(0, this.currentCharacterIndex);
    await this.#room.localParticipant?.publishTranscription({
      participantIdentity: this.#participantIdentity,
      trackSid: this.#trackSid,
      segments: [
        {
          text: textToPublish,
          final: final,
          id: this.#messageId,
          startTime: BigInt(0),
          endTime: BigInt(0),
          language: '',
        },
      ],
    });
  }

  /**
   * Stop the publishing loop and publish a final transcription.
   *
   * @param interrupt When false, the full text is published; when true, only the prefix
   * reached so far is published
   */
  async close(interrupt: boolean): Promise<void> {
    this.#isRunning = false;

    // Publish whatever we had as final
    if (!interrupt) {
      this.currentCharacterIndex = this.#currentText.length;
    }
    await this.#publishTranscription(true);
  }
}
@@ -2,25 +2,26 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { SentenceStream, SentenceTokenizer } from '../tokenize.js';
5
- import { ChunkedStream, SynthesisEvent, SynthesisEventType, SynthesizeStream, TTS } from './tts.js';
5
+ import type { ChunkedStream } from './tts.js';
6
+ import { SynthesisEvent, SynthesisEventType, SynthesizeStream, TTS } from './tts.js';
6
7
 
7
8
  export class StreamAdapterWrapper extends SynthesizeStream {
8
- closed: boolean;
9
- tts: TTS;
10
- sentenceStream: SentenceStream;
11
- eventQueue: (SynthesisEvent | undefined)[];
12
- task: {
9
+ #closed: boolean;
10
+ #tts: TTS;
11
+ #sentenceStream: SentenceStream;
12
+ #eventQueue: (SynthesisEvent | undefined)[];
13
+ #task: {
13
14
  run: Promise<void>;
14
15
  cancel: () => void;
15
16
  };
16
17
 
17
18
  constructor(tts: TTS, sentenceStream: SentenceStream) {
18
19
  super();
19
- this.closed = false;
20
- this.tts = tts;
21
- this.sentenceStream = sentenceStream;
22
- this.eventQueue = [];
23
- this.task = {
20
+ this.#closed = false;
21
+ this.#tts = tts;
22
+ this.#sentenceStream = sentenceStream;
23
+ this.#eventQueue = [];
24
+ this.#task = {
24
25
  run: new Promise((_, reject) => {
25
26
  this.run(reject);
26
27
  }),
@@ -29,32 +30,32 @@ export class StreamAdapterWrapper extends SynthesizeStream {
29
30
  }
30
31
 
31
32
  async run(reject: (arg: Error) => void) {
32
- while (!this.closed) {
33
- this.task.cancel = () => {
34
- this.closed = true;
33
+ while (!this.#closed) {
34
+ this.#task.cancel = () => {
35
+ this.#closed = true;
35
36
  reject(new Error('cancelled'));
36
37
  };
37
- for await (const sentence of this.sentenceStream) {
38
- const audio = await this.tts.synthesize(sentence.text).then((data) => data.next());
38
+ for await (const sentence of this.#sentenceStream) {
39
+ const audio = await this.#tts.synthesize(sentence.text).then((data) => data.next());
39
40
  if (!audio.done) {
40
- this.eventQueue.push(new SynthesisEvent(SynthesisEventType.STARTED));
41
- this.eventQueue.push(new SynthesisEvent(SynthesisEventType.AUDIO, audio.value));
42
- this.eventQueue.push(new SynthesisEvent(SynthesisEventType.FINISHED));
41
+ this.#eventQueue.push(new SynthesisEvent(SynthesisEventType.STARTED));
42
+ this.#eventQueue.push(new SynthesisEvent(SynthesisEventType.AUDIO, audio.value));
43
+ this.#eventQueue.push(new SynthesisEvent(SynthesisEventType.FINISHED));
43
44
  }
44
45
  }
45
46
  }
46
47
  }
47
48
 
48
49
  pushText(token: string) {
49
- this.sentenceStream.pushText(token);
50
+ this.#sentenceStream.pushText(token);
50
51
  }
51
52
 
52
53
  async flush() {
53
- await this.sentenceStream.flush();
54
+ await this.#sentenceStream.flush();
54
55
  }
55
56
 
56
57
  next(): IteratorResult<SynthesisEvent> {
57
- const event = this.eventQueue.shift();
58
+ const event = this.#eventQueue.shift();
58
59
  if (event) {
59
60
  return { done: false, value: event };
60
61
  } else {
@@ -63,30 +64,30 @@ export class StreamAdapterWrapper extends SynthesizeStream {
63
64
  }
64
65
 
65
66
  async close(): Promise<void> {
66
- this.task.cancel();
67
+ this.#task.cancel();
67
68
  try {
68
- await this.task.run;
69
+ await this.#task.run;
69
70
  } finally {
70
- this.eventQueue.push(undefined);
71
+ this.#eventQueue.push(undefined);
71
72
  }
72
73
  }
73
74
  }
74
75
 
75
76
  export class StreamAdapter extends TTS {
76
- tts: TTS;
77
- tokenizer: SentenceTokenizer;
77
+ #tts: TTS;
78
+ #tokenizer: SentenceTokenizer;
78
79
 
79
80
  constructor(tts: TTS, tokenizer: SentenceTokenizer) {
80
81
  super(true);
81
- this.tts = tts;
82
- this.tokenizer = tokenizer;
82
+ this.#tts = tts;
83
+ this.#tokenizer = tokenizer;
83
84
  }
84
85
 
85
86
  synthesize(text: string): Promise<ChunkedStream> {
86
- return this.tts.synthesize(text);
87
+ return this.#tts.synthesize(text);
87
88
  }
88
89
 
89
90
  stream() {
90
- return new StreamAdapterWrapper(this.tts, this.tokenizer.stream(undefined));
91
+ return new StreamAdapterWrapper(this.#tts, this.#tokenizer.stream(undefined));
91
92
  }
92
93
  }
package/src/tts/tts.ts CHANGED
@@ -10,8 +10,18 @@ export interface SynthesizedAudio {
10
10
  }
11
11
 
12
12
  export enum SynthesisEventType {
13
+ /**
14
+ * Indicate the start of synthesis.
15
+ * Retriggered after FINISHED.
16
+ */
13
17
  STARTED = 0,
18
+ /**
19
+ * Indicate that audio data is available.
20
+ */
14
21
  AUDIO = 1,
22
+ /**
23
+ * Indicate the end of synthesis. Does not necessarily mean stream is done.
24
+ */
15
25
  FINISHED = 2,
16
26
  }
17
27
 
package/src/utils.ts CHANGED
@@ -1,10 +1,26 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import { AudioFrame } from '@livekit/rtc-node';
4
+ import type {
5
+ LocalParticipant,
6
+ RemoteParticipant,
7
+ Room,
8
+ TrackPublication,
9
+ } from '@livekit/rtc-node';
10
+ import { AudioFrame, TrackSource } from '@livekit/rtc-node';
11
+ import { EventEmitter, once } from 'events';
5
12
 
13
+ /** Union of a single and a list of {@link AudioFrame}s */
6
14
  export type AudioBuffer = AudioFrame[] | AudioFrame;
7
15
 
16
+ /**
17
+ * Merge one or more {@link AudioFrame}s into a single one.
18
+ *
19
+ * @param buffer Either an {@link AudioFrame} or a list thereof
20
+ * @throws
21
+ * {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypeError
22
+ * | TypeError} if sample rate or channel count are mismatched
23
+ */
8
24
  export const mergeFrames = (buffer: AudioBuffer): AudioFrame => {
9
25
  if (Array.isArray(buffer)) {
10
26
  buffer = buffer as AudioFrame[];
@@ -15,7 +31,7 @@ export const mergeFrames = (buffer: AudioBuffer): AudioFrame => {
15
31
  const sampleRate = buffer[0].sampleRate;
16
32
  const channels = buffer[0].channels;
17
33
  let samplesPerChannel = 0;
18
- let data = new Uint16Array();
34
+ let data = new Int16Array();
19
35
 
20
36
  for (const frame of buffer) {
21
37
  if (frame.sampleRate !== sampleRate) {
@@ -26,7 +42,7 @@ export const mergeFrames = (buffer: AudioBuffer): AudioFrame => {
26
42
  throw new TypeError('channel count mismatch');
27
43
  }
28
44
 
29
- data = new Uint16Array([...data, ...frame.data]);
45
+ data = new Int16Array([...data, ...frame.data]);
30
46
  samplesPerChannel += frame.samplesPerChannel;
31
47
  }
32
48
 
@@ -35,3 +51,241 @@ export const mergeFrames = (buffer: AudioBuffer): AudioFrame => {
35
51
 
36
52
  return buffer;
37
53
  };
54
+
55
/**
 * Find the SID of the first microphone track published by a participant.
 *
 * @param room Room in which to look up the participant
 * @param identity Identity of a remote participant or of the local participant
 * @returns SID of the first track publication whose source is the microphone
 * @throws
 * {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Error
 * | Error} if the participant is not found or has no microphone track with a SID
 */
export const findMicroTrackId = (room: Room, identity: string): string => {
  // the local participant takes precedence over a remote participant with the same identity
  const participant: RemoteParticipant | LocalParticipant | undefined =
    identity === room.localParticipant?.identity
      ? room.localParticipant
      : room.remoteParticipants.get(identity);

  if (!participant) {
    throw new Error(`participant ${identity} not found`);
  }

  // A `return` inside a forEach callback does not stop the iteration, so a plain loop is
  // used to really stop at the first microphone track.
  for (const publication of participant.trackPublications.values()) {
    if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.sid) {
      return publication.sid;
    }
  }

  throw new Error(`participant ${identity} does not have a microphone track`);
};
81
+
82
/**
 * Promise-based lock. Each call to {@link Mutex.lock} resolves only after every earlier
 * holder has called the unlock function it was given.
 *
 * @internal
 */
export class Mutex {
  /** Tail of the chain of holders; settles when the most recent holder unlocks. */
  #locking: Promise<void> = Promise.resolve();
  /** Number of lock() calls that have not been unlocked yet, including waiting ones. */
  #locks = 0;
  #limit: number;

  /**
   * @param limit Number of outstanding locks at which {@link Mutex.isLocked} reports true
   */
  constructor(limit = 1) {
    this.#limit = limit;
  }

  /** Whether the number of outstanding locks has reached the limit. */
  isLocked(): boolean {
    return this.#locks >= this.#limit;
  }

  /**
   * Wait for all earlier holders to unlock.
   *
   * @returns A function that releases this lock and lets the next waiter proceed
   */
  async lock(): Promise<() => void> {
    this.#locks += 1;

    let release!: () => void;
    const held = new Promise<void>((resolve) => {
      release = () => {
        this.#locks -= 1;
        resolve();
      };
    });

    const previous = this.#locking;
    // hand out the release function once every earlier holder is done ...
    const acquired = previous.then(() => release);
    // ... and make later callers wait for this holder as well
    this.#locking = previous.then(() => held);
    return acquired;
  }
}
116
+
117
/**
 * Asynchronous FIFO queue with an optional size limit.
 *
 * @internal
 */
export class Queue<T> {
  /** @internal */
  items: T[] = [];
  #limit?: number;
  /** Emits 'put' after an item is added and 'get' after an item is removed. */
  #events = new EventEmitter();

  /**
   * @param limit Maximum number of queued items; when omitted or zero the queue is unbounded
   */
  constructor(limit?: number) {
    this.#limit = limit;
  }

  /**
   * Remove and return the oldest item, waiting for one to be put if the queue is empty.
   *
   * @returns The oldest item in the queue
   */
  async get(): Promise<T> {
    // Re-check after every wake-up: a single 'put' wakes every waiting consumer, but only
    // one of them can take the item.
    while (this.items.length === 0) {
      await once(this.#events, 'put');
    }
    const item = this.items.shift() as T;
    this.#events.emit('get');
    return item;
  }

  /**
   * Append an item, waiting for room if the queue is at its limit.
   *
   * @param item Item to append
   */
  async put(item: T) {
    // Re-check after every wake-up: a single 'get' wakes every waiting producer, but it
    // frees only one slot.
    while (this.#limit && this.items.length >= this.#limit) {
      await once(this.#events, 'get');
    }
    this.items.push(item);
    this.#events.emit('put');
  }
}
145
+
146
/**
 * A promise that is settled from the outside, together with a flag telling whether it has
 * been settled.
 *
 * @internal
 */
export class Future {
  #await: Promise<void>;
  // The promise's settle callbacks are kept in private fields. Assigning them to
  // `this.resolve` / `this.reject` would shadow the methods below, and `done` would then
  // never become true.
  #resolvePromise!: () => void;
  #rejectPromise!: (error: Error) => void;
  #done: boolean = false;

  constructor() {
    this.#await = new Promise<void>((resolve, reject: (_: Error) => void) => {
      this.#resolvePromise = resolve;
      this.#rejectPromise = reject;
    });
  }

  /** The underlying promise; settles when {@link Future.resolve} or {@link Future.reject} is called. */
  get await() {
    return this.#await;
  }

  /** Whether {@link Future.resolve} or {@link Future.reject} has been called. */
  get done() {
    return this.#done;
  }

  /** Mark the future as done and fulfill the underlying promise. */
  resolve() {
    this.#done = true;
    this.#resolvePromise();
  }

  /**
   * Mark the future as done and reject the underlying promise.
   *
   * @param _ Rejection reason
   */
  reject(_: Error) {
    this.#done = true;
    this.#rejectPromise(_);
  }
}
171
+
172
/**
 * Thenable wrapper around a promise whose executor may register a cancellation hook.
 *
 * @internal
 */
export class CancellablePromise<T> {
  #promise: Promise<T>;
  // Defaults to a no-op so that cancel() is safe when the executor never registers a hook,
  // as is the case for promises created with CancellablePromise.from().
  #cancelFn: () => void = () => {};
  #isCancelled: boolean = false;
  #error: Error | null = null;

  /**
   * @param executor Like a Promise executor, with a third argument `onCancel` through which
   * it can register the function to run when {@link CancellablePromise.cancel} is called
   */
  constructor(
    executor: (
      resolve: (value: T | PromiseLike<T>) => void,
      reject: (reason?: any) => void,
      onCancel: (cancelFn: () => void) => void,
    ) => void,
  ) {
    this.#promise = new Promise<T>((resolve, reject) => {
      executor(
        resolve,
        (reason) => {
          // remember the rejection reason, wrapping non-Error values
          this.#error = reason instanceof Error ? reason : new Error(String(reason));
          reject(reason);
        },
        (cancelFn) => {
          // stored on the instance so that a hook registered after construction is used too
          this.#cancelFn = cancelFn;
        },
      );
    });
  }

  /** Whether {@link CancellablePromise.cancel} has been called. */
  get isCancelled(): boolean {
    return this.#isCancelled;
  }

  /** The reason passed to the executor's `reject`, as an Error, or null if it was not called. */
  get error(): Error | null {
    return this.#error;
  }

  /** Same as `Promise.prototype.then` on the wrapped promise. */
  then<TResult1 = T, TResult2 = never>(
    onfulfilled?: ((value: T) => TResult1 | Promise<TResult1>) | null,
    onrejected?: ((reason: any) => TResult2 | Promise<TResult2>) | null,
  ): Promise<TResult1 | TResult2> {
    return this.#promise.then(onfulfilled, onrejected);
  }

  /** Same as `Promise.prototype.catch` on the wrapped promise. */
  catch<TResult = never>(
    onrejected?: ((reason: any) => TResult | Promise<TResult>) | null,
  ): Promise<T | TResult> {
    return this.#promise.catch(onrejected);
  }

  /** Same as `Promise.prototype.finally` on the wrapped promise. */
  finally(onfinally?: (() => void) | null): Promise<T> {
    return this.#promise.finally(onfinally);
  }

  /** Mark the promise as cancelled and run the registered cancellation hook, if any. */
  cancel(): void {
    this.#isCancelled = true;
    this.#cancelFn();
  }

  /**
   * Wrap an existing promise. No cancellation hook is registered, so cancelling the result
   * only sets {@link CancellablePromise.isCancelled}.
   *
   * @param promise Promise to wrap
   */
  static from<T>(promise: Promise<T>): CancellablePromise<T> {
    return new CancellablePromise<T>((resolve, reject) => {
      promise.then(resolve, reject);
    });
  }
}
242
+
243
/**
 * Cancel a {@link CancellablePromise} unless it is already cancelled, then wait for it to
 * settle, discarding any rejection.
 *
 * @param promise Promise to cancel and wait for
 * @internal
 */
export async function gracefullyCancel<T>(promise: CancellablePromise<T>): Promise<void> {
  const alreadyCancelled = promise.isCancelled;
  if (!alreadyCancelled) {
    promise.cancel();
  }

  try {
    await promise;
  } catch {
    // a rejection is the expected outcome of cancelling, so it is deliberately dropped
  }
}
254
+
255
/**
 * A {@link Queue} that can be consumed with `for await` and ended with
 * {@link AsyncIterableQueue.close}.
 *
 * @internal
 */
export class AsyncIterableQueue<T> implements AsyncIterable<T> {
  /** Sentinel enqueued by close() to mark the end of the queue. */
  private static readonly QUEUE_END_MARKER = Symbol('QUEUE_END_MARKER');
  private closed = false;
  private queue: Queue<T | typeof AsyncIterableQueue.QUEUE_END_MARKER>;

  constructor() {
    this.queue = new Queue<T | typeof AsyncIterableQueue.QUEUE_END_MARKER>();
  }

  /**
   * Enqueue an item.
   *
   * @param item Item to enqueue
   * @throws Error if the queue has been closed
   */
  put(item: T): void {
    if (this.closed) {
      throw new Error('Queue is closed');
    }
    this.queue.put(item);
  }

  /** Refuse further items and enqueue the end marker. */
  close(): void {
    this.closed = true;
    this.queue.put(AsyncIterableQueue.QUEUE_END_MARKER);
  }

  /**
   * @returns An iterator yielding queued items until the queue is closed and either empty
   * or the end marker is dequeued
   */
  [Symbol.asyncIterator](): AsyncIterator<T> {
    const next = async (): Promise<IteratorResult<T>> => {
      const drained = this.closed && this.queue.items.length === 0;
      if (!drained) {
        const entry = await this.queue.get();
        const reachedEnd = entry === AsyncIterableQueue.QUEUE_END_MARKER && this.closed;
        if (!reachedEnd) {
          return { value: entry as T, done: false };
        }
      }
      return { value: undefined, done: true };
    };
    return { next };
  }
}
package/src/vad.ts CHANGED
@@ -11,12 +11,23 @@ export enum VADEventType {
11
11
 
12
12
  export interface VADEvent {
13
13
  type: VADEventType;
14
+ /**
15
+ * Index of the samples of the event (when the event was fired)
16
+ */
14
17
  samplesIndex: number;
18
+ /**
19
+ * Duration of speech, in seconds
20
+ */
15
21
  duration: number;
16
22
  speech: AudioFrame[];
17
23
  }
18
24
 
19
25
  export abstract class VAD {
26
+ /**
27
+ * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.
28
+ *
29
+ * @param options
30
+ */
20
31
  abstract stream({
21
32
  minSpeakingDuration,
22
33
  minSilenceDuration,
@@ -24,10 +35,28 @@ export abstract class VAD {
24
35
  sampleRate,
25
36
  maxBufferedSpeech,
26
37
  }: {
38
+ /**
39
+ * Minimum duration of speech required to trigger a {@link VADEventType.START_OF_SPEECH} event
40
+ */
27
41
  minSpeakingDuration: number;
42
+ /**
43
+ * Milliseconds to wait before separating speech chunk.
44
+ * Not always precise, generally rounded to the nearest 40ms depending on VAD implementation
45
+ */
28
46
  minSilenceDuration: number;
47
+ /**
48
+ * Number of frames to pad the start and end of speech with
49
+ */
29
50
  paddingDuration: number;
51
+ /**
52
+ * Sample rate of inference/processing
53
+ */
30
54
  sampleRate: number;
55
+ /**
56
+ * Number of seconds the buffer may keep until {@link VADEventType.END_OF_SPEECH} is triggered.
57
+ * It is recommended to set this to a positive value, as zero may OOM if the user doesn't stop
58
+ * speaking.
59
+ */
31
60
  maxBufferedSpeech: number;
32
61
  }): VADStream;
33
62
  }