@livekit/agents 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +21 -0
  3. package/LICENSE +201 -0
  4. package/dist/audio.d.ts +12 -0
  5. package/dist/audio.d.ts.map +1 -0
  6. package/dist/audio.js +37 -0
  7. package/dist/audio.js.map +1 -0
  8. package/dist/cli.d.ts +11 -0
  9. package/dist/cli.d.ts.map +1 -1
  10. package/dist/cli.js +68 -8
  11. package/dist/cli.js.map +1 -1
  12. package/dist/generator.d.ts +12 -6
  13. package/dist/generator.d.ts.map +1 -1
  14. package/dist/generator.js +9 -3
  15. package/dist/generator.js.map +1 -1
  16. package/dist/http_server.d.ts +1 -1
  17. package/dist/http_server.js +0 -3
  18. package/dist/http_server.js.map +1 -1
  19. package/dist/index.d.ts +12 -3
  20. package/dist/index.d.ts.map +1 -1
  21. package/dist/index.js +12 -3
  22. package/dist/index.js.map +1 -1
  23. package/dist/ipc/job_executor.d.ts +19 -0
  24. package/dist/ipc/job_executor.d.ts.map +1 -0
  25. package/dist/ipc/job_executor.js +8 -0
  26. package/dist/ipc/job_executor.js.map +1 -0
  27. package/dist/ipc/job_main.d.ts +7 -4
  28. package/dist/ipc/job_main.d.ts.map +1 -1
  29. package/dist/ipc/job_main.js +96 -61
  30. package/dist/ipc/job_main.js.map +1 -1
  31. package/dist/ipc/message.d.ts +41 -0
  32. package/dist/ipc/message.d.ts.map +1 -0
  33. package/dist/ipc/message.js +2 -0
  34. package/dist/ipc/message.js.map +1 -0
  35. package/dist/ipc/proc_job_executor.d.ts +15 -0
  36. package/dist/ipc/proc_job_executor.d.ts.map +1 -0
  37. package/dist/ipc/proc_job_executor.js +150 -0
  38. package/dist/ipc/proc_job_executor.js.map +1 -0
  39. package/dist/ipc/proc_pool.d.ts +26 -0
  40. package/dist/ipc/proc_pool.d.ts.map +1 -0
  41. package/dist/ipc/proc_pool.js +82 -0
  42. package/dist/ipc/proc_pool.js.map +1 -0
  43. package/dist/job.d.ts +99 -0
  44. package/dist/job.d.ts.map +1 -0
  45. package/dist/job.js +197 -0
  46. package/dist/job.js.map +1 -0
  47. package/dist/llm/function_context.d.ts +20 -0
  48. package/dist/llm/function_context.d.ts.map +1 -0
  49. package/dist/llm/function_context.js +37 -0
  50. package/dist/llm/function_context.js.map +1 -0
  51. package/dist/llm/index.d.ts +3 -0
  52. package/dist/llm/index.d.ts.map +1 -0
  53. package/dist/llm/index.js +6 -0
  54. package/dist/llm/index.js.map +1 -0
  55. package/dist/log.d.ts +12 -1
  56. package/dist/log.d.ts.map +1 -1
  57. package/dist/log.js +28 -11
  58. package/dist/log.js.map +1 -1
  59. package/dist/plugin.js +20 -7
  60. package/dist/plugin.js.map +1 -1
  61. package/dist/stt/index.d.ts +1 -1
  62. package/dist/stt/index.d.ts.map +1 -1
  63. package/dist/stt/index.js.map +1 -1
  64. package/dist/stt/stream_adapter.d.ts +2 -11
  65. package/dist/stt/stream_adapter.d.ts.map +1 -1
  66. package/dist/stt/stream_adapter.js +47 -33
  67. package/dist/stt/stream_adapter.js.map +1 -1
  68. package/dist/stt/stt.d.ts +27 -0
  69. package/dist/stt/stt.d.ts.map +1 -1
  70. package/dist/stt/stt.js +32 -5
  71. package/dist/stt/stt.js.map +1 -1
  72. package/dist/tts/stream_adapter.d.ts +4 -11
  73. package/dist/tts/stream_adapter.d.ts.map +1 -1
  74. package/dist/tts/stream_adapter.js +66 -32
  75. package/dist/tts/stream_adapter.js.map +1 -1
  76. package/dist/tts/tts.d.ts +10 -0
  77. package/dist/tts/tts.d.ts.map +1 -1
  78. package/dist/tts/tts.js +48 -7
  79. package/dist/tts/tts.js.map +1 -1
  80. package/dist/utils.d.ts +32 -0
  81. package/dist/utils.d.ts.map +1 -1
  82. package/dist/utils.js +114 -6
  83. package/dist/utils.js.map +1 -1
  84. package/dist/vad.d.ts +29 -0
  85. package/dist/vad.d.ts.map +1 -1
  86. package/dist/vad.js.map +1 -1
  87. package/dist/worker.d.ts +67 -50
  88. package/dist/worker.d.ts.map +1 -1
  89. package/dist/worker.js +379 -214
  90. package/dist/worker.js.map +1 -1
  91. package/package.json +9 -9
  92. package/src/audio.ts +62 -0
  93. package/src/cli.ts +72 -8
  94. package/src/generator.ts +13 -7
  95. package/src/index.ts +13 -3
  96. package/src/ipc/job_executor.ts +25 -0
  97. package/src/ipc/job_main.ts +134 -61
  98. package/src/ipc/message.ts +39 -0
  99. package/src/ipc/proc_job_executor.ts +162 -0
  100. package/src/ipc/proc_pool.ts +108 -0
  101. package/src/job.ts +258 -0
  102. package/src/llm/function_context.ts +61 -0
  103. package/src/llm/index.ts +11 -0
  104. package/src/log.ts +40 -8
  105. package/src/stt/index.ts +1 -1
  106. package/src/stt/stream_adapter.ts +32 -32
  107. package/src/stt/stt.ts +27 -0
  108. package/src/tts/stream_adapter.ts +32 -31
  109. package/src/tts/tts.ts +10 -0
  110. package/src/utils.ts +125 -3
  111. package/src/vad.ts +29 -0
  112. package/src/worker.ts +419 -170
  113. package/tsconfig.json +6 -0
  114. package/dist/ipc/job_process.d.ts +0 -22
  115. package/dist/ipc/job_process.d.ts.map +0 -1
  116. package/dist/ipc/job_process.js +0 -73
  117. package/dist/ipc/job_process.js.map +0 -1
  118. package/dist/ipc/protocol.d.ts +0 -40
  119. package/dist/ipc/protocol.d.ts.map +0 -1
  120. package/dist/ipc/protocol.js +0 -14
  121. package/dist/ipc/protocol.js.map +0 -1
  122. package/dist/job_context.d.ts +0 -16
  123. package/dist/job_context.d.ts.map +0 -1
  124. package/dist/job_context.js +0 -31
  125. package/dist/job_context.js.map +0 -1
  126. package/dist/job_request.d.ts +0 -42
  127. package/dist/job_request.d.ts.map +0 -1
  128. package/dist/job_request.js +0 -79
  129. package/dist/job_request.js.map +0 -1
  130. package/src/ipc/job_process.ts +0 -96
  131. package/src/ipc/protocol.ts +0 -51
  132. package/src/job_context.ts +0 -49
  133. package/src/job_request.ts +0 -118
@@ -7,24 +7,24 @@ import { VADEventType, type VADStream } from '../vad.js';
7
7
  import { STT, SpeechEvent, SpeechEventType, SpeechStream } from './stt.js';
8
8
 
9
9
  export class StreamAdapterWrapper extends SpeechStream {
10
- closed: boolean;
11
- stt: STT;
12
- vadStream: VADStream;
13
- eventQueue: (SpeechEvent | undefined)[];
14
- language?: string;
15
- task: {
10
+ #closed: boolean;
11
+ #stt: STT;
12
+ #vadStream: VADStream;
13
+ #eventQueue: (SpeechEvent | undefined)[];
14
+ #language?: string;
15
+ #task: {
16
16
  run: Promise<void>;
17
17
  cancel: () => void;
18
18
  };
19
19
 
20
20
  constructor(stt: STT, vadStream: VADStream, language: string | undefined = undefined) {
21
21
  super();
22
- this.closed = false;
23
- this.stt = stt;
24
- this.vadStream = vadStream;
25
- this.eventQueue = [];
26
- this.language = language;
27
- this.task = {
22
+ this.#closed = false;
23
+ this.#stt = stt;
24
+ this.#vadStream = vadStream;
25
+ this.#eventQueue = [];
26
+ this.#language = language;
27
+ this.#task = {
28
28
  run: new Promise((_, reject) => {
29
29
  this.run(reject);
30
30
  }),
@@ -33,46 +33,46 @@ export class StreamAdapterWrapper extends SpeechStream {
33
33
  }
34
34
 
35
35
  async run(reject: (arg: Error) => void) {
36
- this.task.cancel = () => {
37
- this.closed = true;
36
+ this.#task.cancel = () => {
37
+ this.#closed = true;
38
38
  reject(new Error('cancelled'));
39
39
  };
40
40
 
41
- for (const event of this.vadStream) {
41
+ for (const event of this.#vadStream) {
42
42
  if (event.type == VADEventType.START_OF_SPEECH) {
43
43
  const startEvent = new SpeechEvent(SpeechEventType.START_OF_SPEECH);
44
- this.eventQueue.push(startEvent);
44
+ this.#eventQueue.push(startEvent);
45
45
  } else if (event.type == VADEventType.END_OF_SPEECH) {
46
46
  const mergedFrames = mergeFrames(event.speech);
47
- const endEvent = await this.stt.recognize(mergedFrames, this.language);
48
- this.eventQueue.push(endEvent);
47
+ const endEvent = await this.#stt.recognize(mergedFrames, this.#language);
48
+ this.#eventQueue.push(endEvent);
49
49
  }
50
50
  }
51
51
 
52
- this.eventQueue.push(undefined);
52
+ this.#eventQueue.push(undefined);
53
53
  }
54
54
 
55
55
  pushFrame(frame: AudioFrame) {
56
- if (this.closed) {
56
+ if (this.#closed) {
57
57
  throw new TypeError('cannot push frame to closed stream');
58
58
  }
59
59
 
60
- this.vadStream.pushFrame(frame);
60
+ this.#vadStream.pushFrame(frame);
61
61
  }
62
62
 
63
63
  async close(wait: boolean = true): Promise<void> {
64
- this.closed = true;
64
+ this.#closed = true;
65
65
 
66
66
  if (!wait) {
67
- this.task.cancel();
67
+ this.#task.cancel();
68
68
  }
69
69
 
70
- await this.vadStream.close(wait);
71
- await this.task.run;
70
+ await this.#vadStream.close(wait);
71
+ await this.#task.run;
72
72
  }
73
73
 
74
74
  next(): IteratorResult<SpeechEvent> {
75
- const item = this.eventQueue.shift();
75
+ const item = this.#eventQueue.shift();
76
76
  if (item) {
77
77
  return { done: false, value: item };
78
78
  } else {
@@ -82,23 +82,23 @@ export class StreamAdapterWrapper extends SpeechStream {
82
82
  }
83
83
 
84
84
  export class StreamAdapter extends STT {
85
- stt: STT;
86
- vadStream: VADStream;
85
+ #stt: STT;
86
+ #vadStream: VADStream;
87
87
 
88
88
  constructor(stt: STT, vadStream: VADStream) {
89
89
  super(true);
90
- this.stt = stt;
91
- this.vadStream = vadStream;
90
+ this.#stt = stt;
91
+ this.#vadStream = vadStream;
92
92
  }
93
93
 
94
94
  async recognize(
95
95
  buffer: AudioBuffer,
96
96
  language: string | undefined = undefined,
97
97
  ): Promise<SpeechEvent> {
98
- return await this.stt.recognize(buffer, language);
98
+ return await this.#stt.recognize(buffer, language);
99
99
  }
100
100
 
101
101
  stream(language: string | undefined = undefined) {
102
- return new StreamAdapterWrapper(this.stt, this.vadStream, language);
102
+ return new StreamAdapterWrapper(this.#stt, this.#vadStream, language);
103
103
  }
104
104
  }
package/src/stt/stt.ts CHANGED
@@ -5,9 +5,25 @@ import type { AudioFrame } from '@livekit/rtc-node';
5
5
  import type { AudioBuffer } from '../utils.js';
6
6
 
7
7
  export enum SpeechEventType {
8
+ /**
9
+ * Indicate the start of speech.
10
+ * If the STT doesn't support this event, this will be emitted at the same time
11
+ as the first INTERIM_TRANSCRIPT.
12
+ */
8
13
  START_OF_SPEECH = 0,
14
+ /**
15
+ * Interim transcript, useful for real-time transcription.
16
+ */
9
17
  INTERIM_TRANSCRIPT = 1,
18
+ /**
19
+ * Final transcript, emitted when the STT is confident enough that a certain
20
+ * portion of the speech will not change.
21
+ */
10
22
  FINAL_TRANSCRIPT = 2,
23
+ /**
24
+ * Indicate the end of speech, emitted when the user stops speaking.
25
+ * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.
26
+ */
11
27
  END_OF_SPEECH = 3,
12
28
  }
13
29
 
@@ -30,8 +46,19 @@ export class SpeechEvent {
30
46
  }
31
47
 
32
48
  export abstract class SpeechStream implements IterableIterator<SpeechEvent> {
49
+ /**
50
+ * Push a frame to be recognised.
51
+ * It is recommended to push frames as soon as they are available.
52
+ */
33
53
  abstract pushFrame(token: AudioFrame): void;
34
54
 
55
+ /**
56
+ * Close the stream.
57
+ *
58
+ * @param wait
59
+ * Whether to wait for the STT to finish processing the remaining
60
+ * frames before closing
61
+ */
35
62
  abstract close(wait: boolean): Promise<void>;
36
63
 
37
64
  abstract next(): IteratorResult<SpeechEvent>;
@@ -2,25 +2,26 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { SentenceStream, SentenceTokenizer } from '../tokenize.js';
5
- import { ChunkedStream, SynthesisEvent, SynthesisEventType, SynthesizeStream, TTS } from './tts.js';
5
+ import type { ChunkedStream } from './tts.js';
6
+ import { SynthesisEvent, SynthesisEventType, SynthesizeStream, TTS } from './tts.js';
6
7
 
7
8
  export class StreamAdapterWrapper extends SynthesizeStream {
8
- closed: boolean;
9
- tts: TTS;
10
- sentenceStream: SentenceStream;
11
- eventQueue: (SynthesisEvent | undefined)[];
12
- task: {
9
+ #closed: boolean;
10
+ #tts: TTS;
11
+ #sentenceStream: SentenceStream;
12
+ #eventQueue: (SynthesisEvent | undefined)[];
13
+ #task: {
13
14
  run: Promise<void>;
14
15
  cancel: () => void;
15
16
  };
16
17
 
17
18
  constructor(tts: TTS, sentenceStream: SentenceStream) {
18
19
  super();
19
- this.closed = false;
20
- this.tts = tts;
21
- this.sentenceStream = sentenceStream;
22
- this.eventQueue = [];
23
- this.task = {
20
+ this.#closed = false;
21
+ this.#tts = tts;
22
+ this.#sentenceStream = sentenceStream;
23
+ this.#eventQueue = [];
24
+ this.#task = {
24
25
  run: new Promise((_, reject) => {
25
26
  this.run(reject);
26
27
  }),
@@ -29,32 +30,32 @@ export class StreamAdapterWrapper extends SynthesizeStream {
29
30
  }
30
31
 
31
32
  async run(reject: (arg: Error) => void) {
32
- while (!this.closed) {
33
- this.task.cancel = () => {
34
- this.closed = true;
33
+ while (!this.#closed) {
34
+ this.#task.cancel = () => {
35
+ this.#closed = true;
35
36
  reject(new Error('cancelled'));
36
37
  };
37
- for await (const sentence of this.sentenceStream) {
38
- const audio = await this.tts.synthesize(sentence.text).then((data) => data.next());
38
+ for await (const sentence of this.#sentenceStream) {
39
+ const audio = await this.#tts.synthesize(sentence.text).then((data) => data.next());
39
40
  if (!audio.done) {
40
- this.eventQueue.push(new SynthesisEvent(SynthesisEventType.STARTED));
41
- this.eventQueue.push(new SynthesisEvent(SynthesisEventType.AUDIO, audio.value));
42
- this.eventQueue.push(new SynthesisEvent(SynthesisEventType.FINISHED));
41
+ this.#eventQueue.push(new SynthesisEvent(SynthesisEventType.STARTED));
42
+ this.#eventQueue.push(new SynthesisEvent(SynthesisEventType.AUDIO, audio.value));
43
+ this.#eventQueue.push(new SynthesisEvent(SynthesisEventType.FINISHED));
43
44
  }
44
45
  }
45
46
  }
46
47
  }
47
48
 
48
49
  pushText(token: string) {
49
- this.sentenceStream.pushText(token);
50
+ this.#sentenceStream.pushText(token);
50
51
  }
51
52
 
52
53
  async flush() {
53
- await this.sentenceStream.flush();
54
+ await this.#sentenceStream.flush();
54
55
  }
55
56
 
56
57
  next(): IteratorResult<SynthesisEvent> {
57
- const event = this.eventQueue.shift();
58
+ const event = this.#eventQueue.shift();
58
59
  if (event) {
59
60
  return { done: false, value: event };
60
61
  } else {
@@ -63,30 +64,30 @@ export class StreamAdapterWrapper extends SynthesizeStream {
63
64
  }
64
65
 
65
66
  async close(): Promise<void> {
66
- this.task.cancel();
67
+ this.#task.cancel();
67
68
  try {
68
- await this.task.run;
69
+ await this.#task.run;
69
70
  } finally {
70
- this.eventQueue.push(undefined);
71
+ this.#eventQueue.push(undefined);
71
72
  }
72
73
  }
73
74
  }
74
75
 
75
76
  export class StreamAdapter extends TTS {
76
- tts: TTS;
77
- tokenizer: SentenceTokenizer;
77
+ #tts: TTS;
78
+ #tokenizer: SentenceTokenizer;
78
79
 
79
80
  constructor(tts: TTS, tokenizer: SentenceTokenizer) {
80
81
  super(true);
81
- this.tts = tts;
82
- this.tokenizer = tokenizer;
82
+ this.#tts = tts;
83
+ this.#tokenizer = tokenizer;
83
84
  }
84
85
 
85
86
  synthesize(text: string): Promise<ChunkedStream> {
86
- return this.tts.synthesize(text);
87
+ return this.#tts.synthesize(text);
87
88
  }
88
89
 
89
90
  stream() {
90
- return new StreamAdapterWrapper(this.tts, this.tokenizer.stream(undefined));
91
+ return new StreamAdapterWrapper(this.#tts, this.#tokenizer.stream(undefined));
91
92
  }
92
93
  }
package/src/tts/tts.ts CHANGED
@@ -10,8 +10,18 @@ export interface SynthesizedAudio {
10
10
  }
11
11
 
12
12
  export enum SynthesisEventType {
13
+ /**
14
+ * Indicate the start of synthesis.
15
+ * Retriggered after FINISHED.
16
+ */
13
17
  STARTED = 0,
18
+ /**
19
+ * Indicate that audio data is available.
20
+ */
14
21
  AUDIO = 1,
22
+ /**
23
+ * Indicate the end of synthesis. Does not necessarily mean stream is done.
24
+ */
15
25
  FINISHED = 2,
16
26
  }
17
27
 
package/src/utils.ts CHANGED
@@ -1,10 +1,26 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import { AudioFrame } from '@livekit/rtc-node';
4
+ import type {
5
+ LocalParticipant,
6
+ RemoteParticipant,
7
+ Room,
8
+ TrackPublication,
9
+ } from '@livekit/rtc-node';
10
+ import { AudioFrame, TrackSource } from '@livekit/rtc-node';
11
+ import { EventEmitter, once } from 'events';
5
12
 
13
+ /** Union of a single and a list of {@link AudioFrame}s */
6
14
  export type AudioBuffer = AudioFrame[] | AudioFrame;
7
15
 
16
+ /**
17
+ * Merge one or more {@link AudioFrame}s into a single one.
18
+ *
19
+ * @param buffer Either an {@link AudioFrame} or a list thereof
20
+ * @throws
21
+ * {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypeError
22
+ * | TypeError} if sample rate or channel count are mismatched
23
+ */
8
24
  export const mergeFrames = (buffer: AudioBuffer): AudioFrame => {
9
25
  if (Array.isArray(buffer)) {
10
26
  buffer = buffer as AudioFrame[];
@@ -15,7 +31,7 @@ export const mergeFrames = (buffer: AudioBuffer): AudioFrame => {
15
31
  const sampleRate = buffer[0].sampleRate;
16
32
  const channels = buffer[0].channels;
17
33
  let samplesPerChannel = 0;
18
- let data = new Uint16Array();
34
+ let data = new Int16Array();
19
35
 
20
36
  for (const frame of buffer) {
21
37
  if (frame.sampleRate !== sampleRate) {
@@ -26,7 +42,7 @@ export const mergeFrames = (buffer: AudioBuffer): AudioFrame => {
26
42
  throw new TypeError('channel count mismatch');
27
43
  }
28
44
 
29
- data = new Uint16Array([...data, ...frame.data]);
45
+ data = new Int16Array([...data, ...frame.data]);
30
46
  samplesPerChannel += frame.samplesPerChannel;
31
47
  }
32
48
 
@@ -35,3 +51,109 @@ export const mergeFrames = (buffer: AudioBuffer): AudioFrame => {
35
51
 
36
52
  return buffer;
37
53
  };
54
+
55
+ export const findMicroTrackId = (room: Room, identity: string): string => {
56
+ let p: RemoteParticipant | LocalParticipant | undefined = room.remoteParticipants.get(identity);
57
+
58
+ if (identity === room.localParticipant?.identity) {
59
+ p = room.localParticipant;
60
+ }
61
+
62
+ if (!p) {
63
+ throw new Error(`participant ${identity} not found`);
64
+ }
65
+
66
+ // find first microphone track
67
+ let trackId: string | undefined;
68
+ p.trackPublications.forEach((track: TrackPublication) => {
69
+ if (track.source === TrackSource.SOURCE_MICROPHONE) {
70
+ trackId = track.sid;
71
+ return;
72
+ }
73
+ });
74
+
75
+ if (!trackId) {
76
+ throw new Error(`participant ${identity} does not have a microphone track`);
77
+ }
78
+
79
+ return trackId;
80
+ };
81
+
82
+ /** @internal */
83
+ export class Mutex {
84
+ #locking: Promise<void>;
85
+ #locks: number;
86
+ #limit: number;
87
+
88
+ constructor(limit = 1) {
89
+ this.#locking = Promise.resolve();
90
+ this.#locks = 0;
91
+ this.#limit = limit;
92
+ }
93
+
94
+ isLocked(): boolean {
95
+ return this.#locks >= this.#limit;
96
+ }
97
+
98
+ async lock(): Promise<() => void> {
99
+ this.#locks += 1;
100
+
101
+ let unlockNext: () => void;
102
+
103
+ const willLock = new Promise<void>(
104
+ (resolve) =>
105
+ (unlockNext = () => {
106
+ this.#locks -= 1;
107
+ resolve();
108
+ }),
109
+ );
110
+
111
+ const willUnlock = this.#locking.then(() => unlockNext);
112
+ this.#locking = this.#locking.then(() => willLock);
113
+ return willUnlock;
114
+ }
115
+ }
116
+
117
+ /** @internal */
118
+ export class Queue<T> {
119
+ #items: T[] = [];
120
+ #limit?: number;
121
+ #events = new EventEmitter();
122
+
123
+ constructor(limit?: number) {
124
+ this.#limit = limit;
125
+ }
126
+
127
+ async get(): Promise<T> {
128
+ if (this.#items.length === 0) {
129
+ await once(this.#events, 'put');
130
+ }
131
+ const item = this.#items.shift()!;
132
+ this.#events.emit('get');
133
+ return item;
134
+ }
135
+
136
+ async put(item: T) {
137
+ if (this.#limit && this.#items.length >= this.#limit) {
138
+ await once(this.#events, 'get');
139
+ }
140
+ this.#items.push(item);
141
+ this.#events.emit('put');
142
+ }
143
+ }
144
+
145
+ /** @internal */
146
+ export class Future {
147
+ #await = new Promise<void>((resolve, reject: (_: Error) => void) => {
148
+ this.resolve = resolve;
149
+ this.reject = reject;
150
+ });
151
+
152
+ get await() {
153
+ return this.#await;
154
+ }
155
+ resolve() {}
156
+ reject(_: Error) {
157
+ _;
158
+ }
159
+ }
package/src/vad.ts CHANGED
@@ -11,12 +11,23 @@ export enum VADEventType {
11
11
 
12
12
  export interface VADEvent {
13
13
  type: VADEventType;
14
+ /**
15
+ * Index of the samples of the event (when the event was fired)
16
+ */
14
17
  samplesIndex: number;
18
+ /**
19
+ * Duration of speech, in seconds
20
+ */
15
21
  duration: number;
16
22
  speech: AudioFrame[];
17
23
  }
18
24
 
19
25
  export abstract class VAD {
26
+ /**
27
+ * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.
28
+ *
29
+ * @param options
30
+ */
20
31
  abstract stream({
21
32
  minSpeakingDuration,
22
33
  minSilenceDuration,
@@ -24,10 +35,28 @@ export abstract class VAD {
24
35
  sampleRate,
25
36
  maxBufferedSpeech,
26
37
  }: {
38
+ /**
39
+ * Minimum duration of speech required to trigger a {@link VADEventType.START_OF_SPEECH} event
40
+ */
27
41
  minSpeakingDuration: number;
42
+ /**
43
+ * Milliseconds to wait before separating speech chunk.
44
+ * Not always precise, generally rounded to the nearest 40ms depending on VAD implementation
45
+ */
28
46
  minSilenceDuration: number;
47
+ /**
48
+ * Number of frames to pad the start and end of speech with
49
+ */
29
50
  paddingDuration: number;
51
+ /**
52
+ * Sample rate of inference/processing
53
+ */
30
54
  sampleRate: number;
55
+ /**
56
+ * Number of seconds the buffer may keep until {@link VADEventType.END_OF_SPEECH} is triggered.
57
+ * It is recommended to set this to a positive value, as zero may OOM if the user doesn't stop
58
+ * speaking.
59
+ */
31
60
  maxBufferedSpeech: number;
32
61
  }): VADStream;
33
62
  }