@livekit/agents 0.3.5 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +36 -0
  3. package/dist/audio.js +17 -30
  4. package/dist/audio.js.map +1 -1
  5. package/dist/cli.js +3 -14
  6. package/dist/cli.js.map +1 -1
  7. package/dist/http_server.d.ts +1 -1
  8. package/dist/http_server.js +5 -9
  9. package/dist/http_server.js.map +1 -1
  10. package/dist/index.d.ts +3 -2
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +14 -2
  13. package/dist/index.js.map +1 -1
  14. package/dist/ipc/job_executor.js +3 -5
  15. package/dist/ipc/job_executor.js.map +1 -1
  16. package/dist/ipc/job_main.d.ts +1 -1
  17. package/dist/ipc/proc_job_executor.js +66 -80
  18. package/dist/ipc/proc_job_executor.js.map +1 -1
  19. package/dist/ipc/proc_pool.d.ts +3 -3
  20. package/dist/ipc/proc_pool.d.ts.map +1 -1
  21. package/dist/ipc/proc_pool.js +38 -20
  22. package/dist/ipc/proc_pool.js.map +1 -1
  23. package/dist/job.js +56 -73
  24. package/dist/job.js.map +1 -1
  25. package/dist/llm/chat_context.d.ts +66 -0
  26. package/dist/llm/chat_context.d.ts.map +1 -0
  27. package/dist/llm/chat_context.js +93 -0
  28. package/dist/llm/chat_context.js.map +1 -0
  29. package/dist/llm/function_context.d.ts +19 -1
  30. package/dist/llm/function_context.d.ts.map +1 -1
  31. package/dist/llm/function_context.js +54 -18
  32. package/dist/llm/function_context.js.map +1 -1
  33. package/dist/llm/function_context.test.d.ts +2 -0
  34. package/dist/llm/function_context.test.d.ts.map +1 -0
  35. package/dist/llm/function_context.test.js +218 -0
  36. package/dist/llm/function_context.test.js.map +1 -0
  37. package/dist/llm/index.d.ts +3 -2
  38. package/dist/llm/index.d.ts.map +1 -1
  39. package/dist/llm/index.js +3 -2
  40. package/dist/llm/index.js.map +1 -1
  41. package/dist/llm/llm.d.ts +53 -0
  42. package/dist/llm/llm.d.ts.map +1 -0
  43. package/dist/llm/llm.js +45 -0
  44. package/dist/llm/llm.js.map +1 -0
  45. package/dist/multimodal/agent_playout.d.ts +1 -1
  46. package/dist/multimodal/agent_playout.js +116 -153
  47. package/dist/multimodal/agent_playout.js.map +1 -1
  48. package/dist/multimodal/multimodal_agent.d.ts +4 -3
  49. package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
  50. package/dist/multimodal/multimodal_agent.js +207 -234
  51. package/dist/multimodal/multimodal_agent.js.map +1 -1
  52. package/dist/pipeline/agent_output.d.ts +30 -0
  53. package/dist/pipeline/agent_output.d.ts.map +1 -0
  54. package/dist/pipeline/agent_output.js +155 -0
  55. package/dist/pipeline/agent_output.js.map +1 -0
  56. package/dist/pipeline/agent_playout.d.ts +38 -0
  57. package/dist/pipeline/agent_playout.d.ts.map +1 -0
  58. package/dist/pipeline/agent_playout.js +142 -0
  59. package/dist/pipeline/agent_playout.js.map +1 -0
  60. package/dist/pipeline/human_input.d.ts +28 -0
  61. package/dist/pipeline/human_input.d.ts.map +1 -0
  62. package/dist/pipeline/human_input.js +134 -0
  63. package/dist/pipeline/human_input.js.map +1 -0
  64. package/dist/pipeline/index.d.ts +2 -0
  65. package/dist/pipeline/index.d.ts.map +1 -0
  66. package/dist/pipeline/index.js +5 -0
  67. package/dist/pipeline/index.js.map +1 -0
  68. package/dist/pipeline/pipeline_agent.d.ts +134 -0
  69. package/dist/pipeline/pipeline_agent.d.ts.map +1 -0
  70. package/dist/pipeline/pipeline_agent.js +661 -0
  71. package/dist/pipeline/pipeline_agent.js.map +1 -0
  72. package/dist/pipeline/speech_handle.d.ts +27 -0
  73. package/dist/pipeline/speech_handle.d.ts.map +1 -0
  74. package/dist/pipeline/speech_handle.js +102 -0
  75. package/dist/pipeline/speech_handle.js.map +1 -0
  76. package/dist/plugin.js +7 -20
  77. package/dist/plugin.js.map +1 -1
  78. package/dist/stt/index.d.ts +1 -2
  79. package/dist/stt/index.d.ts.map +1 -1
  80. package/dist/stt/index.js +1 -2
  81. package/dist/stt/index.js.map +1 -1
  82. package/dist/stt/stt.d.ts +62 -24
  83. package/dist/stt/stt.d.ts.map +1 -1
  84. package/dist/stt/stt.js +77 -27
  85. package/dist/stt/stt.js.map +1 -1
  86. package/dist/tokenize/basic/basic.d.ts +16 -0
  87. package/dist/tokenize/basic/basic.d.ts.map +1 -0
  88. package/dist/tokenize/basic/basic.js +50 -0
  89. package/dist/tokenize/basic/basic.js.map +1 -0
  90. package/dist/tokenize/basic/hyphenator.d.ts +17 -0
  91. package/dist/tokenize/basic/hyphenator.d.ts.map +1 -0
  92. package/dist/tokenize/basic/hyphenator.js +420 -0
  93. package/dist/tokenize/basic/hyphenator.js.map +1 -0
  94. package/dist/tokenize/basic/index.d.ts +2 -0
  95. package/dist/tokenize/basic/index.d.ts.map +1 -0
  96. package/dist/tokenize/basic/index.js +5 -0
  97. package/dist/tokenize/basic/index.js.map +1 -0
  98. package/dist/tokenize/basic/paragraph.d.ts +5 -0
  99. package/dist/tokenize/basic/paragraph.d.ts.map +1 -0
  100. package/dist/tokenize/basic/paragraph.js +38 -0
  101. package/dist/tokenize/basic/paragraph.js.map +1 -0
  102. package/dist/tokenize/basic/sentence.d.ts +5 -0
  103. package/dist/tokenize/basic/sentence.d.ts.map +1 -0
  104. package/dist/tokenize/basic/sentence.js +60 -0
  105. package/dist/tokenize/basic/sentence.js.map +1 -0
  106. package/dist/tokenize/basic/word.d.ts +5 -0
  107. package/dist/tokenize/basic/word.d.ts.map +1 -0
  108. package/dist/tokenize/basic/word.js +23 -0
  109. package/dist/tokenize/basic/word.js.map +1 -0
  110. package/dist/tokenize/index.d.ts +5 -0
  111. package/dist/tokenize/index.d.ts.map +1 -0
  112. package/dist/tokenize/index.js +8 -0
  113. package/dist/tokenize/index.js.map +1 -0
  114. package/dist/tokenize/token_stream.d.ts +36 -0
  115. package/dist/tokenize/token_stream.d.ts.map +1 -0
  116. package/dist/tokenize/token_stream.js +136 -0
  117. package/dist/tokenize/token_stream.js.map +1 -0
  118. package/dist/tokenize/tokenizer.d.ts +55 -0
  119. package/dist/tokenize/tokenizer.d.ts.map +1 -0
  120. package/dist/tokenize/tokenizer.js +117 -0
  121. package/dist/tokenize/tokenizer.js.map +1 -0
  122. package/dist/transcription.js +78 -89
  123. package/dist/transcription.js.map +1 -1
  124. package/dist/tts/index.d.ts +1 -3
  125. package/dist/tts/index.d.ts.map +1 -1
  126. package/dist/tts/index.js +1 -3
  127. package/dist/tts/index.js.map +1 -1
  128. package/dist/tts/tts.d.ts +66 -37
  129. package/dist/tts/tts.d.ts.map +1 -1
  130. package/dist/tts/tts.js +79 -74
  131. package/dist/tts/tts.js.map +1 -1
  132. package/dist/utils.d.ts +21 -6
  133. package/dist/utils.d.ts.map +1 -1
  134. package/dist/utils.js +120 -76
  135. package/dist/utils.js.map +1 -1
  136. package/dist/vad.d.ts +43 -39
  137. package/dist/vad.d.ts.map +1 -1
  138. package/dist/vad.js +51 -4
  139. package/dist/vad.js.map +1 -1
  140. package/dist/worker.d.ts +1 -1
  141. package/dist/worker.js +257 -247
  142. package/dist/worker.js.map +1 -1
  143. package/package.json +4 -3
  144. package/src/index.ts +16 -2
  145. package/src/ipc/proc_pool.ts +25 -13
  146. package/src/llm/chat_context.ts +147 -0
  147. package/src/llm/function_context.test.ts +248 -0
  148. package/src/llm/function_context.ts +77 -18
  149. package/src/llm/index.ts +21 -2
  150. package/src/llm/llm.ts +102 -0
  151. package/src/multimodal/multimodal_agent.ts +6 -2
  152. package/src/pipeline/agent_output.ts +185 -0
  153. package/src/pipeline/agent_playout.ts +187 -0
  154. package/src/pipeline/human_input.ts +166 -0
  155. package/src/pipeline/index.ts +15 -0
  156. package/src/pipeline/pipeline_agent.ts +917 -0
  157. package/src/pipeline/speech_handle.ts +136 -0
  158. package/src/stt/index.ts +8 -2
  159. package/src/stt/stt.ts +98 -31
  160. package/src/tokenize/basic/basic.ts +73 -0
  161. package/src/tokenize/basic/hyphenator.ts +436 -0
  162. package/src/tokenize/basic/index.ts +5 -0
  163. package/src/tokenize/basic/paragraph.ts +43 -0
  164. package/src/tokenize/basic/sentence.ts +69 -0
  165. package/src/tokenize/basic/word.ts +27 -0
  166. package/src/tokenize/index.ts +16 -0
  167. package/src/tokenize/token_stream.ts +163 -0
  168. package/src/tokenize/tokenizer.ts +152 -0
  169. package/src/tts/index.ts +1 -20
  170. package/src/tts/tts.ts +110 -57
  171. package/src/utils.ts +95 -25
  172. package/src/vad.ts +86 -45
  173. package/tsconfig.tsbuildinfo +1 -1
  174. package/dist/stt/stream_adapter.d.ts +0 -19
  175. package/dist/stt/stream_adapter.d.ts.map +0 -1
  176. package/dist/stt/stream_adapter.js +0 -96
  177. package/dist/stt/stream_adapter.js.map +0 -1
  178. package/dist/tokenize.d.ts +0 -15
  179. package/dist/tokenize.d.ts.map +0 -1
  180. package/dist/tokenize.js +0 -12
  181. package/dist/tokenize.js.map +0 -1
  182. package/dist/tts/stream_adapter.d.ts +0 -19
  183. package/dist/tts/stream_adapter.d.ts.map +0 -1
  184. package/dist/tts/stream_adapter.js +0 -111
  185. package/dist/tts/stream_adapter.js.map +0 -1
  186. package/src/stt/stream_adapter.ts +0 -104
  187. package/src/tokenize.ts +0 -22
  188. package/src/tts/stream_adapter.ts +0 -93
@@ -0,0 +1,166 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type {
5
+ RemoteAudioTrack,
6
+ RemoteParticipant,
7
+ RemoteTrackPublication,
8
+ Room,
9
+ } from '@livekit/rtc-node';
10
+ import { AudioStream, RoomEvent, TrackSource } from '@livekit/rtc-node';
11
+ import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
12
+ import { EventEmitter } from 'node:events';
13
+ import { log } from '../log.js';
14
+ import type { STT, SpeechEvent } from '../stt/stt.js';
15
+ import { SpeechEventType } from '../stt/stt.js';
16
+ import { CancellablePromise, gracefullyCancel } from '../utils.js';
17
+ import type { VAD, VADEvent } from '../vad.js';
18
+ import { VADEventType } from '../vad.js';
19
+
20
/**
 * Event identifiers emitted by `HumanInput`.
 *
 * The numeric values are spelled out explicitly; they match the values the
 * members would receive from implicit auto-numbering.
 */
export enum HumanInputEvent {
  /** Emitted when the VAD stream reports a start-of-speech event. */
  START_OF_SPEECH = 0,
  /** Emitted when the VAD stream reports an inference-done event. */
  VAD_INFERENCE_DONE = 1,
  /** Emitted when the VAD stream reports an end-of-speech event. */
  END_OF_SPEECH = 2,
  /** Emitted when the STT stream yields a final transcript. */
  FINAL_TRANSCRIPT = 3,
  /** Emitted when the STT stream yields an interim transcript. */
  INTERIM_TRANSCRIPT = 4,
}
27
+
28
/** Listener shape for events whose payload is a VAD event. */
type VADEventListener = (event: VADEvent) => void;

/** Listener shape for events whose payload is an STT speech event. */
type SpeechEventListener = (event: SpeechEvent) => void;

/**
 * Maps every {@link HumanInputEvent} to the signature of the listener it invokes.
 *
 * The three voice-activity events carry the `VADEvent` that triggered them;
 * the two transcript events carry the originating `SpeechEvent`.
 */
export type HumanInputCallbacks = {
  [HumanInputEvent.START_OF_SPEECH]: VADEventListener;
  [HumanInputEvent.VAD_INFERENCE_DONE]: VADEventListener;
  [HumanInputEvent.END_OF_SPEECH]: VADEventListener;
  [HumanInputEvent.FINAL_TRANSCRIPT]: SpeechEventListener;
  [HumanInputEvent.INTERIM_TRANSCRIPT]: SpeechEventListener;
};
35
+
36
/**
 * Turns a remote participant's microphone audio into voice-activity and
 * transcription events.
 *
 * On construction (and again whenever the room reports a published or
 * subscribed track) the participant's microphone publication is looked up and
 * subscribed to if necessary. Each audio frame read from that track is pushed
 * to one VAD stream and one STT stream; the events those streams yield are
 * re-emitted on this object as {@link HumanInputEvent}s.
 */
export class HumanInput extends (EventEmitter as new () => TypedEmitter<HumanInputCallbacks>) {
  #closed = false;
  #room: Room;
  #vad: VAD;
  #stt: STT;
  #participant: RemoteParticipant;
  #subscribedTrack?: RemoteAudioTrack;
  #recognizeTask?: CancellablePromise<void>;
  #speaking = false;
  #speechProbability = 0;
  #logger = log();
  // One stable function reference for both room events, so that close() can
  // detach exactly the listeners this instance registered and nothing else.
  #onTrackChange = (): void => this.#subscribeToMicrophone();

  /**
   * @param room - room whose track events trigger a microphone (re)lookup
   * @param vad - voice activity detector used to create a VAD stream
   * @param stt - speech-to-text engine used to create an STT stream
   * @param participant - remote participant whose microphone is consumed
   */
  constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant) {
    super();
    this.#room = room;
    this.#vad = vad;
    this.#stt = stt;
    this.#participant = participant;

    this.#room.on(RoomEvent.TrackPublished, this.#onTrackChange);
    this.#room.on(RoomEvent.TrackSubscribed, this.#onTrackChange);
    this.#subscribeToMicrophone();
  }

  /**
   * Finds the participant's microphone publication, subscribes to it when it
   * is not subscribed yet, and (re)starts the recognition task whenever the
   * publication exposes a track different from the one currently in use.
   */
  #subscribeToMicrophone(): void {
    if (!this.#participant) {
      this.#logger.error('Participant is not set');
      return;
    }

    let microphonePublication: RemoteTrackPublication | undefined = undefined;
    for (const publication of this.#participant.trackPublications.values()) {
      if (publication.source === TrackSource.SOURCE_MICROPHONE) {
        microphonePublication = publication;
        break;
      }
    }
    if (!microphonePublication) {
      return;
    }

    if (!microphonePublication.subscribed) {
      microphonePublication.setSubscribed(true);
    }

    const track = microphonePublication.track;
    if (!track || track === this.#subscribedTrack) {
      // Either the track is not available yet, or it is already being consumed.
      return;
    }

    this.#subscribedTrack = track;
    // A previous task may still be reading the old track; ask it to stop.
    this.#recognizeTask?.cancel();

    const audioStream = new AudioStream(track, 16000);

    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    this.#recognizeTask = new CancellablePromise(async (resolve, _, onCancel) => {
      // Cancellation is cooperative: each loop below only notices the flag
      // when its stream delivers the next item.
      let cancelled = false;
      onCancel(() => {
        cancelled = true;
      });

      const sttStream = this.#stt.stream();
      const vadStream = this.#vad.stream();

      // Fan every audio frame out to both the STT and the VAD stream.
      const audioStreamCo = async () => {
        for await (const ev of audioStream) {
          if (cancelled) return;
          sttStream.pushFrame(ev);
          vadStream.pushFrame(ev);
        }
      };

      // Track speaking state / probability and forward VAD events.
      const vadStreamCo = async () => {
        for await (const ev of vadStream) {
          if (cancelled) return;
          switch (ev.type) {
            case VADEventType.START_OF_SPEECH:
              this.#speaking = true;
              this.emit(HumanInputEvent.START_OF_SPEECH, ev);
              break;
            case VADEventType.INFERENCE_DONE:
              this.#speechProbability = ev.probability;
              this.emit(HumanInputEvent.VAD_INFERENCE_DONE, ev);
              break;
            case VADEventType.END_OF_SPEECH:
              this.#speaking = false;
              this.emit(HumanInputEvent.END_OF_SPEECH, ev);
              break;
          }
        }
      };

      // Forward final and interim transcripts; other STT events are ignored.
      const sttStreamCo = async () => {
        for await (const ev of sttStream) {
          if (cancelled) return;
          if (ev.type === SpeechEventType.FINAL_TRANSCRIPT) {
            this.emit(HumanInputEvent.FINAL_TRANSCRIPT, ev);
          } else if (ev.type === SpeechEventType.INTERIM_TRANSCRIPT) {
            this.emit(HumanInputEvent.INTERIM_TRANSCRIPT, ev);
          }
        }
      };

      await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);
      sttStream.close();
      vadStream.close();
      resolve();
    });
  }

  /**
   * Whether the participant is currently considered to be speaking: set on a
   * VAD start-of-speech event, cleared on end-of-speech and on `close()`.
   */
  get speaking(): boolean {
    return this.#speaking;
  }

  /** Probability carried by the most recent VAD inference-done event (0 until one arrives). */
  get speakingProbability(): number {
    return this.#speechProbability;
  }

  /**
   * Stops listening to the room and cancels the running recognition task.
   *
   * Only the two listeners registered by this instance are detached; listeners
   * that other code attached to the same room are left untouched.
   *
   * @throws Error if this instance has already been closed
   */
  async close() {
    if (this.#closed) {
      throw new Error('HumanInput already closed');
    }
    this.#closed = true;
    this.#room.off(RoomEvent.TrackPublished, this.#onTrackChange);
    this.#room.off(RoomEvent.TrackSubscribed, this.#onTrackChange);
    this.#speaking = false;
    if (this.#recognizeTask) {
      await gracefullyCancel(this.#recognizeTask);
    }
  }
}
@@ -0,0 +1,15 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
// Public surface of the pipeline module, re-exported from pipeline_agent.js.

// Type-only re-exports.
export type {
  AgentState,
  BeforeTTSCallback,
  BeforeLLMCallback,
  VPAEvent,
  VPACallbacks,
  AgentCallContext,
  AgentTranscriptionOptions,
  VPAOptions,
} from './pipeline_agent.js';

// Runtime (value) re-export.
export { VoicePipelineAgent } from './pipeline_agent.js';