@livekit/agents 0.3.5 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +36 -0
- package/dist/audio.js +17 -30
- package/dist/audio.js.map +1 -1
- package/dist/cli.js +3 -14
- package/dist/cli.js.map +1 -1
- package/dist/http_server.d.ts +1 -1
- package/dist/http_server.js +5 -9
- package/dist/http_server.js.map +1 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +14 -2
- package/dist/index.js.map +1 -1
- package/dist/ipc/job_executor.js +3 -5
- package/dist/ipc/job_executor.js.map +1 -1
- package/dist/ipc/job_main.d.ts +1 -1
- package/dist/ipc/proc_job_executor.js +66 -80
- package/dist/ipc/proc_job_executor.js.map +1 -1
- package/dist/ipc/proc_pool.d.ts +3 -3
- package/dist/ipc/proc_pool.d.ts.map +1 -1
- package/dist/ipc/proc_pool.js +38 -20
- package/dist/ipc/proc_pool.js.map +1 -1
- package/dist/job.js +56 -73
- package/dist/job.js.map +1 -1
- package/dist/llm/chat_context.d.ts +66 -0
- package/dist/llm/chat_context.d.ts.map +1 -0
- package/dist/llm/chat_context.js +93 -0
- package/dist/llm/chat_context.js.map +1 -0
- package/dist/llm/function_context.d.ts +19 -1
- package/dist/llm/function_context.d.ts.map +1 -1
- package/dist/llm/function_context.js +54 -18
- package/dist/llm/function_context.js.map +1 -1
- package/dist/llm/function_context.test.d.ts +2 -0
- package/dist/llm/function_context.test.d.ts.map +1 -0
- package/dist/llm/function_context.test.js +218 -0
- package/dist/llm/function_context.test.js.map +1 -0
- package/dist/llm/index.d.ts +3 -2
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +3 -2
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.d.ts +53 -0
- package/dist/llm/llm.d.ts.map +1 -0
- package/dist/llm/llm.js +45 -0
- package/dist/llm/llm.js.map +1 -0
- package/dist/multimodal/agent_playout.d.ts +1 -1
- package/dist/multimodal/agent_playout.js +116 -153
- package/dist/multimodal/agent_playout.js.map +1 -1
- package/dist/multimodal/multimodal_agent.d.ts +4 -3
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
- package/dist/multimodal/multimodal_agent.js +207 -234
- package/dist/multimodal/multimodal_agent.js.map +1 -1
- package/dist/pipeline/agent_output.d.ts +30 -0
- package/dist/pipeline/agent_output.d.ts.map +1 -0
- package/dist/pipeline/agent_output.js +155 -0
- package/dist/pipeline/agent_output.js.map +1 -0
- package/dist/pipeline/agent_playout.d.ts +38 -0
- package/dist/pipeline/agent_playout.d.ts.map +1 -0
- package/dist/pipeline/agent_playout.js +142 -0
- package/dist/pipeline/agent_playout.js.map +1 -0
- package/dist/pipeline/human_input.d.ts +28 -0
- package/dist/pipeline/human_input.d.ts.map +1 -0
- package/dist/pipeline/human_input.js +134 -0
- package/dist/pipeline/human_input.js.map +1 -0
- package/dist/pipeline/index.d.ts +2 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +5 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/pipeline_agent.d.ts +134 -0
- package/dist/pipeline/pipeline_agent.d.ts.map +1 -0
- package/dist/pipeline/pipeline_agent.js +661 -0
- package/dist/pipeline/pipeline_agent.js.map +1 -0
- package/dist/pipeline/speech_handle.d.ts +27 -0
- package/dist/pipeline/speech_handle.d.ts.map +1 -0
- package/dist/pipeline/speech_handle.js +102 -0
- package/dist/pipeline/speech_handle.js.map +1 -0
- package/dist/plugin.js +7 -20
- package/dist/plugin.js.map +1 -1
- package/dist/stt/index.d.ts +1 -2
- package/dist/stt/index.d.ts.map +1 -1
- package/dist/stt/index.js +1 -2
- package/dist/stt/index.js.map +1 -1
- package/dist/stt/stt.d.ts +62 -24
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +77 -27
- package/dist/stt/stt.js.map +1 -1
- package/dist/tokenize/basic/basic.d.ts +16 -0
- package/dist/tokenize/basic/basic.d.ts.map +1 -0
- package/dist/tokenize/basic/basic.js +50 -0
- package/dist/tokenize/basic/basic.js.map +1 -0
- package/dist/tokenize/basic/hyphenator.d.ts +17 -0
- package/dist/tokenize/basic/hyphenator.d.ts.map +1 -0
- package/dist/tokenize/basic/hyphenator.js +420 -0
- package/dist/tokenize/basic/hyphenator.js.map +1 -0
- package/dist/tokenize/basic/index.d.ts +2 -0
- package/dist/tokenize/basic/index.d.ts.map +1 -0
- package/dist/tokenize/basic/index.js +5 -0
- package/dist/tokenize/basic/index.js.map +1 -0
- package/dist/tokenize/basic/paragraph.d.ts +5 -0
- package/dist/tokenize/basic/paragraph.d.ts.map +1 -0
- package/dist/tokenize/basic/paragraph.js +38 -0
- package/dist/tokenize/basic/paragraph.js.map +1 -0
- package/dist/tokenize/basic/sentence.d.ts +5 -0
- package/dist/tokenize/basic/sentence.d.ts.map +1 -0
- package/dist/tokenize/basic/sentence.js +60 -0
- package/dist/tokenize/basic/sentence.js.map +1 -0
- package/dist/tokenize/basic/word.d.ts +5 -0
- package/dist/tokenize/basic/word.d.ts.map +1 -0
- package/dist/tokenize/basic/word.js +23 -0
- package/dist/tokenize/basic/word.js.map +1 -0
- package/dist/tokenize/index.d.ts +5 -0
- package/dist/tokenize/index.d.ts.map +1 -0
- package/dist/tokenize/index.js +8 -0
- package/dist/tokenize/index.js.map +1 -0
- package/dist/tokenize/token_stream.d.ts +36 -0
- package/dist/tokenize/token_stream.d.ts.map +1 -0
- package/dist/tokenize/token_stream.js +136 -0
- package/dist/tokenize/token_stream.js.map +1 -0
- package/dist/tokenize/tokenizer.d.ts +55 -0
- package/dist/tokenize/tokenizer.d.ts.map +1 -0
- package/dist/tokenize/tokenizer.js +117 -0
- package/dist/tokenize/tokenizer.js.map +1 -0
- package/dist/transcription.js +78 -89
- package/dist/transcription.js.map +1 -1
- package/dist/tts/index.d.ts +1 -3
- package/dist/tts/index.d.ts.map +1 -1
- package/dist/tts/index.js +1 -3
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/tts.d.ts +66 -37
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +79 -74
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.d.ts +21 -6
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +120 -76
- package/dist/utils.js.map +1 -1
- package/dist/vad.d.ts +43 -39
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +51 -4
- package/dist/vad.js.map +1 -1
- package/dist/worker.d.ts +1 -1
- package/dist/worker.js +257 -247
- package/dist/worker.js.map +1 -1
- package/package.json +4 -3
- package/src/index.ts +16 -2
- package/src/ipc/proc_pool.ts +25 -13
- package/src/llm/chat_context.ts +147 -0
- package/src/llm/function_context.test.ts +248 -0
- package/src/llm/function_context.ts +77 -18
- package/src/llm/index.ts +21 -2
- package/src/llm/llm.ts +102 -0
- package/src/multimodal/multimodal_agent.ts +6 -2
- package/src/pipeline/agent_output.ts +185 -0
- package/src/pipeline/agent_playout.ts +187 -0
- package/src/pipeline/human_input.ts +166 -0
- package/src/pipeline/index.ts +15 -0
- package/src/pipeline/pipeline_agent.ts +917 -0
- package/src/pipeline/speech_handle.ts +136 -0
- package/src/stt/index.ts +8 -2
- package/src/stt/stt.ts +98 -31
- package/src/tokenize/basic/basic.ts +73 -0
- package/src/tokenize/basic/hyphenator.ts +436 -0
- package/src/tokenize/basic/index.ts +5 -0
- package/src/tokenize/basic/paragraph.ts +43 -0
- package/src/tokenize/basic/sentence.ts +69 -0
- package/src/tokenize/basic/word.ts +27 -0
- package/src/tokenize/index.ts +16 -0
- package/src/tokenize/token_stream.ts +163 -0
- package/src/tokenize/tokenizer.ts +152 -0
- package/src/tts/index.ts +1 -20
- package/src/tts/tts.ts +110 -57
- package/src/utils.ts +95 -25
- package/src/vad.ts +86 -45
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/stt/stream_adapter.d.ts +0 -19
- package/dist/stt/stream_adapter.d.ts.map +0 -1
- package/dist/stt/stream_adapter.js +0 -96
- package/dist/stt/stream_adapter.js.map +0 -1
- package/dist/tokenize.d.ts +0 -15
- package/dist/tokenize.d.ts.map +0 -1
- package/dist/tokenize.js +0 -12
- package/dist/tokenize.js.map +0 -1
- package/dist/tts/stream_adapter.d.ts +0 -19
- package/dist/tts/stream_adapter.d.ts.map +0 -1
- package/dist/tts/stream_adapter.js +0 -111
- package/dist/tts/stream_adapter.js.map +0 -1
- package/src/stt/stream_adapter.ts +0 -104
- package/src/tokenize.ts +0 -22
- package/src/tts/stream_adapter.ts +0 -93
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type {
|
|
5
|
+
RemoteAudioTrack,
|
|
6
|
+
RemoteParticipant,
|
|
7
|
+
RemoteTrackPublication,
|
|
8
|
+
Room,
|
|
9
|
+
} from '@livekit/rtc-node';
|
|
10
|
+
import { AudioStream, RoomEvent, TrackSource } from '@livekit/rtc-node';
|
|
11
|
+
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
12
|
+
import { EventEmitter } from 'node:events';
|
|
13
|
+
import { log } from '../log.js';
|
|
14
|
+
import type { STT, SpeechEvent } from '../stt/stt.js';
|
|
15
|
+
import { SpeechEventType } from '../stt/stt.js';
|
|
16
|
+
import { CancellablePromise, gracefullyCancel } from '../utils.js';
|
|
17
|
+
import type { VAD, VADEvent } from '../vad.js';
|
|
18
|
+
import { VADEventType } from '../vad.js';
|
|
19
|
+
|
|
20
|
+
/**
 * Events emitted by {@link HumanInput}.
 *
 * Members are numbered explicitly; the values are the same ones TypeScript
 * would assign automatically (0..4 in declaration order).
 */
export enum HumanInputEvent {
  /** Forwarded when the VAD stream yields a START_OF_SPEECH event. */
  START_OF_SPEECH = 0,
  /** Forwarded when the VAD stream yields an INFERENCE_DONE event. */
  VAD_INFERENCE_DONE = 1,
  /** Forwarded when the VAD stream yields an END_OF_SPEECH event. */
  END_OF_SPEECH = 2,
  /** Forwarded when the STT stream yields a FINAL_TRANSCRIPT event. */
  FINAL_TRANSCRIPT = 3,
  /** Forwarded when the STT stream yields an INTERIM_TRANSCRIPT event. */
  INTERIM_TRANSCRIPT = 4,
}
|
|
27
|
+
|
|
28
|
+
/** Listener invoked with an event produced by the VAD stream. */
type VADEventCallback = (event: VADEvent) => void;

/** Listener invoked with an event produced by the STT stream. */
type SpeechEventCallback = (event: SpeechEvent) => void;

/**
 * Listener signatures for each {@link HumanInputEvent}, used to type the
 * event emitter that {@link HumanInput} extends.
 */
export type HumanInputCallbacks = {
  // Voice-activity events carry the VAD event unchanged.
  [HumanInputEvent.START_OF_SPEECH]: VADEventCallback;
  [HumanInputEvent.VAD_INFERENCE_DONE]: VADEventCallback;
  [HumanInputEvent.END_OF_SPEECH]: VADEventCallback;
  // Transcript events carry the STT speech event unchanged.
  [HumanInputEvent.FINAL_TRANSCRIPT]: SpeechEventCallback;
  [HumanInputEvent.INTERIM_TRANSCRIPT]: SpeechEventCallback;
};
|
|
35
|
+
|
|
36
|
+
/**
 * Feeds a remote participant's microphone audio into a VAD and an STT stream
 * and re-emits their results as {@link HumanInputEvent}s.
 *
 * The instance listens for `TrackPublished` / `TrackSubscribed` on the room so
 * that it can (re)attach whenever the participant's microphone track changes.
 */
export class HumanInput extends (EventEmitter as new () => TypedEmitter<HumanInputCallbacks>) {
  #closed = false;
  #room: Room;
  #vad: VAD;
  #stt: STT;
  #participant: RemoteParticipant;
  #subscribedTrack?: RemoteAudioTrack;
  #recognizeTask?: CancellablePromise<void>;
  #speaking = false;
  #speechProbability = 0;
  #logger = log();

  // Single stable listener reference: the exact same function must be handed
  // to both `on` and `off`, otherwise it cannot be removed individually.
  #onTrackChange = (): void => {
    this.#subscribeToMicrophone();
  };

  /**
   * @param room - room whose track events trigger (re)subscription
   * @param vad - voice activity detector used to create a stream per track
   * @param stt - speech-to-text engine used to create a stream per track
   * @param participant - remote participant whose microphone is consumed
   */
  constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant) {
    super();
    this.#room = room;
    this.#vad = vad;
    this.#stt = stt;
    this.#participant = participant;

    this.#room.on(RoomEvent.TrackPublished, this.#onTrackChange);
    this.#room.on(RoomEvent.TrackSubscribed, this.#onTrackChange);
    // The microphone may already be published when we are constructed.
    this.#subscribeToMicrophone();
  }

  /**
   * Finds the participant's microphone publication, makes sure it is
   * subscribed, and starts a new recognition task when the underlying track
   * differs from the one currently being consumed.
   */
  #subscribeToMicrophone(): void {
    if (!this.#participant) {
      this.#logger.error('Participant is not set');
      return;
    }

    let microphonePublication: RemoteTrackPublication | undefined = undefined;
    for (const publication of this.#participant.trackPublications.values()) {
      if (publication.source === TrackSource.SOURCE_MICROPHONE) {
        microphonePublication = publication;
        break;
      }
    }
    if (!microphonePublication) {
      return;
    }

    if (!microphonePublication.subscribed) {
      microphonePublication.setSubscribed(true);
    }

    const track = microphonePublication.track;
    if (!track || track === this.#subscribedTrack) {
      // Not subscribed yet, or already consuming this very track.
      return;
    }

    this.#subscribedTrack = track;
    // Stop consuming the previous track before attaching to the new one.
    this.#recognizeTask?.cancel();
    this.#recognizeTask = this.#recognize(new AudioStream(track, 16000));
  }

  /**
   * Builds the task that pushes every audio frame into fresh STT and VAD
   * streams and forwards their events to listeners until the streams end or
   * the task is cancelled.
   */
  #recognize(audioStream: AudioStream): CancellablePromise<void> {
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    return new CancellablePromise(async (resolve, _, onCancel) => {
      let cancelled = false;
      onCancel(() => {
        cancelled = true;
      });

      const sttStream = this.#stt.stream();
      const vadStream = this.#vad.stream();

      // Fan each incoming audio frame out to both consumers.
      const pumpAudio = async () => {
        for await (const frame of audioStream) {
          if (cancelled) return;
          sttStream.pushFrame(frame);
          vadStream.pushFrame(frame);
        }
      };

      // Track speaking state / probability and re-emit VAD events.
      const pumpVad = async () => {
        for await (const ev of vadStream) {
          if (cancelled) return;
          switch (ev.type) {
            case VADEventType.START_OF_SPEECH:
              this.#speaking = true;
              this.emit(HumanInputEvent.START_OF_SPEECH, ev);
              break;
            case VADEventType.INFERENCE_DONE:
              this.#speechProbability = ev.probability;
              this.emit(HumanInputEvent.VAD_INFERENCE_DONE, ev);
              break;
            case VADEventType.END_OF_SPEECH:
              this.#speaking = false;
              this.emit(HumanInputEvent.END_OF_SPEECH, ev);
              break;
          }
        }
      };

      // Re-emit final and interim transcripts; other STT events are ignored.
      const pumpStt = async () => {
        for await (const ev of sttStream) {
          if (cancelled) return;
          if (ev.type === SpeechEventType.FINAL_TRANSCRIPT) {
            this.emit(HumanInputEvent.FINAL_TRANSCRIPT, ev);
          } else if (ev.type === SpeechEventType.INTERIM_TRANSCRIPT) {
            this.emit(HumanInputEvent.INTERIM_TRANSCRIPT, ev);
          }
        }
      };

      try {
        await Promise.all([pumpAudio(), pumpVad(), pumpStt()]);
      } finally {
        // Release both streams even when one of the pumps throws.
        sttStream.close();
        vadStream.close();
      }
      resolve();
    });
  }

  /** Whether the most recent VAD start/end event left the user speaking. */
  get speaking(): boolean {
    return this.#speaking;
  }

  /** Probability reported by the most recent VAD INFERENCE_DONE event (0 before any). */
  get speakingProbability(): number {
    return this.#speechProbability;
  }

  /**
   * Detaches from the room and cancels the running recognition task, if any.
   *
   * Only the listeners this instance registered are removed; listeners that
   * other components attached to the same room are left in place.
   *
   * @throws Error if the instance has already been closed
   */
  async close() {
    if (this.#closed) {
      throw new Error('HumanInput already closed');
    }
    this.#closed = true;
    this.#room.off(RoomEvent.TrackPublished, this.#onTrackChange);
    this.#room.off(RoomEvent.TrackSubscribed, this.#onTrackChange);
    this.#speaking = false;
    if (this.#recognizeTask) {
      await gracefullyCancel(this.#recognizeTask);
    }
  }
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
export {
|
|
6
|
+
type AgentState,
|
|
7
|
+
type BeforeTTSCallback,
|
|
8
|
+
type BeforeLLMCallback,
|
|
9
|
+
type VPAEvent,
|
|
10
|
+
type VPACallbacks,
|
|
11
|
+
type AgentCallContext,
|
|
12
|
+
type AgentTranscriptionOptions,
|
|
13
|
+
type VPAOptions,
|
|
14
|
+
VoicePipelineAgent,
|
|
15
|
+
} from './pipeline_agent.js';
|