@livekit/agents 0.4.6 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/dist/audio.cjs +77 -0
- package/dist/audio.cjs.map +1 -0
- package/dist/audio.js +48 -37
- package/dist/audio.js.map +1 -1
- package/dist/cli.cjs +131 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.js +96 -122
- package/dist/cli.js.map +1 -1
- package/dist/generator.cjs +36 -0
- package/dist/generator.cjs.map +1 -0
- package/dist/generator.js +8 -22
- package/dist/generator.js.map +1 -1
- package/dist/http_server.cjs +72 -0
- package/dist/http_server.cjs.map +1 -0
- package/dist/http_server.d.ts +1 -1
- package/dist/http_server.js +44 -47
- package/dist/http_server.js.map +1 -1
- package/dist/index.cjs +78 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.js +26 -28
- package/dist/index.js.map +1 -1
- package/dist/ipc/job_executor.cjs +33 -0
- package/dist/ipc/job_executor.cjs.map +1 -0
- package/dist/ipc/job_executor.js +7 -4
- package/dist/ipc/job_executor.js.map +1 -1
- package/dist/ipc/job_main.cjs +147 -0
- package/dist/ipc/job_main.cjs.map +1 -0
- package/dist/ipc/job_main.d.ts +1 -1
- package/dist/ipc/job_main.js +103 -103
- package/dist/ipc/job_main.js.map +1 -1
- package/dist/ipc/message.cjs +17 -0
- package/dist/ipc/message.cjs.map +1 -0
- package/dist/ipc/message.js +0 -1
- package/dist/ipc/message.js.map +1 -1
- package/dist/ipc/proc_job_executor.cjs +174 -0
- package/dist/ipc/proc_job_executor.cjs.map +1 -0
- package/dist/ipc/proc_job_executor.js +130 -126
- package/dist/ipc/proc_job_executor.js.map +1 -1
- package/dist/ipc/proc_pool.cjs +126 -0
- package/dist/ipc/proc_pool.cjs.map +1 -0
- package/dist/ipc/proc_pool.js +93 -96
- package/dist/ipc/proc_pool.js.map +1 -1
- package/dist/job.cjs +230 -0
- package/dist/job.cjs.map +1 -0
- package/dist/job.d.ts +6 -1
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +195 -198
- package/dist/job.js.map +1 -1
- package/dist/llm/chat_context.cjs +131 -0
- package/dist/llm/chat_context.cjs.map +1 -0
- package/dist/llm/chat_context.js +98 -86
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/function_context.cjs +103 -0
- package/dist/llm/function_context.cjs.map +1 -0
- package/dist/llm/function_context.js +72 -81
- package/dist/llm/function_context.js.map +1 -1
- package/dist/llm/function_context.test.cjs +218 -0
- package/dist/llm/function_context.test.cjs.map +1 -0
- package/dist/llm/function_context.test.js +209 -210
- package/dist/llm/function_context.test.js.map +1 -1
- package/dist/llm/index.cjs +43 -0
- package/dist/llm/index.cjs.map +1 -0
- package/dist/llm/index.js +22 -6
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +76 -0
- package/dist/llm/llm.cjs.map +1 -0
- package/dist/llm/llm.js +48 -42
- package/dist/llm/llm.js.map +1 -1
- package/dist/log.cjs +57 -0
- package/dist/log.cjs.map +1 -0
- package/dist/log.js +27 -26
- package/dist/log.js.map +1 -1
- package/dist/multimodal/agent_playout.cjs +228 -0
- package/dist/multimodal/agent_playout.cjs.map +1 -0
- package/dist/multimodal/agent_playout.d.ts +1 -1
- package/dist/multimodal/agent_playout.js +193 -180
- package/dist/multimodal/agent_playout.js.map +1 -1
- package/dist/multimodal/index.cjs +25 -0
- package/dist/multimodal/index.cjs.map +1 -0
- package/dist/multimodal/index.js +2 -5
- package/dist/multimodal/index.js.map +1 -1
- package/dist/multimodal/multimodal_agent.cjs +404 -0
- package/dist/multimodal/multimodal_agent.cjs.map +1 -0
- package/dist/multimodal/multimodal_agent.d.ts +1 -1
- package/dist/multimodal/multimodal_agent.js +351 -330
- package/dist/multimodal/multimodal_agent.js.map +1 -1
- package/dist/pipeline/agent_output.cjs +172 -0
- package/dist/pipeline/agent_output.cjs.map +1 -0
- package/dist/pipeline/agent_output.js +136 -138
- package/dist/pipeline/agent_output.js.map +1 -1
- package/dist/pipeline/agent_playout.cjs +169 -0
- package/dist/pipeline/agent_playout.cjs.map +1 -0
- package/dist/pipeline/agent_playout.js +126 -136
- package/dist/pipeline/agent_playout.js.map +1 -1
- package/dist/pipeline/human_input.cjs +158 -0
- package/dist/pipeline/human_input.cjs.map +1 -0
- package/dist/pipeline/human_input.js +124 -125
- package/dist/pipeline/human_input.js.map +1 -1
- package/dist/pipeline/index.cjs +31 -0
- package/dist/pipeline/index.cjs.map +1 -0
- package/dist/pipeline/index.js +8 -4
- package/dist/pipeline/index.js.map +1 -1
- package/dist/pipeline/pipeline_agent.cjs +642 -0
- package/dist/pipeline/pipeline_agent.cjs.map +1 -0
- package/dist/pipeline/pipeline_agent.js +595 -651
- package/dist/pipeline/pipeline_agent.js.map +1 -1
- package/dist/pipeline/speech_handle.cjs +128 -0
- package/dist/pipeline/speech_handle.cjs.map +1 -0
- package/dist/pipeline/speech_handle.js +102 -100
- package/dist/pipeline/speech_handle.js.map +1 -1
- package/dist/plugin.cjs +46 -0
- package/dist/plugin.cjs.map +1 -0
- package/dist/plugin.js +20 -20
- package/dist/plugin.js.map +1 -1
- package/dist/stt/index.cjs +38 -0
- package/dist/stt/index.cjs.map +1 -0
- package/dist/stt/index.js +13 -5
- package/dist/stt/index.js.map +1 -1
- package/dist/stt/stream_adapter.cjs +87 -0
- package/dist/stt/stream_adapter.cjs.map +1 -0
- package/dist/stt/stream_adapter.js +58 -55
- package/dist/stt/stream_adapter.js.map +1 -1
- package/dist/stt/stt.cjs +98 -0
- package/dist/stt/stt.cjs.map +1 -0
- package/dist/stt/stt.js +63 -98
- package/dist/stt/stt.js.map +1 -1
- package/dist/tokenize/basic/basic.cjs +98 -0
- package/dist/tokenize/basic/basic.cjs.map +1 -0
- package/dist/tokenize/basic/basic.d.ts +1 -1
- package/dist/tokenize/basic/basic.d.ts.map +1 -1
- package/dist/tokenize/basic/basic.js +56 -45
- package/dist/tokenize/basic/basic.js.map +1 -1
- package/dist/tokenize/basic/hyphenator.cjs +425 -0
- package/dist/tokenize/basic/hyphenator.cjs.map +1 -0
- package/dist/tokenize/basic/hyphenator.js +66 -82
- package/dist/tokenize/basic/hyphenator.js.map +1 -1
- package/dist/tokenize/basic/index.cjs +35 -0
- package/dist/tokenize/basic/index.cjs.map +1 -0
- package/dist/tokenize/basic/index.js +7 -4
- package/dist/tokenize/basic/index.js.map +1 -1
- package/dist/tokenize/basic/paragraph.cjs +57 -0
- package/dist/tokenize/basic/paragraph.cjs.map +1 -0
- package/dist/tokenize/basic/paragraph.js +30 -35
- package/dist/tokenize/basic/paragraph.js.map +1 -1
- package/dist/tokenize/basic/sentence.cjs +89 -0
- package/dist/tokenize/basic/sentence.cjs.map +1 -0
- package/dist/tokenize/basic/sentence.d.ts.map +1 -1
- package/dist/tokenize/basic/sentence.js +62 -57
- package/dist/tokenize/basic/sentence.js.map +1 -1
- package/dist/tokenize/basic/word.cjs +44 -0
- package/dist/tokenize/basic/word.cjs.map +1 -0
- package/dist/tokenize/basic/word.js +17 -20
- package/dist/tokenize/basic/word.js.map +1 -1
- package/dist/tokenize/index.cjs +55 -0
- package/dist/tokenize/index.cjs.map +1 -0
- package/dist/tokenize/index.js +18 -7
- package/dist/tokenize/index.js.map +1 -1
- package/dist/tokenize/token_stream.cjs +164 -0
- package/dist/tokenize/token_stream.cjs.map +1 -0
- package/dist/tokenize/token_stream.js +133 -139
- package/dist/tokenize/token_stream.js.map +1 -1
- package/dist/tokenize/tokenizer.cjs +184 -0
- package/dist/tokenize/tokenizer.cjs.map +1 -0
- package/dist/tokenize/tokenizer.js +138 -99
- package/dist/tokenize/tokenizer.js.map +1 -1
- package/dist/tokenize/tokenizer.test.cjs +220 -0
- package/dist/tokenize/tokenizer.test.cjs.map +1 -0
- package/dist/tokenize/tokenizer.test.d.ts +2 -0
- package/dist/tokenize/tokenizer.test.d.ts.map +1 -0
- package/dist/tokenize/tokenizer.test.js +219 -0
- package/dist/tokenize/tokenizer.test.js.map +1 -0
- package/dist/transcription.cjs +131 -0
- package/dist/transcription.cjs.map +1 -0
- package/dist/transcription.js +99 -96
- package/dist/transcription.js.map +1 -1
- package/dist/tts/index.cjs +38 -0
- package/dist/tts/index.cjs.map +1 -0
- package/dist/tts/index.js +13 -5
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/stream_adapter.cjs +78 -0
- package/dist/tts/stream_adapter.cjs.map +1 -0
- package/dist/tts/stream_adapter.js +50 -47
- package/dist/tts/stream_adapter.js.map +1 -1
- package/dist/tts/tts.cjs +127 -0
- package/dist/tts/tts.cjs.map +1 -0
- package/dist/tts/tts.js +90 -120
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +284 -0
- package/dist/utils.cjs.map +1 -0
- package/dist/utils.js +242 -247
- package/dist/utils.js.map +1 -1
- package/dist/vad.cjs +92 -0
- package/dist/vad.cjs.map +1 -0
- package/dist/vad.js +57 -52
- package/dist/vad.js.map +1 -1
- package/dist/version.cjs +29 -0
- package/dist/version.cjs.map +1 -0
- package/dist/version.js +4 -4
- package/dist/version.js.map +1 -1
- package/dist/worker.cjs +577 -0
- package/dist/worker.cjs.map +1 -0
- package/dist/worker.d.ts +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +512 -484
- package/dist/worker.js.map +1 -1
- package/package.json +18 -8
- package/src/ipc/job_main.ts +66 -64
- package/src/job.ts +3 -2
- package/src/pipeline/pipeline_agent.ts +23 -23
- package/src/tokenize/basic/basic.ts +1 -1
- package/src/tokenize/basic/sentence.ts +14 -8
- package/src/tokenize/tokenizer.test.ts +255 -0
- package/src/worker.ts +1 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __export = (target, all) => {
|
|
9
|
+
for (var name in all)
|
|
10
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
11
|
+
};
|
|
12
|
+
var __copyProps = (to, from, except, desc) => {
|
|
13
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
14
|
+
for (let key of __getOwnPropNames(from))
|
|
15
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
16
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
17
|
+
}
|
|
18
|
+
return to;
|
|
19
|
+
};
|
|
20
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
+
mod
|
|
27
|
+
));
|
|
28
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
+
var agent_playout_exports = {};
|
|
30
|
+
__export(agent_playout_exports, {
|
|
31
|
+
AgentPlayout: () => AgentPlayout,
|
|
32
|
+
AgentPlayoutEvent: () => AgentPlayoutEvent,
|
|
33
|
+
PlayoutHandle: () => PlayoutHandle
|
|
34
|
+
});
|
|
35
|
+
module.exports = __toCommonJS(agent_playout_exports);
|
|
36
|
+
var import_node_events = __toESM(require("node:events"), 1);
|
|
37
|
+
var import_log = require("../log.cjs");
|
|
38
|
+
var import_utils = require("../utils.cjs");
|
|
39
|
+
var import_agent_output = require("./agent_output.cjs");
|
|
40
|
+
var AgentPlayoutEvent = /* @__PURE__ */ ((AgentPlayoutEvent2) => {
|
|
41
|
+
AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STARTED"] = 0] = "PLAYOUT_STARTED";
|
|
42
|
+
AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STOPPED"] = 1] = "PLAYOUT_STOPPED";
|
|
43
|
+
return AgentPlayoutEvent2;
|
|
44
|
+
})(AgentPlayoutEvent || {});
|
|
45
|
+
class PlayoutHandle {
|
|
46
|
+
#speechId;
|
|
47
|
+
#audioSource;
|
|
48
|
+
playoutSource;
|
|
49
|
+
totalPlayedTime;
|
|
50
|
+
#interrupted = false;
|
|
51
|
+
pushedDuration = 0;
|
|
52
|
+
intFut = new import_utils.Future();
|
|
53
|
+
doneFut = new import_utils.Future();
|
|
54
|
+
constructor(speechId, audioSource, playoutSource) {
|
|
55
|
+
this.#speechId = speechId;
|
|
56
|
+
this.#audioSource = audioSource;
|
|
57
|
+
this.playoutSource = playoutSource;
|
|
58
|
+
}
|
|
59
|
+
get speechId() {
|
|
60
|
+
return this.#speechId;
|
|
61
|
+
}
|
|
62
|
+
get interrupted() {
|
|
63
|
+
return this.#interrupted;
|
|
64
|
+
}
|
|
65
|
+
get timePlayed() {
|
|
66
|
+
return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;
|
|
67
|
+
}
|
|
68
|
+
get done() {
|
|
69
|
+
return this.doneFut.done || this.#interrupted;
|
|
70
|
+
}
|
|
71
|
+
interrupt() {
|
|
72
|
+
if (this.done) {
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
75
|
+
this.intFut.resolve();
|
|
76
|
+
this.#interrupted = true;
|
|
77
|
+
}
|
|
78
|
+
join() {
|
|
79
|
+
return this.doneFut;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
class AgentPlayout extends import_node_events.default {
|
|
83
|
+
#closed = false;
|
|
84
|
+
#audioSource;
|
|
85
|
+
#targetVolume = 1;
|
|
86
|
+
#playoutTask;
|
|
87
|
+
#logger = (0, import_log.log)();
|
|
88
|
+
constructor(audioSource) {
|
|
89
|
+
super();
|
|
90
|
+
this.#audioSource = audioSource;
|
|
91
|
+
}
|
|
92
|
+
get targetVolume() {
|
|
93
|
+
return this.#targetVolume;
|
|
94
|
+
}
|
|
95
|
+
set targetVolume(vol) {
|
|
96
|
+
this.#targetVolume = vol;
|
|
97
|
+
}
|
|
98
|
+
play(speechId, playoutSource) {
|
|
99
|
+
if (this.#closed) {
|
|
100
|
+
throw new Error("source closed");
|
|
101
|
+
}
|
|
102
|
+
const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);
|
|
103
|
+
this.#playoutTask = this.#playout(handle, this.#playoutTask);
|
|
104
|
+
return handle;
|
|
105
|
+
}
|
|
106
|
+
#playout(handle, oldTask) {
|
|
107
|
+
return new import_utils.CancellablePromise(async (resolve, _, onCancel) => {
|
|
108
|
+
const cancel = () => {
|
|
109
|
+
captureTask.cancel();
|
|
110
|
+
handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
|
|
111
|
+
if (handle.interrupted || captureTask.error) {
|
|
112
|
+
this.#audioSource.clearQueue();
|
|
113
|
+
}
|
|
114
|
+
if (!firstFrame) {
|
|
115
|
+
this.emit(1 /* PLAYOUT_STOPPED */, handle.interrupted);
|
|
116
|
+
}
|
|
117
|
+
handle.doneFut.resolve();
|
|
118
|
+
this.#logger.child({ speechId: handle.speechId, interrupted: handle.interrupted }).debug("playout finished");
|
|
119
|
+
};
|
|
120
|
+
onCancel(() => {
|
|
121
|
+
cancel();
|
|
122
|
+
});
|
|
123
|
+
if (oldTask) {
|
|
124
|
+
await (0, import_utils.gracefullyCancel)(oldTask);
|
|
125
|
+
}
|
|
126
|
+
if (this.#audioSource.queuedDuration > 0) {
|
|
127
|
+
this.#logger.child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration }).warn("new playout while the source is still playing");
|
|
128
|
+
}
|
|
129
|
+
let firstFrame = true;
|
|
130
|
+
const captureTask = new import_utils.CancellablePromise(async (resolve2, _2, onCancel2) => {
|
|
131
|
+
let cancelled = false;
|
|
132
|
+
onCancel2(() => {
|
|
133
|
+
cancelled = true;
|
|
134
|
+
});
|
|
135
|
+
for await (const frame of handle.playoutSource) {
|
|
136
|
+
if (cancelled || frame === import_agent_output.SynthesisHandle.FLUSH_SENTINEL) {
|
|
137
|
+
break;
|
|
138
|
+
}
|
|
139
|
+
if (firstFrame) {
|
|
140
|
+
this.#logger.child({ speechId: handle.speechId }).debug("started playing the first time");
|
|
141
|
+
this.emit(0 /* PLAYOUT_STARTED */);
|
|
142
|
+
firstFrame = false;
|
|
143
|
+
}
|
|
144
|
+
handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
|
|
145
|
+
await this.#audioSource.captureFrame(frame);
|
|
146
|
+
await this.#audioSource.waitForPlayout();
|
|
147
|
+
}
|
|
148
|
+
resolve2();
|
|
149
|
+
});
|
|
150
|
+
try {
|
|
151
|
+
await Promise.any([captureTask, handle.intFut.await]);
|
|
152
|
+
} finally {
|
|
153
|
+
cancel();
|
|
154
|
+
resolve();
|
|
155
|
+
}
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
async close() {
|
|
159
|
+
this.#closed = true;
|
|
160
|
+
await this.#playoutTask;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
164
|
+
0 && (module.exports = {
|
|
165
|
+
AgentPlayout,
|
|
166
|
+
AgentPlayoutEvent,
|
|
167
|
+
PlayoutHandle
|
|
168
|
+
});
|
|
169
|
+
//# sourceMappingURL=agent_playout.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n await this.#audioSource.captureFrame(frame);\n await this.#audioSource.waitForPlayout();\n }\n\n // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,\n // but for some reason too many TTS frames can gunk up the buffer and lead to\n // FFI errors. this works 🤷♀️\n // if (this.#audioSource.queuedDuration > 0) {\n // await this.#audioSource.waitForPlayout();\n // }\n\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAAyB;AACzB,iBAAoB;AACpB,mBAA6D;AAC7D,0BAAgC;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,oBAAO;AAAA,EACpB,UAAU,IAAI,oBAAO;AAAA,EAErB,YACE,UACA,aACA,eACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,mBAAAC,QAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,aAAa;AAE3E,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,kBAAM,+BAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,gCAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,oCAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,gBAAM,KAAK,aAAa,aAAa,KAAK;AAC1C,gBAAM,KAAK,aAAa,eAAe;AAAA,QACzC;AASA,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","EventEmitter","resolve","_","onCancel"]}
|
|
@@ -1,143 +1,133 @@
|
|
|
1
|
-
import EventEmitter from
|
|
2
|
-
import { log } from
|
|
3
|
-
import { CancellablePromise, Future, gracefullyCancel } from
|
|
4
|
-
import { SynthesisHandle } from
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
})(AgentPlayoutEvent ||
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
}
|
|
40
|
-
this.intFut.resolve();
|
|
41
|
-
this.#interrupted = true;
|
|
42
|
-
}
|
|
43
|
-
join() {
|
|
44
|
-
return this.doneFut;
|
|
1
|
+
import EventEmitter from "node:events";
|
|
2
|
+
import { log } from "../log.js";
|
|
3
|
+
import { CancellablePromise, Future, gracefullyCancel } from "../utils.js";
|
|
4
|
+
import { SynthesisHandle } from "./agent_output.js";
|
|
5
|
+
var AgentPlayoutEvent = /* @__PURE__ */ ((AgentPlayoutEvent2) => {
|
|
6
|
+
AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STARTED"] = 0] = "PLAYOUT_STARTED";
|
|
7
|
+
AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STOPPED"] = 1] = "PLAYOUT_STOPPED";
|
|
8
|
+
return AgentPlayoutEvent2;
|
|
9
|
+
})(AgentPlayoutEvent || {});
|
|
10
|
+
class PlayoutHandle {
|
|
11
|
+
#speechId;
|
|
12
|
+
#audioSource;
|
|
13
|
+
playoutSource;
|
|
14
|
+
totalPlayedTime;
|
|
15
|
+
#interrupted = false;
|
|
16
|
+
pushedDuration = 0;
|
|
17
|
+
intFut = new Future();
|
|
18
|
+
doneFut = new Future();
|
|
19
|
+
constructor(speechId, audioSource, playoutSource) {
|
|
20
|
+
this.#speechId = speechId;
|
|
21
|
+
this.#audioSource = audioSource;
|
|
22
|
+
this.playoutSource = playoutSource;
|
|
23
|
+
}
|
|
24
|
+
get speechId() {
|
|
25
|
+
return this.#speechId;
|
|
26
|
+
}
|
|
27
|
+
get interrupted() {
|
|
28
|
+
return this.#interrupted;
|
|
29
|
+
}
|
|
30
|
+
get timePlayed() {
|
|
31
|
+
return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;
|
|
32
|
+
}
|
|
33
|
+
get done() {
|
|
34
|
+
return this.doneFut.done || this.#interrupted;
|
|
35
|
+
}
|
|
36
|
+
interrupt() {
|
|
37
|
+
if (this.done) {
|
|
38
|
+
return;
|
|
45
39
|
}
|
|
40
|
+
this.intFut.resolve();
|
|
41
|
+
this.#interrupted = true;
|
|
42
|
+
}
|
|
43
|
+
join() {
|
|
44
|
+
return this.doneFut;
|
|
45
|
+
}
|
|
46
46
|
}
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
47
|
+
class AgentPlayout extends EventEmitter {
|
|
48
|
+
#closed = false;
|
|
49
|
+
#audioSource;
|
|
50
|
+
#targetVolume = 1;
|
|
51
|
+
#playoutTask;
|
|
52
|
+
#logger = log();
|
|
53
|
+
constructor(audioSource) {
|
|
54
|
+
super();
|
|
55
|
+
this.#audioSource = audioSource;
|
|
56
|
+
}
|
|
57
|
+
get targetVolume() {
|
|
58
|
+
return this.#targetVolume;
|
|
59
|
+
}
|
|
60
|
+
set targetVolume(vol) {
|
|
61
|
+
this.#targetVolume = vol;
|
|
62
|
+
}
|
|
63
|
+
play(speechId, playoutSource) {
|
|
64
|
+
if (this.#closed) {
|
|
65
|
+
throw new Error("source closed");
|
|
56
66
|
}
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
67
|
+
const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);
|
|
68
|
+
this.#playoutTask = this.#playout(handle, this.#playoutTask);
|
|
69
|
+
return handle;
|
|
70
|
+
}
|
|
71
|
+
#playout(handle, oldTask) {
|
|
72
|
+
return new CancellablePromise(async (resolve, _, onCancel) => {
|
|
73
|
+
const cancel = () => {
|
|
74
|
+
captureTask.cancel();
|
|
75
|
+
handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
|
|
76
|
+
if (handle.interrupted || captureTask.error) {
|
|
77
|
+
this.#audioSource.clearQueue();
|
|
66
78
|
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
onCancel(() => {
|
|
88
|
-
cancel();
|
|
89
|
-
});
|
|
90
|
-
if (oldTask) {
|
|
91
|
-
await gracefullyCancel(oldTask);
|
|
92
|
-
}
|
|
93
|
-
if (this.#audioSource.queuedDuration > 0) {
|
|
94
|
-
// this should not happen, but log it just in case
|
|
95
|
-
this.#logger
|
|
96
|
-
.child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })
|
|
97
|
-
.warn('new playout while the source is still playing');
|
|
98
|
-
}
|
|
99
|
-
let firstFrame = true;
|
|
100
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
101
|
-
const captureTask = new CancellablePromise(async (resolve, _, onCancel) => {
|
|
102
|
-
let cancelled = false;
|
|
103
|
-
onCancel(() => {
|
|
104
|
-
cancelled = true;
|
|
105
|
-
});
|
|
106
|
-
for await (const frame of handle.playoutSource) {
|
|
107
|
-
if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {
|
|
108
|
-
break;
|
|
109
|
-
}
|
|
110
|
-
if (firstFrame) {
|
|
111
|
-
this.#logger
|
|
112
|
-
.child({ speechId: handle.speechId })
|
|
113
|
-
.debug('started playing the first time');
|
|
114
|
-
this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);
|
|
115
|
-
firstFrame = false;
|
|
116
|
-
}
|
|
117
|
-
handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;
|
|
118
|
-
await this.#audioSource.captureFrame(frame);
|
|
119
|
-
await this.#audioSource.waitForPlayout();
|
|
120
|
-
}
|
|
121
|
-
// XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,
|
|
122
|
-
// but for some reason too many TTS frames can gunk up the buffer and lead to
|
|
123
|
-
// FFI errors. this works 🤷♀️
|
|
124
|
-
// if (this.#audioSource.queuedDuration > 0) {
|
|
125
|
-
// await this.#audioSource.waitForPlayout();
|
|
126
|
-
// }
|
|
127
|
-
resolve();
|
|
128
|
-
});
|
|
129
|
-
try {
|
|
130
|
-
await Promise.any([captureTask, handle.intFut.await]);
|
|
131
|
-
}
|
|
132
|
-
finally {
|
|
133
|
-
cancel();
|
|
134
|
-
resolve();
|
|
135
|
-
}
|
|
79
|
+
if (!firstFrame) {
|
|
80
|
+
this.emit(1 /* PLAYOUT_STOPPED */, handle.interrupted);
|
|
81
|
+
}
|
|
82
|
+
handle.doneFut.resolve();
|
|
83
|
+
this.#logger.child({ speechId: handle.speechId, interrupted: handle.interrupted }).debug("playout finished");
|
|
84
|
+
};
|
|
85
|
+
onCancel(() => {
|
|
86
|
+
cancel();
|
|
87
|
+
});
|
|
88
|
+
if (oldTask) {
|
|
89
|
+
await gracefullyCancel(oldTask);
|
|
90
|
+
}
|
|
91
|
+
if (this.#audioSource.queuedDuration > 0) {
|
|
92
|
+
this.#logger.child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration }).warn("new playout while the source is still playing");
|
|
93
|
+
}
|
|
94
|
+
let firstFrame = true;
|
|
95
|
+
const captureTask = new CancellablePromise(async (resolve2, _2, onCancel2) => {
|
|
96
|
+
let cancelled = false;
|
|
97
|
+
onCancel2(() => {
|
|
98
|
+
cancelled = true;
|
|
136
99
|
});
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
100
|
+
for await (const frame of handle.playoutSource) {
|
|
101
|
+
if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {
|
|
102
|
+
break;
|
|
103
|
+
}
|
|
104
|
+
if (firstFrame) {
|
|
105
|
+
this.#logger.child({ speechId: handle.speechId }).debug("started playing the first time");
|
|
106
|
+
this.emit(0 /* PLAYOUT_STARTED */);
|
|
107
|
+
firstFrame = false;
|
|
108
|
+
}
|
|
109
|
+
handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
|
|
110
|
+
await this.#audioSource.captureFrame(frame);
|
|
111
|
+
await this.#audioSource.waitForPlayout();
|
|
112
|
+
}
|
|
113
|
+
resolve2();
|
|
114
|
+
});
|
|
115
|
+
try {
|
|
116
|
+
await Promise.any([captureTask, handle.intFut.await]);
|
|
117
|
+
} finally {
|
|
118
|
+
cancel();
|
|
119
|
+
resolve();
|
|
120
|
+
}
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
async close() {
|
|
124
|
+
this.#closed = true;
|
|
125
|
+
await this.#playoutTask;
|
|
126
|
+
}
|
|
142
127
|
}
|
|
128
|
+
export {
|
|
129
|
+
AgentPlayout,
|
|
130
|
+
AgentPlayoutEvent,
|
|
131
|
+
PlayoutHandle
|
|
132
|
+
};
|
|
143
133
|
//# sourceMappingURL=agent_playout.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n await this.#audioSource.captureFrame(frame);\n await this.#audioSource.waitForPlayout();\n }\n\n // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,\n // but for some reason too many TTS frames can gunk up the buffer and lead to\n // FFI errors. this works 🤷♀️\n // if (this.#audioSource.queuedDuration > 0) {\n // await this.#audioSource.waitForPlayout();\n // }\n\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":"AAKA,OAAO,kBAAkB;AACzB,SAAS,WAAW;AACpB,SAAS,oBAAoB,QAAQ,wBAAwB;AAC7D,SAAS,uBAAuB;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI,OAAO;AAAA,EAErB,YACE,UACA,aACA,eACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,aAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,aAAa;AAE3E,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,mBAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,gBAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,gBAAM,KAAK,aAAa,aAAa,KAAK;AAC1C,gBAAM,KAAK,aAAa,eAAe;AAAA,QACzC;AASA,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","resolve","_","onCancel"]}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var human_input_exports = {};
|
|
20
|
+
__export(human_input_exports, {
|
|
21
|
+
HumanInput: () => HumanInput,
|
|
22
|
+
HumanInputEvent: () => HumanInputEvent
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(human_input_exports);
|
|
25
|
+
var import_rtc_node = require("@livekit/rtc-node");
|
|
26
|
+
var import_node_events = require("node:events");
|
|
27
|
+
var import_log = require("../log.cjs");
|
|
28
|
+
var import_stt = require("../stt/stt.cjs");
|
|
29
|
+
var import_utils = require("../utils.cjs");
|
|
30
|
+
var import_vad = require("../vad.cjs");
|
|
31
|
+
var HumanInputEvent = /* @__PURE__ */ ((HumanInputEvent2) => {
|
|
32
|
+
HumanInputEvent2[HumanInputEvent2["START_OF_SPEECH"] = 0] = "START_OF_SPEECH";
|
|
33
|
+
HumanInputEvent2[HumanInputEvent2["VAD_INFERENCE_DONE"] = 1] = "VAD_INFERENCE_DONE";
|
|
34
|
+
HumanInputEvent2[HumanInputEvent2["END_OF_SPEECH"] = 2] = "END_OF_SPEECH";
|
|
35
|
+
HumanInputEvent2[HumanInputEvent2["FINAL_TRANSCRIPT"] = 3] = "FINAL_TRANSCRIPT";
|
|
36
|
+
HumanInputEvent2[HumanInputEvent2["INTERIM_TRANSCRIPT"] = 4] = "INTERIM_TRANSCRIPT";
|
|
37
|
+
return HumanInputEvent2;
|
|
38
|
+
})(HumanInputEvent || {});
|
|
39
|
+
class HumanInput extends import_node_events.EventEmitter {
|
|
40
|
+
#closed = false;
|
|
41
|
+
#room;
|
|
42
|
+
#vad;
|
|
43
|
+
#stt;
|
|
44
|
+
#participant;
|
|
45
|
+
#subscribedTrack;
|
|
46
|
+
#recognizeTask;
|
|
47
|
+
#speaking = false;
|
|
48
|
+
#speechProbability = 0;
|
|
49
|
+
#logger = (0, import_log.log)();
|
|
50
|
+
constructor(room, vad, stt, participant) {
|
|
51
|
+
super();
|
|
52
|
+
this.#room = room;
|
|
53
|
+
this.#vad = vad;
|
|
54
|
+
this.#stt = stt;
|
|
55
|
+
this.#participant = participant;
|
|
56
|
+
this.#room.on(import_rtc_node.RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));
|
|
57
|
+
this.#room.on(import_rtc_node.RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));
|
|
58
|
+
this.#subscribeToMicrophone();
|
|
59
|
+
}
|
|
60
|
+
#subscribeToMicrophone() {
|
|
61
|
+
if (!this.#participant) {
|
|
62
|
+
this.#logger.error("Participant is not set");
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
let microphonePublication = void 0;
|
|
66
|
+
for (const publication of this.#participant.trackPublications.values()) {
|
|
67
|
+
if (publication.source === import_rtc_node.TrackSource.SOURCE_MICROPHONE) {
|
|
68
|
+
microphonePublication = publication;
|
|
69
|
+
break;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
if (!microphonePublication) {
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
75
|
+
if (!microphonePublication.subscribed) {
|
|
76
|
+
microphonePublication.setSubscribed(true);
|
|
77
|
+
}
|
|
78
|
+
const track = microphonePublication.track;
|
|
79
|
+
if (track && track !== this.#subscribedTrack) {
|
|
80
|
+
this.#subscribedTrack = track;
|
|
81
|
+
if (this.#recognizeTask) {
|
|
82
|
+
this.#recognizeTask.cancel();
|
|
83
|
+
}
|
|
84
|
+
const audioStream = new import_rtc_node.AudioStream(track, 16e3);
|
|
85
|
+
this.#recognizeTask = new import_utils.CancellablePromise(async (resolve, _, onCancel) => {
|
|
86
|
+
let cancelled = false;
|
|
87
|
+
onCancel(() => {
|
|
88
|
+
cancelled = true;
|
|
89
|
+
});
|
|
90
|
+
const sttStream = this.#stt.stream();
|
|
91
|
+
const vadStream = this.#vad.stream();
|
|
92
|
+
const audioStreamCo = async () => {
|
|
93
|
+
for await (const ev of audioStream) {
|
|
94
|
+
if (cancelled) return;
|
|
95
|
+
sttStream.pushFrame(ev);
|
|
96
|
+
vadStream.pushFrame(ev);
|
|
97
|
+
}
|
|
98
|
+
};
|
|
99
|
+
const vadStreamCo = async () => {
|
|
100
|
+
for await (const ev of vadStream) {
|
|
101
|
+
if (cancelled) return;
|
|
102
|
+
switch (ev.type) {
|
|
103
|
+
case import_vad.VADEventType.START_OF_SPEECH:
|
|
104
|
+
this.#speaking = true;
|
|
105
|
+
this.emit(0 /* START_OF_SPEECH */, ev);
|
|
106
|
+
break;
|
|
107
|
+
case import_vad.VADEventType.INFERENCE_DONE:
|
|
108
|
+
this.#speechProbability = ev.probability;
|
|
109
|
+
this.emit(1 /* VAD_INFERENCE_DONE */, ev);
|
|
110
|
+
break;
|
|
111
|
+
case import_vad.VADEventType.END_OF_SPEECH:
|
|
112
|
+
this.#speaking = false;
|
|
113
|
+
this.emit(2 /* END_OF_SPEECH */, ev);
|
|
114
|
+
break;
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
};
|
|
118
|
+
const sttStreamCo = async () => {
|
|
119
|
+
for await (const ev of sttStream) {
|
|
120
|
+
if (cancelled) return;
|
|
121
|
+
if (ev.type === import_stt.SpeechEventType.FINAL_TRANSCRIPT) {
|
|
122
|
+
this.emit(3 /* FINAL_TRANSCRIPT */, ev);
|
|
123
|
+
} else if (ev.type == import_stt.SpeechEventType.INTERIM_TRANSCRIPT) {
|
|
124
|
+
this.emit(4 /* INTERIM_TRANSCRIPT */, ev);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
};
|
|
128
|
+
await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);
|
|
129
|
+
sttStream.close();
|
|
130
|
+
vadStream.close();
|
|
131
|
+
resolve();
|
|
132
|
+
});
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
get speaking() {
|
|
136
|
+
return this.#speaking;
|
|
137
|
+
}
|
|
138
|
+
get speakingProbability() {
|
|
139
|
+
return this.#speechProbability;
|
|
140
|
+
}
|
|
141
|
+
async close() {
|
|
142
|
+
if (this.#closed) {
|
|
143
|
+
throw new Error("HumanInput already closed");
|
|
144
|
+
}
|
|
145
|
+
this.#closed = true;
|
|
146
|
+
this.#room.removeAllListeners();
|
|
147
|
+
this.#speaking = false;
|
|
148
|
+
if (this.#recognizeTask) {
|
|
149
|
+
await (0, import_utils.gracefullyCancel)(this.#recognizeTask);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
154
|
+
0 && (module.exports = {
|
|
155
|
+
HumanInput,
|
|
156
|
+
HumanInputEvent
|
|
157
|
+
});
|
|
158
|
+
//# sourceMappingURL=human_input.cjs.map
|