@livekit/agents 0.7.5 → 0.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audio.d.cts +9 -0
- package/dist/cli.cjs +5 -0
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.cts +14 -0
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +5 -0
- package/dist/cli.js.map +1 -1
- package/dist/constants.d.cts +5 -0
- package/dist/generator.d.cts +23 -0
- package/dist/http_server.cjs.map +1 -1
- package/dist/http_server.d.cts +19 -0
- package/dist/http_server.d.ts +1 -0
- package/dist/http_server.d.ts.map +1 -1
- package/dist/http_server.js.map +1 -1
- package/dist/index.d.cts +29 -0
- package/dist/inference_runner.d.cts +12 -0
- package/dist/ipc/index.d.cts +2 -0
- package/dist/ipc/inference_executor.d.cts +4 -0
- package/dist/ipc/inference_proc_executor.cjs +3 -2
- package/dist/ipc/inference_proc_executor.cjs.map +1 -1
- package/dist/ipc/inference_proc_executor.d.cts +23 -0
- package/dist/ipc/inference_proc_executor.js +1 -1
- package/dist/ipc/inference_proc_executor.js.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.d.cts +2 -0
- package/dist/ipc/job_executor.d.cts +18 -0
- package/dist/ipc/job_proc_executor.cjs +3 -2
- package/dist/ipc/job_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_executor.d.cts +19 -0
- package/dist/ipc/job_proc_executor.js +1 -1
- package/dist/ipc/job_proc_executor.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +2 -2
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.d.cts +2 -0
- package/dist/ipc/job_proc_lazy_main.js +2 -2
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/message.d.cts +58 -0
- package/dist/ipc/proc_pool.d.cts +31 -0
- package/dist/ipc/supervised_proc.d.cts +30 -0
- package/dist/job.d.cts +113 -0
- package/dist/llm/chat_context.d.cts +66 -0
- package/dist/llm/function_context.d.cts +47 -0
- package/dist/llm/index.d.cts +4 -0
- package/dist/llm/llm.cjs +3 -3
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +66 -0
- package/dist/llm/llm.js +3 -3
- package/dist/llm/llm.js.map +1 -1
- package/dist/log.d.cts +13 -0
- package/dist/metrics/base.d.cts +96 -0
- package/dist/metrics/index.d.cts +5 -0
- package/dist/metrics/usage_collector.d.cts +14 -0
- package/dist/metrics/utils.d.cts +10 -0
- package/dist/multimodal/agent_playout.d.cts +34 -0
- package/dist/multimodal/index.d.cts +3 -0
- package/dist/multimodal/multimodal_agent.d.cts +48 -0
- package/dist/pipeline/agent_output.d.cts +33 -0
- package/dist/pipeline/agent_playout.d.cts +40 -0
- package/dist/pipeline/human_input.d.cts +30 -0
- package/dist/pipeline/index.d.cts +2 -0
- package/dist/pipeline/pipeline_agent.d.cts +151 -0
- package/dist/pipeline/speech_handle.d.cts +37 -0
- package/dist/plugin.d.cts +10 -0
- package/dist/stt/index.d.cts +3 -0
- package/dist/stt/stream_adapter.d.cts +18 -0
- package/dist/stt/stt.d.cts +124 -0
- package/dist/tokenize/basic/basic.d.cts +18 -0
- package/dist/tokenize/basic/hyphenator.d.cts +17 -0
- package/dist/tokenize/basic/index.d.cts +2 -0
- package/dist/tokenize/basic/paragraph.d.cts +5 -0
- package/dist/tokenize/basic/sentence.d.cts +5 -0
- package/dist/tokenize/basic/word.d.cts +5 -0
- package/dist/tokenize/index.d.cts +5 -0
- package/dist/tokenize/token_stream.d.cts +39 -0
- package/dist/tokenize/tokenizer.d.cts +55 -0
- package/dist/transcription.d.cts +31 -0
- package/dist/tts/index.d.cts +3 -0
- package/dist/tts/stream_adapter.d.cts +17 -0
- package/dist/tts/tts.cjs +4 -4
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +127 -0
- package/dist/tts/tts.js +4 -4
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.d.cts +72 -0
- package/dist/vad.d.cts +78 -0
- package/dist/version.d.cts +2 -0
- package/dist/worker.cjs +10 -2
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.cts +109 -0
- package/dist/worker.d.ts +3 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +10 -2
- package/dist/worker.js.map +1 -1
- package/package.json +9 -5
- package/src/cli.ts +8 -0
- package/src/http_server.ts +1 -0
- package/src/ipc/inference_proc_executor.ts +1 -1
- package/src/ipc/job_proc_executor.ts +1 -1
- package/src/ipc/job_proc_lazy_main.ts +2 -2
- package/src/llm/llm.ts +3 -3
- package/src/tts/tts.ts +4 -4
- package/src/worker.ts +10 -1
package/dist/llm/llm.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/llm/llm.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { LLMMetrics } from '../metrics/base.js';\nimport { AsyncIterableQueue } from '../utils.js';\nimport type { ChatContext, ChatRole } from './chat_context.js';\nimport type { FunctionCallInfo, FunctionContext } from './function_context.js';\n\nexport interface ChoiceDelta {\n role: ChatRole;\n content?: string;\n toolCalls?: FunctionCallInfo[];\n}\n\nexport interface CompletionUsage {\n completionTokens: number;\n promptTokens: number;\n totalTokens: number;\n}\n\nexport interface Choice {\n delta: ChoiceDelta;\n index: number;\n}\n\nexport interface ChatChunk {\n requestId: string;\n choices: Choice[];\n usage?: CompletionUsage;\n}\n\nexport enum LLMEvent {\n METRICS_COLLECTED,\n}\n\nexport type LLMCallbacks = {\n [LLMEvent.METRICS_COLLECTED]: (metrics: LLMMetrics) => void;\n};\n\nexport abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCallbacks>) {\n /**\n * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.\n */\n abstract chat({\n chatCtx,\n fncCtx,\n temperature,\n n,\n parallelToolCalls,\n }: {\n chatCtx: ChatContext;\n fncCtx?: FunctionContext;\n temperature?: number;\n n?: number;\n parallelToolCalls?: boolean;\n }): LLMStream;\n}\n\nexport abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {\n protected output = new AsyncIterableQueue<ChatChunk>();\n protected queue = new AsyncIterableQueue<ChatChunk>();\n protected closed = false;\n protected _functionCalls: FunctionCallInfo[] = [];\n abstract label: string;\n\n #llm: LLM;\n #chatCtx: ChatContext;\n #fncCtx?: FunctionContext;\n\n constructor(llm: LLM, chatCtx: ChatContext, fncCtx?: FunctionContext) {\n this.#llm = llm;\n this.#chatCtx = 
chatCtx;\n this.#fncCtx = fncCtx;\n this.monitorMetrics();\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let ttft: bigint
|
|
1
|
+
{"version":3,"sources":["../../src/llm/llm.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { LLMMetrics } from '../metrics/base.js';\nimport { AsyncIterableQueue } from '../utils.js';\nimport type { ChatContext, ChatRole } from './chat_context.js';\nimport type { FunctionCallInfo, FunctionContext } from './function_context.js';\n\nexport interface ChoiceDelta {\n role: ChatRole;\n content?: string;\n toolCalls?: FunctionCallInfo[];\n}\n\nexport interface CompletionUsage {\n completionTokens: number;\n promptTokens: number;\n totalTokens: number;\n}\n\nexport interface Choice {\n delta: ChoiceDelta;\n index: number;\n}\n\nexport interface ChatChunk {\n requestId: string;\n choices: Choice[];\n usage?: CompletionUsage;\n}\n\nexport enum LLMEvent {\n METRICS_COLLECTED,\n}\n\nexport type LLMCallbacks = {\n [LLMEvent.METRICS_COLLECTED]: (metrics: LLMMetrics) => void;\n};\n\nexport abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCallbacks>) {\n /**\n * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.\n */\n abstract chat({\n chatCtx,\n fncCtx,\n temperature,\n n,\n parallelToolCalls,\n }: {\n chatCtx: ChatContext;\n fncCtx?: FunctionContext;\n temperature?: number;\n n?: number;\n parallelToolCalls?: boolean;\n }): LLMStream;\n}\n\nexport abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {\n protected output = new AsyncIterableQueue<ChatChunk>();\n protected queue = new AsyncIterableQueue<ChatChunk>();\n protected closed = false;\n protected _functionCalls: FunctionCallInfo[] = [];\n abstract label: string;\n\n #llm: LLM;\n #chatCtx: ChatContext;\n #fncCtx?: FunctionContext;\n\n constructor(llm: LLM, chatCtx: ChatContext, fncCtx?: FunctionContext) {\n this.#llm = llm;\n this.#chatCtx = 
chatCtx;\n this.#fncCtx = fncCtx;\n this.monitorMetrics();\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let ttft: bigint = BigInt(-1);\n let requestId = '';\n let usage: CompletionUsage | undefined;\n\n for await (const ev of this.queue) {\n this.output.put(ev);\n requestId = ev.requestId;\n if (ttft === BigInt(-1)) {\n ttft = process.hrtime.bigint() - startTime;\n }\n if (ev.usage) {\n usage = ev.usage;\n }\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: LLMMetrics = {\n timestamp: Date.now(),\n requestId,\n ttft: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n cancelled: false, // XXX(nbsp)\n label: this.label,\n completionTokens: usage?.completionTokens || 0,\n promptTokens: usage?.promptTokens || 0,\n totalTokens: usage?.totalTokens || 0,\n tokensPerSecond:\n (usage?.completionTokens || 0) / Math.trunc(Number(duration / BigInt(1000000000))),\n };\n this.#llm.emit(LLMEvent.METRICS_COLLECTED, metrics);\n }\n\n /** List of called functions from this stream. */\n get functionCalls(): FunctionCallInfo[] {\n return this._functionCalls;\n }\n\n /** The function context of this stream. */\n get fncCtx(): FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n /** The initial chat context of this stream. */\n get chatCtx(): ChatContext {\n return this.#chatCtx;\n }\n\n /** Execute all deferred functions of this stream concurrently. 
*/\n executeFunctions(): FunctionCallInfo[] {\n this._functionCalls.forEach(\n (f) =>\n (f.task = f.func.execute(f.params).then(\n (result) => ({ name: f.name, toolCallId: f.toolCallId, result }),\n (error) => ({ name: f.name, toolCallId: f.toolCallId, error }),\n )),\n );\n return this._functionCalls;\n }\n\n next(): Promise<IteratorResult<ChatChunk>> {\n return this.output.next();\n }\n\n close() {\n this.output.close();\n this.queue.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): LLMStream {\n return this;\n }\n}\n"],"mappings":"AAIA,SAAS,oBAAoB;AAE7B,SAAS,0BAA0B;AA2B5B,IAAK,WAAL,kBAAKA,cAAL;AACL,EAAAA,oBAAA;AADU,SAAAA;AAAA,GAAA;AAQL,MAAe,YAAa,aAAsD;AAiBzF;AAEO,MAAe,UAAsD;AAAA,EAChE,SAAS,IAAI,mBAA8B;AAAA,EAC3C,QAAQ,IAAI,mBAA8B;AAAA,EAC1C,SAAS;AAAA,EACT,iBAAqC,CAAC;AAAA,EAGhD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,SAAsB,QAA0B;AACpE,SAAK,OAAO;AACZ,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAChB,QAAI;AAEJ,qBAAiB,MAAM,KAAK,OAAO;AACjC,WAAK,OAAO,IAAI,EAAE;AAClB,kBAAY,GAAG;AACf,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,UAAI,GAAG,OAAO;AACZ,gBAAQ,GAAG;AAAA,MACb;AAAA,IACF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,WAAW;AAAA;AAAA,MACX,OAAO,KAAK;AAAA,MACZ,mBAAkB,+BAAO,qBAAoB;AAAA,MAC7C,eAAc,+BAAO,iBAAgB;AAAA,MACrC,cAAa,+BAAO,gBAAe;AAAA,MACnC,mBACG,+BAAO,qBAAoB,KAAK,KAAK,MAAM,OAAO,WAAW,OAAO,GAAU,CAAC,CAAC;AAAA,IACrF;AACA,SAAK,KAAK,KAAK,2BAA4B,OAAO;AAAA,EACpD;AAAA;AAAA,EAGA,IAAI,gBAAoC;AACtC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,SAAsC;AACxC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,mBAAuC;AACrC,SAAK,eAAe;AAAA,MAClB,CAAC,MACE,EAAE,OAAO,EAAE,KAAK,QAAQ,EAAE,MAAM,EAAE;AAAA,
QACjC,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,YAAY,EAAE,YAAY,OAAO;AAAA,QAC9D,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,YAAY,EAAE,YAAY,MAAM;AAAA,MAC9D;AAAA,IACJ;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA,EAEA,QAAQ;AACN,SAAK,OAAO,MAAM;AAClB,SAAK,MAAM,MAAM;AACjB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["LLMEvent"]}
|
package/dist/log.d.cts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { Logger } from 'pino';
|
|
2
|
+
/** @internal */
|
|
3
|
+
export type LoggerOptions = {
|
|
4
|
+
pretty: boolean;
|
|
5
|
+
level?: string;
|
|
6
|
+
};
|
|
7
|
+
/** @internal */
|
|
8
|
+
export declare let loggerOptions: LoggerOptions;
|
|
9
|
+
/** @internal */
|
|
10
|
+
export declare const log: () => Logger;
|
|
11
|
+
/** @internal */
|
|
12
|
+
export declare const initializeLogger: ({ pretty, level }: LoggerOptions) => void;
|
|
13
|
+
//# sourceMappingURL=log.d.ts.map
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
export interface LLMMetrics {
|
|
2
|
+
requestId: string;
|
|
3
|
+
timestamp: number;
|
|
4
|
+
ttft: number;
|
|
5
|
+
duration: number;
|
|
6
|
+
label: string;
|
|
7
|
+
cancelled: boolean;
|
|
8
|
+
completionTokens: number;
|
|
9
|
+
promptTokens: number;
|
|
10
|
+
totalTokens: number;
|
|
11
|
+
tokensPerSecond: number;
|
|
12
|
+
error?: Error;
|
|
13
|
+
}
|
|
14
|
+
export interface STTMetrics {
|
|
15
|
+
requestId: string;
|
|
16
|
+
timestamp: number;
|
|
17
|
+
duration: number;
|
|
18
|
+
label: string;
|
|
19
|
+
audioDuration: number;
|
|
20
|
+
streamed: boolean;
|
|
21
|
+
error?: Error;
|
|
22
|
+
}
|
|
23
|
+
export interface TTSMetrics {
|
|
24
|
+
requestId: string;
|
|
25
|
+
timestamp: number;
|
|
26
|
+
ttfb: number;
|
|
27
|
+
duration: number;
|
|
28
|
+
label: string;
|
|
29
|
+
audioDuration: number;
|
|
30
|
+
cancelled: boolean;
|
|
31
|
+
charactersCount: number;
|
|
32
|
+
streamed: boolean;
|
|
33
|
+
error?: Error;
|
|
34
|
+
}
|
|
35
|
+
export interface VADMetrics {
|
|
36
|
+
timestamp: number;
|
|
37
|
+
idleTime: number;
|
|
38
|
+
inferenceDurationTotal: number;
|
|
39
|
+
inferenceCount: number;
|
|
40
|
+
label: string;
|
|
41
|
+
}
|
|
42
|
+
export interface PipelineEOUMetrics {
|
|
43
|
+
/**
|
|
44
|
+
* Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics
|
|
45
|
+
*/
|
|
46
|
+
sequenceId: string;
|
|
47
|
+
/** Timestamp of when the event was recorded */
|
|
48
|
+
timestamp: number;
|
|
49
|
+
/** Amount of time between the end of speech from VAD and the decision to end the user's turn */
|
|
50
|
+
endOfUtteranceDelay: number;
|
|
51
|
+
/**
|
|
52
|
+
* Time taken to obtain the transcript after the end of the user's speech.
|
|
53
|
+
*
|
|
54
|
+
* @remarks
|
|
55
|
+
* May be 0 if the transcript was already available.
|
|
56
|
+
*/
|
|
57
|
+
transcriptionDelay: number;
|
|
58
|
+
}
|
|
59
|
+
export interface PipelineLLMMetrics extends LLMMetrics {
|
|
60
|
+
/**
|
|
61
|
+
* Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics
|
|
62
|
+
*/
|
|
63
|
+
sequenceId: string;
|
|
64
|
+
}
|
|
65
|
+
export interface PipelineTTSMetrics extends TTSMetrics {
|
|
66
|
+
/**
|
|
67
|
+
* Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics
|
|
68
|
+
*/
|
|
69
|
+
sequenceId: string;
|
|
70
|
+
}
|
|
71
|
+
export type PipelineSTTMetrics = STTMetrics;
|
|
72
|
+
export type PipelineVADMetrics = VADMetrics;
|
|
73
|
+
export declare class MultimodalLLMError extends Error {
|
|
74
|
+
type?: string;
|
|
75
|
+
reason?: string;
|
|
76
|
+
code?: string;
|
|
77
|
+
constructor({ type, reason, code, message, }?: {
|
|
78
|
+
type?: string;
|
|
79
|
+
reason?: string;
|
|
80
|
+
code?: string;
|
|
81
|
+
message?: string;
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
export interface MultimodalLLMMetrics extends LLMMetrics {
|
|
85
|
+
inputTokenDetails: {
|
|
86
|
+
cachedTokens: number;
|
|
87
|
+
textTokens: number;
|
|
88
|
+
audioTokens: number;
|
|
89
|
+
};
|
|
90
|
+
outputTokenDetails: {
|
|
91
|
+
textTokens: number;
|
|
92
|
+
audioTokens: number;
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
export type AgentMetrics = STTMetrics | LLMMetrics | TTSMetrics | VADMetrics | PipelineSTTMetrics | PipelineEOUMetrics | PipelineLLMMetrics | PipelineTTSMetrics | PipelineVADMetrics | MultimodalLLMMetrics;
|
|
96
|
+
//# sourceMappingURL=base.d.ts.map
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export type { AgentMetrics, STTMetrics, LLMMetrics, TTSMetrics, VADMetrics, PipelineSTTMetrics, PipelineEOUMetrics, PipelineLLMMetrics, PipelineTTSMetrics, PipelineVADMetrics, MultimodalLLMMetrics, } from './base.js';
|
|
2
|
+
export { MultimodalLLMError } from './base.js';
|
|
3
|
+
export { type UsageSummary, UsageCollector } from './usage_collector.js';
|
|
4
|
+
export { logMetrics } from './utils.js';
|
|
5
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { AgentMetrics } from './base.js';
|
|
2
|
+
export interface UsageSummary {
|
|
3
|
+
llmPromptTokens: number;
|
|
4
|
+
llmCompletionTokens: number;
|
|
5
|
+
ttsCharactersCount: number;
|
|
6
|
+
sttAudioDuration: number;
|
|
7
|
+
}
|
|
8
|
+
export declare class UsageCollector {
|
|
9
|
+
#private;
|
|
10
|
+
constructor();
|
|
11
|
+
collect(metrics: AgentMetrics): void;
|
|
12
|
+
get summary(): UsageSummary;
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=usage_collector.d.ts.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { AgentMetrics, LLMMetrics, PipelineEOUMetrics, PipelineLLMMetrics, PipelineTTSMetrics, STTMetrics, TTSMetrics, VADMetrics } from './base.js';
|
|
2
|
+
export declare const logMetrics: (metrics: AgentMetrics) => void;
|
|
3
|
+
export declare const isLLMMetrics: (metrics: AgentMetrics) => metrics is LLMMetrics;
|
|
4
|
+
export declare const isPipelineLLMMetrics: (metrics: AgentMetrics) => metrics is PipelineLLMMetrics;
|
|
5
|
+
export declare const isVADMetrics: (metrics: AgentMetrics) => metrics is VADMetrics;
|
|
6
|
+
export declare const isPipelineEOUMetrics: (metrics: AgentMetrics) => metrics is PipelineEOUMetrics;
|
|
7
|
+
export declare const isTTSMetrics: (metrics: AgentMetrics) => metrics is TTSMetrics;
|
|
8
|
+
export declare const isPipelineTTSMetrics: (metrics: AgentMetrics) => metrics is PipelineTTSMetrics;
|
|
9
|
+
export declare const isSTTMetrics: (metrics: AgentMetrics) => metrics is STTMetrics;
|
|
10
|
+
//# sourceMappingURL=utils.d.ts.map
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/// <reference types="node" resolution-mode="require"/>
|
|
2
|
+
import type { AudioFrame } from '@livekit/rtc-node';
|
|
3
|
+
import { type AudioSource } from '@livekit/rtc-node';
|
|
4
|
+
import { EventEmitter } from 'node:events';
|
|
5
|
+
import type { TextAudioSynchronizer } from '../transcription.js';
|
|
6
|
+
import { type AsyncIterableQueue, Future } from '../utils.js';
|
|
7
|
+
export declare const proto: {};
|
|
8
|
+
export declare class PlayoutHandle extends EventEmitter {
|
|
9
|
+
#private;
|
|
10
|
+
/** @internal */
|
|
11
|
+
synchronizer: TextAudioSynchronizer;
|
|
12
|
+
/** @internal */
|
|
13
|
+
doneFut: Future;
|
|
14
|
+
/** @internal */
|
|
15
|
+
intFut: Future;
|
|
16
|
+
/** @internal */
|
|
17
|
+
pushedDuration: number;
|
|
18
|
+
/** @internal */
|
|
19
|
+
totalPlayedTime: number | undefined;
|
|
20
|
+
constructor(audioSource: AudioSource, sampleRate: number, itemId: string, contentIndex: number, synchronizer: TextAudioSynchronizer);
|
|
21
|
+
get itemId(): string;
|
|
22
|
+
get audioSamples(): number;
|
|
23
|
+
get textChars(): number;
|
|
24
|
+
get contentIndex(): number;
|
|
25
|
+
get interrupted(): boolean;
|
|
26
|
+
get done(): boolean;
|
|
27
|
+
interrupt(): void;
|
|
28
|
+
}
|
|
29
|
+
export declare class AgentPlayout extends EventEmitter {
|
|
30
|
+
#private;
|
|
31
|
+
constructor(audioSource: AudioSource, sampleRate: number, numChannels: number, inFrameSize: number, outFrameSize: number);
|
|
32
|
+
play(itemId: string, contentIndex: number, synchronizer: TextAudioSynchronizer, textStream: AsyncIterableQueue<string>, audioStream: AsyncIterableQueue<AudioFrame>): PlayoutHandle;
|
|
33
|
+
}
|
|
34
|
+
//# sourceMappingURL=agent_playout.d.ts.map
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/// <reference types="node" resolution-mode="require"/>
|
|
2
|
+
import type { NoiseCancellationOptions, RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
|
|
3
|
+
import { EventEmitter } from 'node:events';
|
|
4
|
+
import * as llm from '../llm/index.js';
|
|
5
|
+
/**
|
|
6
|
+
* @internal
|
|
7
|
+
* @beta
|
|
8
|
+
*/
|
|
9
|
+
export declare abstract class RealtimeSession extends EventEmitter {
|
|
10
|
+
abstract conversation: any;
|
|
11
|
+
abstract inputAudioBuffer: any;
|
|
12
|
+
abstract fncCtx: llm.FunctionContext | undefined;
|
|
13
|
+
abstract recoverFromTextResponse(itemId: string): void;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* @internal
|
|
17
|
+
* @beta
|
|
18
|
+
*/
|
|
19
|
+
export declare abstract class RealtimeModel {
|
|
20
|
+
abstract session(options: any): RealtimeSession;
|
|
21
|
+
abstract close(): Promise<void>;
|
|
22
|
+
abstract sampleRate: number;
|
|
23
|
+
abstract numChannels: number;
|
|
24
|
+
abstract inFrameSize: number;
|
|
25
|
+
abstract outFrameSize: number;
|
|
26
|
+
}
|
|
27
|
+
export type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';
|
|
28
|
+
export declare const AGENT_STATE_ATTRIBUTE = "lk.agent.state";
|
|
29
|
+
/** @beta */
|
|
30
|
+
export declare class MultimodalAgent extends EventEmitter {
|
|
31
|
+
#private;
|
|
32
|
+
model: RealtimeModel;
|
|
33
|
+
room: Room | null;
|
|
34
|
+
linkedParticipant: RemoteParticipant | null;
|
|
35
|
+
subscribedTrack: RemoteAudioTrack | null;
|
|
36
|
+
readMicroTask: Promise<void> | null;
|
|
37
|
+
constructor({ model, chatCtx, fncCtx, maxTextResponseRetries, noiseCancellation, }: {
|
|
38
|
+
model: RealtimeModel;
|
|
39
|
+
chatCtx?: llm.ChatContext;
|
|
40
|
+
fncCtx?: llm.FunctionContext;
|
|
41
|
+
maxTextResponseRetries?: number;
|
|
42
|
+
noiseCancellation?: NoiseCancellationOptions;
|
|
43
|
+
});
|
|
44
|
+
get fncCtx(): llm.FunctionContext | undefined;
|
|
45
|
+
set fncCtx(ctx: llm.FunctionContext | undefined);
|
|
46
|
+
start(room: Room, participant?: RemoteParticipant | string | null): Promise<RealtimeSession>;
|
|
47
|
+
}
|
|
48
|
+
//# sourceMappingURL=multimodal_agent.d.ts.map
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { AudioFrame } from '@livekit/rtc-node';
|
|
2
|
+
import type { TextAudioSynchronizer } from '../transcription.js';
|
|
3
|
+
import { type TTS } from '../tts/index.js';
|
|
4
|
+
import { AsyncIterableQueue, Future } from '../utils.js';
|
|
5
|
+
import type { AgentPlayout, PlayoutHandle } from './agent_playout.js';
|
|
6
|
+
export type SpeechSource = AsyncIterable<string> | string | Promise<string>;
|
|
7
|
+
export declare class SynthesisHandle {
|
|
8
|
+
#private;
|
|
9
|
+
static readonly FLUSH_SENTINEL: unique symbol;
|
|
10
|
+
text?: string;
|
|
11
|
+
ttsSource: SpeechSource;
|
|
12
|
+
tts: TTS;
|
|
13
|
+
queue: AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;
|
|
14
|
+
intFut: Future;
|
|
15
|
+
synchronizer: TextAudioSynchronizer;
|
|
16
|
+
constructor(speechId: string, ttsSource: SpeechSource, agentPlayout: AgentPlayout, tts: TTS, synchronizer: TextAudioSynchronizer);
|
|
17
|
+
get speechId(): string;
|
|
18
|
+
get validated(): boolean;
|
|
19
|
+
get interrupted(): boolean;
|
|
20
|
+
get playHandle(): PlayoutHandle | undefined;
|
|
21
|
+
/** Validate the speech for playout. */
|
|
22
|
+
play(): PlayoutHandle;
|
|
23
|
+
/** Interrupt the speech. */
|
|
24
|
+
interrupt(): void;
|
|
25
|
+
}
|
|
26
|
+
export declare class AgentOutput {
|
|
27
|
+
#private;
|
|
28
|
+
constructor(agentPlayout: AgentPlayout, tts: TTS);
|
|
29
|
+
get playout(): AgentPlayout;
|
|
30
|
+
close(): Promise<void>;
|
|
31
|
+
synthesize(speechId: string, ttsSource: SpeechSource, synchronizer: TextAudioSynchronizer): SynthesisHandle;
|
|
32
|
+
}
|
|
33
|
+
//# sourceMappingURL=agent_output.d.ts.map
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import type { AudioFrame, AudioSource } from '@livekit/rtc-node';
|
|
2
|
+
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
3
|
+
import type { TextAudioSynchronizer } from '../transcription.js';
|
|
4
|
+
import { Future } from '../utils.js';
|
|
5
|
+
import { SynthesisHandle } from './agent_output.js';
|
|
6
|
+
export declare enum AgentPlayoutEvent {
|
|
7
|
+
PLAYOUT_STARTED = 0,
|
|
8
|
+
PLAYOUT_STOPPED = 1
|
|
9
|
+
}
|
|
10
|
+
export type AgentPlayoutCallbacks = {
|
|
11
|
+
[AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;
|
|
12
|
+
[AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;
|
|
13
|
+
};
|
|
14
|
+
export declare class PlayoutHandle {
|
|
15
|
+
#private;
|
|
16
|
+
playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;
|
|
17
|
+
totalPlayedTime?: number;
|
|
18
|
+
synchronizer: TextAudioSynchronizer;
|
|
19
|
+
pushedDuration: number;
|
|
20
|
+
intFut: Future;
|
|
21
|
+
doneFut: Future;
|
|
22
|
+
constructor(speechId: string, audioSource: AudioSource, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer);
|
|
23
|
+
get speechId(): string;
|
|
24
|
+
get interrupted(): boolean;
|
|
25
|
+
get timePlayed(): number;
|
|
26
|
+
get done(): boolean;
|
|
27
|
+
interrupt(): void;
|
|
28
|
+
join(): Future;
|
|
29
|
+
}
|
|
30
|
+
declare const AgentPlayout_base: new () => TypedEmitter<AgentPlayoutCallbacks>;
|
|
31
|
+
export declare class AgentPlayout extends AgentPlayout_base {
|
|
32
|
+
#private;
|
|
33
|
+
constructor(audioSource: AudioSource);
|
|
34
|
+
get targetVolume(): number;
|
|
35
|
+
set targetVolume(vol: number);
|
|
36
|
+
play(speechId: string, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer): PlayoutHandle;
|
|
37
|
+
close(): Promise<void>;
|
|
38
|
+
}
|
|
39
|
+
export {};
|
|
40
|
+
//# sourceMappingURL=agent_playout.d.ts.map
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { NoiseCancellationOptions, RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
|
|
2
|
+
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
3
|
+
import type { STT, SpeechEvent } from '../stt/stt.js';
|
|
4
|
+
import type { VAD, VADEvent } from '../vad.js';
|
|
5
|
+
export declare enum HumanInputEvent {
|
|
6
|
+
START_OF_SPEECH = 0,
|
|
7
|
+
VAD_INFERENCE_DONE = 1,
|
|
8
|
+
END_OF_SPEECH = 2,
|
|
9
|
+
FINAL_TRANSCRIPT = 3,
|
|
10
|
+
INTERIM_TRANSCRIPT = 4
|
|
11
|
+
}
|
|
12
|
+
export type HumanInputCallbacks = {
|
|
13
|
+
[HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;
|
|
14
|
+
[HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;
|
|
15
|
+
[HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;
|
|
16
|
+
[HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;
|
|
17
|
+
[HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;
|
|
18
|
+
};
|
|
19
|
+
declare const HumanInput_base: new () => TypedEmitter<HumanInputCallbacks>;
|
|
20
|
+
export declare class HumanInput extends HumanInput_base {
|
|
21
|
+
#private;
|
|
22
|
+
constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant, noiseCancellation?: NoiseCancellationOptions);
|
|
23
|
+
get participant(): RemoteParticipant;
|
|
24
|
+
get subscribedTrack(): RemoteAudioTrack | undefined;
|
|
25
|
+
get speaking(): boolean;
|
|
26
|
+
get speakingProbability(): number;
|
|
27
|
+
close(): Promise<void>;
|
|
28
|
+
}
|
|
29
|
+
export {};
|
|
30
|
+
//# sourceMappingURL=human_input.d.ts.map
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import type { NoiseCancellationOptions, RemoteParticipant, Room } from '@livekit/rtc-node';
|
|
2
|
+
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
3
|
+
import type { CallableFunctionResult, FunctionCallInfo, FunctionContext, LLM } from '../llm/index.js';
|
|
4
|
+
import { LLMStream } from '../llm/index.js';
|
|
5
|
+
import { ChatContext, ChatMessage } from '../llm/index.js';
|
|
6
|
+
import type { AgentMetrics } from '../metrics/base.js';
|
|
7
|
+
import { type STT } from '../stt/index.js';
|
|
8
|
+
import type { SentenceTokenizer, WordTokenizer } from '../tokenize/tokenizer.js';
|
|
9
|
+
import type { TTS } from '../tts/index.js';
|
|
10
|
+
import { type VAD } from '../vad.js';
|
|
11
|
+
import type { SpeechSource } from './agent_output.js';
|
|
12
|
+
import { SpeechHandle } from './speech_handle.js';
|
|
13
|
+
export type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';
|
|
14
|
+
export declare const AGENT_STATE_ATTRIBUTE = "lk.agent.state";
|
|
15
|
+
export type BeforeLLMCallback = (agent: VoicePipelineAgent, chatCtx: ChatContext) => LLMStream | false | void | Promise<LLMStream | false | void>;
|
|
16
|
+
export type BeforeTTSCallback = (agent: VoicePipelineAgent, source: string | AsyncIterable<string>) => SpeechSource;
|
|
17
|
+
export declare enum VPAEvent {
|
|
18
|
+
USER_STARTED_SPEAKING = 0,
|
|
19
|
+
USER_STOPPED_SPEAKING = 1,
|
|
20
|
+
AGENT_STARTED_SPEAKING = 2,
|
|
21
|
+
AGENT_STOPPED_SPEAKING = 3,
|
|
22
|
+
USER_SPEECH_COMMITTED = 4,
|
|
23
|
+
AGENT_SPEECH_COMMITTED = 5,
|
|
24
|
+
AGENT_SPEECH_INTERRUPTED = 6,
|
|
25
|
+
FUNCTION_CALLS_COLLECTED = 7,
|
|
26
|
+
FUNCTION_CALLS_FINISHED = 8,
|
|
27
|
+
METRICS_COLLECTED = 9
|
|
28
|
+
}
|
|
29
|
+
export type VPACallbacks = {
|
|
30
|
+
[VPAEvent.USER_STARTED_SPEAKING]: () => void;
|
|
31
|
+
[VPAEvent.USER_STOPPED_SPEAKING]: () => void;
|
|
32
|
+
[VPAEvent.AGENT_STARTED_SPEAKING]: () => void;
|
|
33
|
+
[VPAEvent.AGENT_STOPPED_SPEAKING]: () => void;
|
|
34
|
+
[VPAEvent.USER_SPEECH_COMMITTED]: (msg: ChatMessage) => void;
|
|
35
|
+
[VPAEvent.AGENT_SPEECH_COMMITTED]: (msg: ChatMessage) => void;
|
|
36
|
+
[VPAEvent.AGENT_SPEECH_INTERRUPTED]: (msg: ChatMessage) => void;
|
|
37
|
+
[VPAEvent.FUNCTION_CALLS_COLLECTED]: (funcs: FunctionCallInfo[]) => void;
|
|
38
|
+
[VPAEvent.FUNCTION_CALLS_FINISHED]: (funcs: CallableFunctionResult[]) => void;
|
|
39
|
+
[VPAEvent.METRICS_COLLECTED]: (metrics: AgentMetrics) => void;
|
|
40
|
+
};
|
|
41
|
+
interface TurnDetector {
|
|
42
|
+
unlikelyThreshold: number;
|
|
43
|
+
supportsLanguage: (language?: string) => boolean;
|
|
44
|
+
predictEndOfTurn: (chatCtx: ChatContext) => Promise<number>;
|
|
45
|
+
}
|
|
46
|
+
export declare class AgentCallContext {
|
|
47
|
+
#private;
|
|
48
|
+
constructor(agent: VoicePipelineAgent, llmStream: LLMStream);
|
|
49
|
+
static getCurrent(): AgentCallContext;
|
|
50
|
+
get agent(): VoicePipelineAgent;
|
|
51
|
+
storeMetadata(key: string, value: any): void;
|
|
52
|
+
getMetadata(key: string, orDefault?: any): any;
|
|
53
|
+
get llmStream(): LLMStream;
|
|
54
|
+
get extraChatMessages(): ChatMessage[];
|
|
55
|
+
addExtraChatMessage(message: ChatMessage): void;
|
|
56
|
+
}
|
|
57
|
+
/**
 * Options controlling how user and agent transcriptions are forwarded to
 * the client (see {@link VPAOptions.transcription}).
 */
export interface AgentTranscriptionOptions {
    /** Whether to forward the user transcription to the client */
    userTranscription: boolean;
    /** Whether to forward the agent transcription to the client */
    agentTranscription: boolean;
    /**
     * The speed at which the agent's speech transcription is forwarded to the client.
     * We try to mimic the agent's speech speed by adjusting the transcription speed.
     *
     * NOTE(review): the name reads like a typo for `agentTranscriptionSpeed`,
     * but it is published API and must be kept as-is.
     */
    agentTranscriptionSpeech: number;
    /**
     * The tokenizer used to split the speech into sentences.
     * This is used to decide when to mark a transcript as final for the agent transcription.
     */
    sentenceTokenizer: SentenceTokenizer;
    /**
     * The tokenizer used to split the speech into words.
     * This is used to simulate the "interim results" of the agent transcription.
     */
    wordTokenizer: WordTokenizer;
    /**
     * A function that takes a string (word) as input and returns a list of strings,
     * representing the hyphenated parts of the word.
     */
    hyphenateWord: (word: string) => string[];
}
|
|
83
|
+
/**
 * Configuration for {@link VoicePipelineAgent}. All fields have defaults;
 * callers pass `Partial<VPAOptions>` to the constructor.
 */
export interface VPAOptions {
    /** Chat context for the assistant. */
    chatCtx?: ChatContext;
    /** Function context for the assistant. */
    fncCtx?: FunctionContext;
    /** Whether to allow the user to interrupt the assistant. */
    allowInterruptions: boolean;
    /** Minimum duration of speech to consider for interruption. */
    interruptSpeechDuration: number;
    /** Minimum number of words to consider for interruption. This may increase latency. */
    interruptMinWords: number;
    /** Delay to wait before considering the user speech done. */
    minEndpointingDelay: number;
    /** Maximum depth of LLM function calls triggered from within function calls. */
    maxNestedFncCalls: number;
    /** Whether to start synthesizing a reply before the user's turn is finalized. */
    preemptiveSynthesis: boolean;
    /** Hook run before each LLM call; may rewrite the context or return a custom stream. */
    beforeLLMCallback: BeforeLLMCallback;
    /** Hook run before TTS; may rewrite the text about to be spoken. */
    beforeTTSCallback: BeforeTTSCallback;
    /** Options for assistant transcription. */
    transcription: AgentTranscriptionOptions;
    /** Turn detection model to use. */
    turnDetector?: TurnDetector;
    /** Noise cancellation options. */
    noiseCancellation?: NoiseCancellationOptions;
}
|
|
107
|
+
/** Compiler-generated base: a typed EventEmitter keyed by {@link VPACallbacks}. */
declare const VoicePipelineAgent_base: new () => TypedEmitter<VPACallbacks>;
|
|
108
|
+
/**
 * A pipeline agent (VAD + STT + LLM + TTS) implementation.
 *
 * Extends a typed emitter; subscribe to {@link VPAEvent} events with the
 * signatures in {@link VPACallbacks}.
 */
export declare class VoicePipelineAgent extends VoicePipelineAgent_base {
    #private;
    /** Minimum time played for the user speech to be committed to the chat context. */
    readonly MIN_TIME_PLAYED_FOR_COMMIT = 1.5;
    /** Internal sentinel used to flush the synthesis queue. */
    protected static readonly FLUSH_SENTINEL: unique symbol;
    /** Accumulated user transcription text. */
    transcribedText: string;
    constructor(
    /** Voice Activity Detection instance. */
    vad: VAD,
    /** Speech-to-Text instance. */
    stt: STT,
    /** Large Language Model instance. */
    llm: LLM,
    /** Text-to-Speech instance. */
    tts: TTS,
    /** Additional VoicePipelineAgent options. */
    opts?: Partial<VPAOptions>);
    /** The function context, if one was configured. */
    get fncCtx(): FunctionContext | undefined;
    set fncCtx(ctx: FunctionContext);
    /** The current chat context. */
    get chatCtx(): ChatContext;
    get llm(): LLM;
    get tts(): TTS;
    get stt(): STT;
    get vad(): VAD;
    /** Start the voice assistant. */
    start(
    /** The room to connect to. */
    room: Room,
    /**
     * The participant to listen to.
     *
     * @remarks
     * Can be a participant or an identity.
     * If omitted, the first participant in the room will be selected.
     */
    participant?: RemoteParticipant | string | null): void;
    /** Play a speech source through the voice assistant. */
    say(source: string | LLMStream | AsyncIterable<string>, allowInterruptions?: boolean, addToChatCtx?: boolean): Promise<SpeechHandle>;
    /** Close the voice assistant. */
    close(): Promise<void>;
}
|
|
150
|
+
export {};
|
|
151
|
+
//# sourceMappingURL=pipeline_agent.d.ts.map
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { ChatMessage, LLMStream } from '../llm/index.js';
|
|
2
|
+
import type { SynthesisHandle } from './agent_output.js';
|
|
3
|
+
/**
 * Handle for one unit of agent speech: tracks its source, synthesis handle,
 * interruption/commit state, and any nested speech spawned by tool calls.
 * Create via the static factories rather than the constructor.
 */
export declare class SpeechHandle {
    #private;
    constructor(id: string, allowInterruptions: boolean, addToChatCtx: boolean, isReply: boolean, userQuestion: string, fncNestedDepth?: number, extraToolsMessages?: ChatMessage[] | undefined);
    /** Handle for a reply to `userQuestion` (isReply = true). */
    static createAssistantReply(allowInterruptions: boolean, addToChatCtx: boolean, userQuestion: string): SpeechHandle;
    /** Handle for unprompted assistant speech (no user question). */
    static createAssistantSpeech(allowInterruptions: boolean, addToChatCtx: boolean): SpeechHandle;
    /** Handle for speech produced by a tool call, at the given nesting depth. */
    static createToolSpeech(allowInterruptions: boolean, addToChatCtx: boolean, fncNestedDepth: number, extraToolsMessages: ChatMessage[]): SpeechHandle;
    /** Resolves once {@link initialize} has been called. */
    waitForInitialization(): Promise<void>;
    /** Bind the speech source and its synthesis handle; flips {@link initialized}. */
    initialize(source: string | LLMStream | AsyncIterable<string>, synthesisHandle: SynthesisHandle): void;
    /** Mark the user's speech as committed to the chat context. */
    markUserCommitted(): void;
    /** Mark this speech as committed to the chat context. */
    markSpeechCommitted(): void;
    get userCommitted(): boolean;
    get speechCommitted(): boolean;
    get id(): string;
    get allowInterruptions(): boolean;
    get addToChatCtx(): boolean;
    /** The bound source; NOTE(review): presumably invalid before {@link initialize} — confirm. */
    get source(): string | LLMStream | AsyncIterable<string>;
    get synthesisHandle(): SynthesisHandle;
    set synthesisHandle(handle: SynthesisHandle);
    get initialized(): boolean;
    get isReply(): boolean;
    get userQuestion(): string;
    get interrupted(): boolean;
    get fncNestedDepth(): number;
    get extraToolsMessages(): ChatMessage[] | undefined;
    /** Append a speech handle spawned by this one (e.g. from a tool call). */
    addNestedSpeech(handle: SpeechHandle): void;
    get nestedSpeechHandles(): SpeechHandle[];
    /** Resolves when the set of nested speech handles changes. */
    nestedSpeechChanged(): Promise<void>;
    get nestedSpeechFinished(): boolean;
    markNestedSpeechFinished(): void;
    /** Resolves when this speech is done. */
    join(): Promise<void>;
    setDone(): void;
    /** Request interruption of this speech. */
    interrupt(): void;
    /** Cancel this speech. */
    cancel(): void;
}
|
|
37
|
+
//# sourceMappingURL=speech_handle.d.ts.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Base class for framework plugins, identified by a title and version.
 * Subclasses implement {@link downloadFiles} to fetch any assets they need.
 */
export declare abstract class Plugin {
    #private;
    /**
     * Registered plugins. NOTE(review): declared as an instance field but
     * mutated via the static {@link registerPlugins} — looks like it was
     * intended to be `static`; confirm against the implementation.
     */
    registeredPlugins: Plugin[];
    constructor(title: string, version: string);
    /** Register a single plugin (name is plural, but takes one plugin per call). */
    static registerPlugins(plugin: Plugin): void;
    /** Download any files/assets this plugin requires. */
    abstract downloadFiles(): void;
    get title(): string;
    get version(): string;
}
|
|
10
|
+
//# sourceMappingURL=plugin.d.ts.map
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export { type SpeechEvent, type SpeechData, type STTCapabilities, type RecognitionUsage, type STTCallbacks, SpeechEventType, STT, SpeechStream, } from './stt.js';
|
|
2
|
+
export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
|
|
3
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { AudioFrame } from '@livekit/rtc-node';
|
|
2
|
+
import type { VAD } from '../vad.js';
|
|
3
|
+
import type { SpeechEvent } from './stt.js';
|
|
4
|
+
import { STT, SpeechStream } from './stt.js';
|
|
5
|
+
/**
 * Adapts a non-streaming {@link STT} into a streaming one by using a
 * {@link VAD} to segment audio; {@link stream} yields a
 * {@link StreamAdapterWrapper} bound to both.
 */
export declare class StreamAdapter extends STT {
    #private;
    /** Identifier for this STT implementation (used in logs/metrics). */
    label: string;
    constructor(stt: STT, vad: VAD);
    /** Run one-shot recognition on a single audio frame via the wrapped STT. */
    _recognize(frame: AudioFrame): Promise<SpeechEvent>;
    /** Open a streaming session backed by the wrapped STT and VAD. */
    stream(): StreamAdapterWrapper;
}
|
|
12
|
+
/**
 * The {@link SpeechStream} produced by {@link StreamAdapter.stream}:
 * VAD-segmented audio is recognized through the wrapped non-streaming STT.
 */
export declare class StreamAdapterWrapper extends SpeechStream {
    #private;
    /** Identifier for this stream (used in logs/metrics). */
    label: string;
    constructor(stt: STT, vad: VAD);
    /** Collect usage/latency metrics for this stream until it closes. */
    monitorMetrics(): Promise<void>;
}
|
|
18
|
+
//# sourceMappingURL=stream_adapter.d.ts.map
|