@livekit/agents 0.7.6 → 0.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/dist/audio.d.cts +9 -0
  2. package/dist/cli.d.cts +14 -0
  3. package/dist/constants.d.cts +5 -0
  4. package/dist/generator.d.cts +23 -0
  5. package/dist/http_server.cjs.map +1 -1
  6. package/dist/http_server.d.cts +19 -0
  7. package/dist/http_server.d.ts +1 -0
  8. package/dist/http_server.d.ts.map +1 -1
  9. package/dist/http_server.js.map +1 -1
  10. package/dist/index.d.cts +29 -0
  11. package/dist/inference_runner.d.cts +12 -0
  12. package/dist/ipc/index.d.cts +2 -0
  13. package/dist/ipc/inference_executor.d.cts +4 -0
  14. package/dist/ipc/inference_proc_executor.cjs +3 -2
  15. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  16. package/dist/ipc/inference_proc_executor.d.cts +23 -0
  17. package/dist/ipc/inference_proc_executor.js +1 -1
  18. package/dist/ipc/inference_proc_executor.js.map +1 -1
  19. package/dist/ipc/inference_proc_lazy_main.d.cts +2 -0
  20. package/dist/ipc/job_executor.d.cts +18 -0
  21. package/dist/ipc/job_proc_executor.cjs +3 -2
  22. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  23. package/dist/ipc/job_proc_executor.d.cts +19 -0
  24. package/dist/ipc/job_proc_executor.js +1 -1
  25. package/dist/ipc/job_proc_executor.js.map +1 -1
  26. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  27. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  28. package/dist/ipc/job_proc_lazy_main.d.cts +2 -0
  29. package/dist/ipc/job_proc_lazy_main.js +1 -1
  30. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  31. package/dist/ipc/message.d.cts +58 -0
  32. package/dist/ipc/proc_pool.d.cts +31 -0
  33. package/dist/ipc/supervised_proc.d.cts +30 -0
  34. package/dist/job.d.cts +113 -0
  35. package/dist/llm/chat_context.d.cts +66 -0
  36. package/dist/llm/function_context.d.cts +47 -0
  37. package/dist/llm/index.d.cts +4 -0
  38. package/dist/llm/llm.cjs +3 -3
  39. package/dist/llm/llm.cjs.map +1 -1
  40. package/dist/llm/llm.d.cts +66 -0
  41. package/dist/llm/llm.js +3 -3
  42. package/dist/llm/llm.js.map +1 -1
  43. package/dist/log.d.cts +13 -0
  44. package/dist/metrics/base.d.cts +96 -0
  45. package/dist/metrics/index.d.cts +5 -0
  46. package/dist/metrics/usage_collector.d.cts +14 -0
  47. package/dist/metrics/utils.d.cts +10 -0
  48. package/dist/multimodal/agent_playout.d.cts +34 -0
  49. package/dist/multimodal/index.d.cts +3 -0
  50. package/dist/multimodal/multimodal_agent.d.cts +48 -0
  51. package/dist/pipeline/agent_output.d.cts +33 -0
  52. package/dist/pipeline/agent_playout.d.cts +40 -0
  53. package/dist/pipeline/human_input.d.cts +30 -0
  54. package/dist/pipeline/index.d.cts +2 -0
  55. package/dist/pipeline/pipeline_agent.d.cts +151 -0
  56. package/dist/pipeline/speech_handle.d.cts +37 -0
  57. package/dist/plugin.d.cts +10 -0
  58. package/dist/stt/index.d.cts +3 -0
  59. package/dist/stt/stream_adapter.d.cts +18 -0
  60. package/dist/stt/stt.d.cts +124 -0
  61. package/dist/tokenize/basic/basic.d.cts +18 -0
  62. package/dist/tokenize/basic/hyphenator.d.cts +17 -0
  63. package/dist/tokenize/basic/index.d.cts +2 -0
  64. package/dist/tokenize/basic/paragraph.d.cts +5 -0
  65. package/dist/tokenize/basic/sentence.d.cts +5 -0
  66. package/dist/tokenize/basic/word.d.cts +5 -0
  67. package/dist/tokenize/index.d.cts +5 -0
  68. package/dist/tokenize/token_stream.d.cts +39 -0
  69. package/dist/tokenize/tokenizer.d.cts +55 -0
  70. package/dist/transcription.d.cts +31 -0
  71. package/dist/tts/index.d.cts +3 -0
  72. package/dist/tts/stream_adapter.d.cts +17 -0
  73. package/dist/tts/tts.cjs +4 -4
  74. package/dist/tts/tts.cjs.map +1 -1
  75. package/dist/tts/tts.d.cts +127 -0
  76. package/dist/tts/tts.js +4 -4
  77. package/dist/tts/tts.js.map +1 -1
  78. package/dist/utils.d.cts +72 -0
  79. package/dist/vad.d.cts +78 -0
  80. package/dist/version.d.cts +2 -0
  81. package/dist/worker.cjs +2 -1
  82. package/dist/worker.cjs.map +1 -1
  83. package/dist/worker.d.cts +109 -0
  84. package/dist/worker.d.ts.map +1 -1
  85. package/dist/worker.js +2 -1
  86. package/dist/worker.js.map +1 -1
  87. package/package.json +9 -5
  88. package/src/http_server.ts +1 -0
  89. package/src/ipc/inference_proc_executor.ts +1 -1
  90. package/src/ipc/job_proc_executor.ts +1 -1
  91. package/src/ipc/job_proc_lazy_main.ts +1 -1
  92. package/src/llm/llm.ts +3 -3
  93. package/src/tts/tts.ts +4 -4
  94. package/src/worker.ts +1 -0
@@ -0,0 +1,58 @@
1
+ import type { RunningJobInfo } from '../job.js';
2
+ import type { LoggerOptions } from '../log.js';
3
+ export type IPCMessage = {
4
+ case: 'initializeRequest';
5
+ value: {
6
+ loggerOptions: LoggerOptions;
7
+ pingInterval?: number;
8
+ pingTimeout?: number;
9
+ highPingThreshold?: number;
10
+ };
11
+ } | {
12
+ case: 'initializeResponse';
13
+ value: undefined;
14
+ } | {
15
+ case: 'pingRequest';
16
+ value: {
17
+ timestamp: number;
18
+ };
19
+ } | {
20
+ case: 'pongResponse';
21
+ value: {
22
+ lastTimestamp: number;
23
+ timestamp: number;
24
+ };
25
+ } | {
26
+ case: 'startJobRequest';
27
+ value: {
28
+ runningJob: RunningJobInfo;
29
+ };
30
+ } | {
31
+ case: 'shutdownRequest';
32
+ value: {
33
+ reason?: string;
34
+ };
35
+ } | {
36
+ case: 'inferenceRequest';
37
+ value: {
38
+ method: string;
39
+ requestId: string;
40
+ data: unknown;
41
+ };
42
+ } | {
43
+ case: 'inferenceResponse';
44
+ value: {
45
+ requestId: string;
46
+ data: unknown;
47
+ error?: Error;
48
+ };
49
+ } | {
50
+ case: 'exiting';
51
+ value: {
52
+ reason?: string;
53
+ };
54
+ } | {
55
+ case: 'done';
56
+ value: undefined;
57
+ };
58
+ //# sourceMappingURL=message.d.ts.map
@@ -0,0 +1,31 @@
1
+ import { MultiMutex, Mutex } from '@livekit/mutex';
2
+ import type { RunningJobInfo } from '../job.js';
3
+ import { Queue } from '../utils.js';
4
+ import type { InferenceExecutor } from './inference_executor.js';
5
+ import type { JobExecutor } from './job_executor.js';
6
+ export declare class ProcPool {
7
+ agent: string;
8
+ initializeTimeout: number;
9
+ closeTimeout: number;
10
+ executors: JobExecutor[];
11
+ tasks: Promise<void>[];
12
+ started: boolean;
13
+ closed: boolean;
14
+ controller: AbortController;
15
+ initMutex: Mutex;
16
+ procMutex?: MultiMutex;
17
+ procUnlock?: () => void;
18
+ warmedProcQueue: Queue<JobExecutor>;
19
+ inferenceExecutor?: InferenceExecutor;
20
+ memoryWarnMB: number;
21
+ memoryLimitMB: number;
22
+ constructor(agent: string, numIdleProcesses: number, initializeTimeout: number, closeTimeout: number, inferenceExecutor: InferenceExecutor | undefined, memoryWarnMB: number, memoryLimitMB: number);
23
+ get processes(): JobExecutor[];
24
+ getByJobId(id: string): JobExecutor | null;
25
+ launchJob(info: RunningJobInfo): Promise<void>;
26
+ procWatchTask(): Promise<void>;
27
+ start(): void;
28
+ run(signal: AbortSignal): Promise<void>;
29
+ close(): Promise<void>;
30
+ }
31
+ //# sourceMappingURL=proc_pool.d.ts.map
@@ -0,0 +1,30 @@
1
+ /// <reference types="node" resolution-mode="require"/>
2
+ import type { ChildProcess } from 'node:child_process';
3
+ import type { RunningJobInfo } from '../job.js';
4
+ import { Future } from '../utils.js';
5
+ export interface ProcOpts {
6
+ initializeTimeout: number;
7
+ closeTimeout: number;
8
+ memoryWarnMB: number;
9
+ memoryLimitMB: number;
10
+ pingInterval: number;
11
+ pingTimeout: number;
12
+ highPingThreshold: number;
13
+ }
14
+ export declare abstract class SupervisedProc {
15
+ #private;
16
+ proc?: ChildProcess;
17
+ protected init: Future;
18
+ constructor(initializeTimeout: number, closeTimeout: number, memoryWarnMB: number, memoryLimitMB: number, pingInterval: number, pingTimeout: number, highPingThreshold: number);
19
+ abstract createProcess(): ChildProcess;
20
+ abstract mainTask(child: ChildProcess): Promise<void>;
21
+ get started(): boolean;
22
+ get runningJob(): RunningJobInfo | undefined;
23
+ start(): Promise<void>;
24
+ run(): Promise<void>;
25
+ join(): Promise<void>;
26
+ initialize(): Promise<void>;
27
+ close(): Promise<void>;
28
+ launchJob(info: RunningJobInfo): Promise<void>;
29
+ }
30
+ //# sourceMappingURL=supervised_proc.d.ts.map
package/dist/job.d.cts ADDED
@@ -0,0 +1,113 @@
1
+ import type * as proto from '@livekit/protocol';
2
+ import type { E2EEOptions, LocalParticipant, RemoteParticipant, Room, RtcConfiguration } from '@livekit/rtc-node';
3
+ import type { InferenceExecutor } from './ipc/inference_executor.js';
4
+ export declare class CurrentJobContext {
5
+ #private;
6
+ constructor(proc: JobContext);
7
+ static getCurrent(): JobContext;
8
+ }
9
+ /** Which tracks, if any, should the agent automatically subscribe to? */
10
+ export declare enum AutoSubscribe {
11
+ SUBSCRIBE_ALL = 0,
12
+ SUBSCRIBE_NONE = 1,
13
+ VIDEO_ONLY = 2,
14
+ AUDIO_ONLY = 3
15
+ }
16
+ export type JobAcceptArguments = {
17
+ name: string;
18
+ identity: string;
19
+ metadata: string;
20
+ attributes?: {
21
+ [key: string]: string;
22
+ };
23
+ };
24
+ export type RunningJobInfo = {
25
+ acceptArguments: JobAcceptArguments;
26
+ job: proto.Job;
27
+ url: string;
28
+ token: string;
29
+ };
30
+ /** Attempted to add a function callback, but the function already exists. */
31
+ export declare class FunctionExistsError extends Error {
32
+ constructor(msg?: string);
33
+ }
34
+ /** The job and environment context as seen by the agent, accessible by the entrypoint function. */
35
+ export declare class JobContext {
36
+ #private;
37
+ /** @internal */
38
+ shutdownCallbacks: (() => Promise<void>)[];
39
+ constructor(proc: JobProcess, info: RunningJobInfo, room: Room, onConnect: () => void, onShutdown: (s: string) => void, inferenceExecutor: InferenceExecutor);
40
+ get proc(): JobProcess;
41
+ get job(): proto.Job;
42
+ /** @returns The room the agent was called into */
43
+ get room(): Room;
44
+ /** @returns The agent's participant if connected to the room, otherwise `undefined` */
45
+ get agent(): LocalParticipant | undefined;
46
+ /** @returns The global inference executor */
47
+ get inferenceExecutor(): InferenceExecutor;
48
+ /** Adds a promise to be awaited when {@link JobContext.shutdown | shutdown} is called. */
49
+ addShutdownCallback(callback: () => Promise<void>): void;
50
+ waitForParticipant(identity?: string): Promise<RemoteParticipant>;
51
+ /**
52
+ * Connects the agent to the room.
53
+ *
54
+ * @remarks
55
+ * It is recommended to run this command as early in the function as possible, as executing it
56
+ * later may cause noticeable delay between user and agent joins.
57
+ *
58
+ * @see {@link https://github.com/livekit/node-sdks/tree/main/packages/livekit-rtc#readme |
59
+ * @livekit/rtc-node} for more information about the parameters.
60
+ */
61
+ connect(e2ee?: E2EEOptions, autoSubscribe?: AutoSubscribe, rtcConfig?: RtcConfiguration): Promise<void>;
62
+ /**
63
+ * Gracefully shuts down the job, and runs all shutdown promises.
64
+ *
65
+ * @param reason - Optional reason for shutdown
66
+ */
67
+ shutdown(reason?: string): void;
68
+ /** @internal */
69
+ onParticipantConnected(p: RemoteParticipant): void;
70
+ /**
71
+ * Adds a promise to be awaited whenever a new participant joins the room.
72
+ *
73
+ * @throws {@link FunctionExistsError} if an entrypoint already exists
74
+ */
75
+ addParticipantEntrypoint(callback: (job: JobContext, p: RemoteParticipant) => Promise<void>): void;
76
+ }
77
+ export declare class JobProcess {
78
+ #private;
79
+ userData: {
80
+ [id: string]: unknown;
81
+ };
82
+ get pid(): number;
83
+ }
84
+ /**
85
+ * A request sent by the server to spawn a new agent job.
86
+ *
87
+ * @remarks
88
+ * For most applications, this is best left to the default, which simply accepts the job and
89
+ * handles the logic inside the entrypoint function. This class is useful for vetting which
90
+ * requests should fill idle processes and which should be outright rejected.
91
+ */
92
+ export declare class JobRequest {
93
+ #private;
94
+ /** @internal */
95
+ constructor(job: proto.Job, onReject: () => Promise<void>, onAccept: (args: JobAcceptArguments) => Promise<void>);
96
+ /** @returns The ID of the job, set by the LiveKit server */
97
+ get id(): string;
98
+ /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */
99
+ get job(): proto.Job;
100
+ /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */
101
+ get room(): proto.Room | undefined;
102
+ /** @see {@link https://www.npmjs.com/package/@livekit/protocol | @livekit/protocol} */
103
+ get publisher(): proto.ParticipantInfo | undefined;
104
+ /** @returns The agent's name, as set in {@link WorkerOptions} */
105
+ get agentName(): string;
106
+ /** Rejects the job. */
107
+ reject(): Promise<void>;
108
+ /** Accepts the job, launching it on an idle child process. */
109
+ accept(name?: string, identity?: string, metadata?: string, attributes?: {
110
+ [key: string]: string;
111
+ }): Promise<void>;
112
+ }
113
+ //# sourceMappingURL=job.d.ts.map
@@ -0,0 +1,66 @@
1
+ import type { AudioFrame, VideoFrame } from '@livekit/rtc-node';
2
+ import type { CallableFunctionResult, FunctionCallInfo } from './function_context.js';
3
+ export declare enum ChatRole {
4
+ SYSTEM = 0,
5
+ USER = 1,
6
+ ASSISTANT = 2,
7
+ TOOL = 3
8
+ }
9
+ export interface ChatImage {
10
+ image: string | VideoFrame;
11
+ inferenceWidth?: number;
12
+ inferenceHeight?: number;
13
+ /**
14
+ * @internal
15
+ * Used by LLM implementations to store a processed version of the image for later use.
16
+ */
17
+ cache: {
18
+ [id: string | number | symbol]: any;
19
+ };
20
+ }
21
+ export interface ChatAudio {
22
+ frame: AudioFrame | AudioFrame[];
23
+ }
24
+ export type ChatContent = string | ChatImage | ChatAudio;
25
+ export declare class ChatMessage {
26
+ readonly role: ChatRole;
27
+ readonly id?: string;
28
+ readonly name?: string;
29
+ readonly content?: ChatContent | ChatContent[];
30
+ readonly toolCalls?: FunctionCallInfo[];
31
+ readonly toolCallId?: string;
32
+ readonly toolException?: Error;
33
+ /** @internal */
34
+ constructor({ role, id, name, content, toolCalls, toolCallId, toolException, }: {
35
+ role: ChatRole;
36
+ id?: string;
37
+ name?: string;
38
+ content?: ChatContent | ChatContent[];
39
+ toolCalls?: FunctionCallInfo[];
40
+ toolCallId?: string;
41
+ toolException?: Error;
42
+ });
43
+ static createToolFromFunctionResult(func: CallableFunctionResult): ChatMessage;
44
+ static createToolCalls(toolCalls: FunctionCallInfo[], text?: string): ChatMessage;
45
+ static create(options: Partial<{
46
+ text?: string;
47
+ images: ChatImage[];
48
+ role: ChatRole;
49
+ }>): ChatMessage;
50
+ /** Returns a structured clone of this message. */
51
+ copy(): ChatMessage;
52
+ }
53
+ export declare class ChatContext {
54
+ messages: ChatMessage[];
55
+ metadata: {
56
+ [id: string]: any;
57
+ };
58
+ append(msg: {
59
+ text?: string;
60
+ images?: ChatImage[];
61
+ role: ChatRole;
62
+ }): ChatContext;
63
+ /** Returns a structured clone of this context. */
64
+ copy(): ChatContext;
65
+ }
66
+ //# sourceMappingURL=chat_context.d.ts.map
@@ -0,0 +1,47 @@
1
+ import { z } from 'zod';
2
+ /** Type reinforcement for the callable function's execute parameters. */
3
+ export type inferParameters<P extends z.ZodTypeAny> = z.infer<P>;
4
+ /** Raw OpenAI-adherent function parameters. */
5
+ export type OpenAIFunctionParameters = {
6
+ type: 'object';
7
+ properties: {
8
+ [id: string]: any;
9
+ };
10
+ required: string[];
11
+ additionalProperties: boolean;
12
+ };
13
+ /** A definition for a function callable by the LLM. */
14
+ export interface CallableFunction<P extends z.ZodTypeAny = any, R = any> {
15
+ description: string;
16
+ parameters: OpenAIFunctionParameters | P;
17
+ execute: (args: inferParameters<P>) => PromiseLike<R>;
18
+ }
19
+ /** A function that has been called but is not yet running */
20
+ export interface FunctionCallInfo<P extends z.ZodTypeAny = any, R = any> {
21
+ name: string;
22
+ func: CallableFunction<P, R>;
23
+ toolCallId: string;
24
+ rawParams: string;
25
+ params: inferParameters<P>;
26
+ task?: PromiseLike<CallableFunctionResult>;
27
+ }
28
+ /** The result of a ran FunctionCallInfo. */
29
+ export interface CallableFunctionResult {
30
+ name: string;
31
+ toolCallId: string;
32
+ result?: any;
33
+ error?: any;
34
+ }
35
+ /** An object containing callable functions and their names */
36
+ export type FunctionContext = {
37
+ [name: string]: CallableFunction;
38
+ };
39
+ /** @internal */
40
+ export declare const oaiParams: (p: z.AnyZodObject) => {
41
+ type: "object";
42
+ properties: Record<string, any>;
43
+ required: string[];
44
+ };
45
+ /** @internal */
46
+ export declare const oaiBuildFunctionInfo: (fncCtx: FunctionContext, toolCallId: string, fncName: string, rawArgs: string) => FunctionCallInfo;
47
+ //# sourceMappingURL=function_context.d.ts.map
@@ -0,0 +1,4 @@
1
+ export { type CallableFunction, type FunctionCallInfo, type CallableFunctionResult, type FunctionContext, type inferParameters, oaiParams, oaiBuildFunctionInfo, } from './function_context.js';
2
+ export { type ChatImage, type ChatAudio, type ChatContent, ChatRole, ChatMessage, ChatContext, } from './chat_context.js';
3
+ export { type ChoiceDelta, type CompletionUsage, type Choice, type ChatChunk, type LLMCallbacks, LLMEvent, LLM, LLMStream, } from './llm.js';
4
+ //# sourceMappingURL=index.d.ts.map
package/dist/llm/llm.cjs CHANGED
@@ -47,13 +47,13 @@ class LLMStream {
47
47
  }
48
48
  async monitorMetrics() {
49
49
  const startTime = process.hrtime.bigint();
50
- let ttft;
50
+ let ttft = BigInt(-1);
51
51
  let requestId = "";
52
52
  let usage;
53
53
  for await (const ev of this.queue) {
54
54
  this.output.put(ev);
55
55
  requestId = ev.requestId;
56
- if (!ttft) {
56
+ if (ttft === BigInt(-1)) {
57
57
  ttft = process.hrtime.bigint() - startTime;
58
58
  }
59
59
  if (ev.usage) {
@@ -65,7 +65,7 @@ class LLMStream {
65
65
  const metrics = {
66
66
  timestamp: Date.now(),
67
67
  requestId,
68
- ttft: Math.trunc(Number(ttft / BigInt(1e6))),
68
+ ttft: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1e6))),
69
69
  duration: Math.trunc(Number(duration / BigInt(1e6))),
70
70
  cancelled: false,
71
71
  // XXX(nbsp)
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/llm/llm.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { LLMMetrics } from '../metrics/base.js';\nimport { AsyncIterableQueue } from '../utils.js';\nimport type { ChatContext, ChatRole } from './chat_context.js';\nimport type { FunctionCallInfo, FunctionContext } from './function_context.js';\n\nexport interface ChoiceDelta {\n role: ChatRole;\n content?: string;\n toolCalls?: FunctionCallInfo[];\n}\n\nexport interface CompletionUsage {\n completionTokens: number;\n promptTokens: number;\n totalTokens: number;\n}\n\nexport interface Choice {\n delta: ChoiceDelta;\n index: number;\n}\n\nexport interface ChatChunk {\n requestId: string;\n choices: Choice[];\n usage?: CompletionUsage;\n}\n\nexport enum LLMEvent {\n METRICS_COLLECTED,\n}\n\nexport type LLMCallbacks = {\n [LLMEvent.METRICS_COLLECTED]: (metrics: LLMMetrics) => void;\n};\n\nexport abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCallbacks>) {\n /**\n * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.\n */\n abstract chat({\n chatCtx,\n fncCtx,\n temperature,\n n,\n parallelToolCalls,\n }: {\n chatCtx: ChatContext;\n fncCtx?: FunctionContext;\n temperature?: number;\n n?: number;\n parallelToolCalls?: boolean;\n }): LLMStream;\n}\n\nexport abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {\n protected output = new AsyncIterableQueue<ChatChunk>();\n protected queue = new AsyncIterableQueue<ChatChunk>();\n protected closed = false;\n protected _functionCalls: FunctionCallInfo[] = [];\n abstract label: string;\n\n #llm: LLM;\n #chatCtx: ChatContext;\n #fncCtx?: FunctionContext;\n\n constructor(llm: LLM, chatCtx: ChatContext, fncCtx?: FunctionContext) {\n this.#llm = llm;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.monitorMetrics();\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let ttft: bigint | undefined;\n let requestId = '';\n let usage: CompletionUsage | undefined;\n\n for await (const ev of this.queue) {\n this.output.put(ev);\n requestId = ev.requestId;\n if (!ttft) {\n ttft = process.hrtime.bigint() - startTime;\n }\n if (ev.usage) {\n usage = ev.usage;\n }\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: LLMMetrics = {\n timestamp: Date.now(),\n requestId,\n ttft: Math.trunc(Number(ttft! / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n cancelled: false, // XXX(nbsp)\n label: this.label,\n completionTokens: usage?.completionTokens || 0,\n promptTokens: usage?.promptTokens || 0,\n totalTokens: usage?.totalTokens || 0,\n tokensPerSecond:\n (usage?.completionTokens || 0) / Math.trunc(Number(duration / BigInt(1000000000))),\n };\n this.#llm.emit(LLMEvent.METRICS_COLLECTED, metrics);\n }\n\n /** List of called functions from this stream. */\n get functionCalls(): FunctionCallInfo[] {\n return this._functionCalls;\n }\n\n /** The function context of this stream. */\n get fncCtx(): FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n /** The initial chat context of this stream. */\n get chatCtx(): ChatContext {\n return this.#chatCtx;\n }\n\n /** Execute all deferred functions of this stream concurrently. */\n executeFunctions(): FunctionCallInfo[] {\n this._functionCalls.forEach(\n (f) =>\n (f.task = f.func.execute(f.params).then(\n (result) => ({ name: f.name, toolCallId: f.toolCallId, result }),\n (error) => ({ name: f.name, toolCallId: f.toolCallId, error }),\n )),\n );\n return this._functionCalls;\n }\n\n next(): Promise<IteratorResult<ChatChunk>> {\n return this.output.next();\n }\n\n close() {\n this.output.close();\n this.queue.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): LLMStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,yBAA6B;AAE7B,mBAAmC;AA2B5B,IAAK,WAAL,kBAAKA,cAAL;AACL,EAAAA,oBAAA;AADU,SAAAA;AAAA,GAAA;AAQL,MAAe,YAAa,gCAAsD;AAiBzF;AAEO,MAAe,UAAsD;AAAA,EAChE,SAAS,IAAI,gCAA8B;AAAA,EAC3C,QAAQ,IAAI,gCAA8B;AAAA,EAC1C,SAAS;AAAA,EACT,iBAAqC,CAAC;AAAA,EAGhD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,SAAsB,QAA0B;AACpE,SAAK,OAAO;AACZ,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI;AACJ,QAAI,YAAY;AAChB,QAAI;AAEJ,qBAAiB,MAAM,KAAK,OAAO;AACjC,WAAK,OAAO,IAAI,EAAE;AAClB,kBAAY,GAAG;AACf,UAAI,CAAC,MAAM;AACT,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,UAAI,GAAG,OAAO;AACZ,gBAAQ,GAAG;AAAA,MACb;AAAA,IACF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,KAAK,MAAM,OAAO,OAAQ,OAAO,GAAO,CAAC,CAAC;AAAA,MAChD,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,WAAW;AAAA;AAAA,MACX,OAAO,KAAK;AAAA,MACZ,mBAAkB,+BAAO,qBAAoB;AAAA,MAC7C,eAAc,+BAAO,iBAAgB;AAAA,MACrC,cAAa,+BAAO,gBAAe;AAAA,MACnC,mBACG,+BAAO,qBAAoB,KAAK,KAAK,MAAM,OAAO,WAAW,OAAO,GAAU,CAAC,CAAC;AAAA,IACrF;AACA,SAAK,KAAK,KAAK,2BAA4B,OAAO;AAAA,EACpD;AAAA;AAAA,EAGA,IAAI,gBAAoC;AACtC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,SAAsC;AACxC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,mBAAuC;AACrC,SAAK,eAAe;AAAA,MAClB,CAAC,MACE,EAAE,OAAO,EAAE,KAAK,QAAQ,EAAE,MAAM,EAAE;AAAA,QACjC,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,YAAY,EAAE,YAAY,OAAO;AAAA,QAC9D,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,YAAY,EAAE,YAAY,MAAM;AAAA,MAC9D;AAAA,IACJ;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA,EAEA,QAAQ;AACN,SAAK,OAAO,MAAM;AAClB,SAAK,MAAM,MAAM;AACjB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["LLMEvent"]}
1
+ {"version":3,"sources":["../../src/llm/llm.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { LLMMetrics } from '../metrics/base.js';\nimport { AsyncIterableQueue } from '../utils.js';\nimport type { ChatContext, ChatRole } from './chat_context.js';\nimport type { FunctionCallInfo, FunctionContext } from './function_context.js';\n\nexport interface ChoiceDelta {\n role: ChatRole;\n content?: string;\n toolCalls?: FunctionCallInfo[];\n}\n\nexport interface CompletionUsage {\n completionTokens: number;\n promptTokens: number;\n totalTokens: number;\n}\n\nexport interface Choice {\n delta: ChoiceDelta;\n index: number;\n}\n\nexport interface ChatChunk {\n requestId: string;\n choices: Choice[];\n usage?: CompletionUsage;\n}\n\nexport enum LLMEvent {\n METRICS_COLLECTED,\n}\n\nexport type LLMCallbacks = {\n [LLMEvent.METRICS_COLLECTED]: (metrics: LLMMetrics) => void;\n};\n\nexport abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCallbacks>) {\n /**\n * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.\n */\n abstract chat({\n chatCtx,\n fncCtx,\n temperature,\n n,\n parallelToolCalls,\n }: {\n chatCtx: ChatContext;\n fncCtx?: FunctionContext;\n temperature?: number;\n n?: number;\n parallelToolCalls?: boolean;\n }): LLMStream;\n}\n\nexport abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {\n protected output = new AsyncIterableQueue<ChatChunk>();\n protected queue = new AsyncIterableQueue<ChatChunk>();\n protected closed = false;\n protected _functionCalls: FunctionCallInfo[] = [];\n abstract label: string;\n\n #llm: LLM;\n #chatCtx: ChatContext;\n #fncCtx?: FunctionContext;\n\n constructor(llm: LLM, chatCtx: ChatContext, fncCtx?: FunctionContext) {\n this.#llm = llm;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.monitorMetrics();\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let ttft: bigint = BigInt(-1);\n let requestId = '';\n let usage: CompletionUsage | undefined;\n\n for await (const ev of this.queue) {\n this.output.put(ev);\n requestId = ev.requestId;\n if (ttft === BigInt(-1)) {\n ttft = process.hrtime.bigint() - startTime;\n }\n if (ev.usage) {\n usage = ev.usage;\n }\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: LLMMetrics = {\n timestamp: Date.now(),\n requestId,\n ttft: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n cancelled: false, // XXX(nbsp)\n label: this.label,\n completionTokens: usage?.completionTokens || 0,\n promptTokens: usage?.promptTokens || 0,\n totalTokens: usage?.totalTokens || 0,\n tokensPerSecond:\n (usage?.completionTokens || 0) / Math.trunc(Number(duration / BigInt(1000000000))),\n };\n this.#llm.emit(LLMEvent.METRICS_COLLECTED, metrics);\n }\n\n /** List of called functions from this stream. */\n get functionCalls(): FunctionCallInfo[] {\n return this._functionCalls;\n }\n\n /** The function context of this stream. */\n get fncCtx(): FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n /** The initial chat context of this stream. */\n get chatCtx(): ChatContext {\n return this.#chatCtx;\n }\n\n /** Execute all deferred functions of this stream concurrently. */\n executeFunctions(): FunctionCallInfo[] {\n this._functionCalls.forEach(\n (f) =>\n (f.task = f.func.execute(f.params).then(\n (result) => ({ name: f.name, toolCallId: f.toolCallId, result }),\n (error) => ({ name: f.name, toolCallId: f.toolCallId, error }),\n )),\n );\n return this._functionCalls;\n }\n\n next(): Promise<IteratorResult<ChatChunk>> {\n return this.output.next();\n }\n\n close() {\n this.output.close();\n this.queue.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): LLMStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,yBAA6B;AAE7B,mBAAmC;AA2B5B,IAAK,WAAL,kBAAKA,cAAL;AACL,EAAAA,oBAAA;AADU,SAAAA;AAAA,GAAA;AAQL,MAAe,YAAa,gCAAsD;AAiBzF;AAEO,MAAe,UAAsD;AAAA,EAChE,SAAS,IAAI,gCAA8B;AAAA,EAC3C,QAAQ,IAAI,gCAA8B;AAAA,EAC1C,SAAS;AAAA,EACT,iBAAqC,CAAC;AAAA,EAGhD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,SAAsB,QAA0B;AACpE,SAAK,OAAO;AACZ,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAChB,QAAI;AAEJ,qBAAiB,MAAM,KAAK,OAAO;AACjC,WAAK,OAAO,IAAI,EAAE;AAClB,kBAAY,GAAG;AACf,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,UAAI,GAAG,OAAO;AACZ,gBAAQ,GAAG;AAAA,MACb;AAAA,IACF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,WAAW;AAAA;AAAA,MACX,OAAO,KAAK;AAAA,MACZ,mBAAkB,+BAAO,qBAAoB;AAAA,MAC7C,eAAc,+BAAO,iBAAgB;AAAA,MACrC,cAAa,+BAAO,gBAAe;AAAA,MACnC,mBACG,+BAAO,qBAAoB,KAAK,KAAK,MAAM,OAAO,WAAW,OAAO,GAAU,CAAC,CAAC;AAAA,IACrF;AACA,SAAK,KAAK,KAAK,2BAA4B,OAAO;AAAA,EACpD;AAAA;AAAA,EAGA,IAAI,gBAAoC;AACtC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,SAAsC;AACxC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,mBAAuC;AACrC,SAAK,eAAe;AAAA,MAClB,CAAC,MACE,EAAE,OAAO,EAAE,KAAK,QAAQ,EAAE,MAAM,EAAE;AAAA,QACjC,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,YAAY,EAAE,YAAY,OAAO;AAAA,QAC9D,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,YAAY,EAAE,YAAY,MAAM;AAAA,MAC9D;AAAA,IACJ;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA,EAEA,QAAQ;AACN,SAAK,OAAO,MAAM;AAClB,SAAK,MAAM,MAAM;AACjB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["LLMEvent"]}
@@ -0,0 +1,66 @@
1
+ import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
2
+ import type { LLMMetrics } from '../metrics/base.js';
3
+ import { AsyncIterableQueue } from '../utils.js';
4
+ import type { ChatContext, ChatRole } from './chat_context.js';
5
+ import type { FunctionCallInfo, FunctionContext } from './function_context.js';
6
+ export interface ChoiceDelta {
7
+ role: ChatRole;
8
+ content?: string;
9
+ toolCalls?: FunctionCallInfo[];
10
+ }
11
+ export interface CompletionUsage {
12
+ completionTokens: number;
13
+ promptTokens: number;
14
+ totalTokens: number;
15
+ }
16
+ export interface Choice {
17
+ delta: ChoiceDelta;
18
+ index: number;
19
+ }
20
+ export interface ChatChunk {
21
+ requestId: string;
22
+ choices: Choice[];
23
+ usage?: CompletionUsage;
24
+ }
25
+ export declare enum LLMEvent {
26
+ METRICS_COLLECTED = 0
27
+ }
28
+ export type LLMCallbacks = {
29
+ [LLMEvent.METRICS_COLLECTED]: (metrics: LLMMetrics) => void;
30
+ };
31
+ declare const LLM_base: new () => TypedEmitter<LLMCallbacks>;
32
+ export declare abstract class LLM extends LLM_base {
33
+ /**
34
+ * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.
35
+ */
36
+ abstract chat({ chatCtx, fncCtx, temperature, n, parallelToolCalls, }: {
37
+ chatCtx: ChatContext;
38
+ fncCtx?: FunctionContext;
39
+ temperature?: number;
40
+ n?: number;
41
+ parallelToolCalls?: boolean;
42
+ }): LLMStream;
43
+ }
44
+ export declare abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {
45
+ #private;
46
+ protected output: AsyncIterableQueue<ChatChunk>;
47
+ protected queue: AsyncIterableQueue<ChatChunk>;
48
+ protected closed: boolean;
49
+ protected _functionCalls: FunctionCallInfo[];
50
+ abstract label: string;
51
+ constructor(llm: LLM, chatCtx: ChatContext, fncCtx?: FunctionContext);
52
+ protected monitorMetrics(): Promise<void>;
53
+ /** List of called functions from this stream. */
54
+ get functionCalls(): FunctionCallInfo[];
55
+ /** The function context of this stream. */
56
+ get fncCtx(): FunctionContext | undefined;
57
+ /** The initial chat context of this stream. */
58
+ get chatCtx(): ChatContext;
59
+ /** Execute all deferred functions of this stream concurrently. */
60
+ executeFunctions(): FunctionCallInfo[];
61
+ next(): Promise<IteratorResult<ChatChunk>>;
62
+ close(): void;
63
+ [Symbol.asyncIterator](): LLMStream;
64
+ }
65
+ export {};
66
+ //# sourceMappingURL=llm.d.ts.map
package/dist/llm/llm.js CHANGED
@@ -22,13 +22,13 @@ class LLMStream {
22
22
  }
23
23
  async monitorMetrics() {
24
24
  const startTime = process.hrtime.bigint();
25
- let ttft;
25
+ let ttft = BigInt(-1);
26
26
  let requestId = "";
27
27
  let usage;
28
28
  for await (const ev of this.queue) {
29
29
  this.output.put(ev);
30
30
  requestId = ev.requestId;
31
- if (!ttft) {
31
+ if (ttft === BigInt(-1)) {
32
32
  ttft = process.hrtime.bigint() - startTime;
33
33
  }
34
34
  if (ev.usage) {
@@ -40,7 +40,7 @@ class LLMStream {
40
40
  const metrics = {
41
41
  timestamp: Date.now(),
42
42
  requestId,
43
- ttft: Math.trunc(Number(ttft / BigInt(1e6))),
43
+ ttft: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1e6))),
44
44
  duration: Math.trunc(Number(duration / BigInt(1e6))),
45
45
  cancelled: false,
46
46
  // XXX(nbsp)
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/llm/llm.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { LLMMetrics } from '../metrics/base.js';\nimport { AsyncIterableQueue } from '../utils.js';\nimport type { ChatContext, ChatRole } from './chat_context.js';\nimport type { FunctionCallInfo, FunctionContext } from './function_context.js';\n\nexport interface ChoiceDelta {\n role: ChatRole;\n content?: string;\n toolCalls?: FunctionCallInfo[];\n}\n\nexport interface CompletionUsage {\n completionTokens: number;\n promptTokens: number;\n totalTokens: number;\n}\n\nexport interface Choice {\n delta: ChoiceDelta;\n index: number;\n}\n\nexport interface ChatChunk {\n requestId: string;\n choices: Choice[];\n usage?: CompletionUsage;\n}\n\nexport enum LLMEvent {\n METRICS_COLLECTED,\n}\n\nexport type LLMCallbacks = {\n [LLMEvent.METRICS_COLLECTED]: (metrics: LLMMetrics) => void;\n};\n\nexport abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCallbacks>) {\n /**\n * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.\n */\n abstract chat({\n chatCtx,\n fncCtx,\n temperature,\n n,\n parallelToolCalls,\n }: {\n chatCtx: ChatContext;\n fncCtx?: FunctionContext;\n temperature?: number;\n n?: number;\n parallelToolCalls?: boolean;\n }): LLMStream;\n}\n\nexport abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {\n protected output = new AsyncIterableQueue<ChatChunk>();\n protected queue = new AsyncIterableQueue<ChatChunk>();\n protected closed = false;\n protected _functionCalls: FunctionCallInfo[] = [];\n abstract label: string;\n\n #llm: LLM;\n #chatCtx: ChatContext;\n #fncCtx?: FunctionContext;\n\n constructor(llm: LLM, chatCtx: ChatContext, fncCtx?: FunctionContext) {\n this.#llm = llm;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.monitorMetrics();\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let ttft: bigint | undefined;\n let requestId = '';\n let usage: CompletionUsage | undefined;\n\n for await (const ev of this.queue) {\n this.output.put(ev);\n requestId = ev.requestId;\n if (!ttft) {\n ttft = process.hrtime.bigint() - startTime;\n }\n if (ev.usage) {\n usage = ev.usage;\n }\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: LLMMetrics = {\n timestamp: Date.now(),\n requestId,\n ttft: Math.trunc(Number(ttft! / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n cancelled: false, // XXX(nbsp)\n label: this.label,\n completionTokens: usage?.completionTokens || 0,\n promptTokens: usage?.promptTokens || 0,\n totalTokens: usage?.totalTokens || 0,\n tokensPerSecond:\n (usage?.completionTokens || 0) / Math.trunc(Number(duration / BigInt(1000000000))),\n };\n this.#llm.emit(LLMEvent.METRICS_COLLECTED, metrics);\n }\n\n /** List of called functions from this stream. */\n get functionCalls(): FunctionCallInfo[] {\n return this._functionCalls;\n }\n\n /** The function context of this stream. */\n get fncCtx(): FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n /** The initial chat context of this stream. */\n get chatCtx(): ChatContext {\n return this.#chatCtx;\n }\n\n /** Execute all deferred functions of this stream concurrently. */\n executeFunctions(): FunctionCallInfo[] {\n this._functionCalls.forEach(\n (f) =>\n (f.task = f.func.execute(f.params).then(\n (result) => ({ name: f.name, toolCallId: f.toolCallId, result }),\n (error) => ({ name: f.name, toolCallId: f.toolCallId, error }),\n )),\n );\n return this._functionCalls;\n }\n\n next(): Promise<IteratorResult<ChatChunk>> {\n return this.output.next();\n }\n\n close() {\n this.output.close();\n this.queue.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): LLMStream {\n return this;\n }\n}\n"],"mappings":"AAIA,SAAS,oBAAoB;AAE7B,SAAS,0BAA0B;AA2B5B,IAAK,WAAL,kBAAKA,cAAL;AACL,EAAAA,oBAAA;AADU,SAAAA;AAAA,GAAA;AAQL,MAAe,YAAa,aAAsD;AAiBzF;AAEO,MAAe,UAAsD;AAAA,EAChE,SAAS,IAAI,mBAA8B;AAAA,EAC3C,QAAQ,IAAI,mBAA8B;AAAA,EAC1C,SAAS;AAAA,EACT,iBAAqC,CAAC;AAAA,EAGhD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,SAAsB,QAA0B;AACpE,SAAK,OAAO;AACZ,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI;AACJ,QAAI,YAAY;AAChB,QAAI;AAEJ,qBAAiB,MAAM,KAAK,OAAO;AACjC,WAAK,OAAO,IAAI,EAAE;AAClB,kBAAY,GAAG;AACf,UAAI,CAAC,MAAM;AACT,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,UAAI,GAAG,OAAO;AACZ,gBAAQ,GAAG;AAAA,MACb;AAAA,IACF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,KAAK,MAAM,OAAO,OAAQ,OAAO,GAAO,CAAC,CAAC;AAAA,MAChD,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,WAAW;AAAA;AAAA,MACX,OAAO,KAAK;AAAA,MACZ,mBAAkB,+BAAO,qBAAoB;AAAA,MAC7C,eAAc,+BAAO,iBAAgB;AAAA,MACrC,cAAa,+BAAO,gBAAe;AAAA,MACnC,mBACG,+BAAO,qBAAoB,KAAK,KAAK,MAAM,OAAO,WAAW,OAAO,GAAU,CAAC,CAAC;AAAA,IACrF;AACA,SAAK,KAAK,KAAK,2BAA4B,OAAO;AAAA,EACpD;AAAA;AAAA,EAGA,IAAI,gBAAoC;AACtC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,SAAsC;AACxC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,mBAAuC;AACrC,SAAK,eAAe;AAAA,MAClB,CAAC,MACE,EAAE,OAAO,EAAE,KAAK,QAAQ,EAAE,MAAM,EAAE;AAAA,QACjC,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,YAAY,EAAE,YAAY,OAAO;AAAA,QAC9D,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,YAAY,EAAE,YAAY,MAAM;AAAA,MAC9D;AAAA,IACJ;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA,EAEA,QAAQ;AACN,SAAK,OAAO,MAAM;AAClB,SAAK,MAAM,MAAM;AACjB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["LLMEvent"]}
1
+ {"version":3,"sources":["../../src/llm/llm.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { LLMMetrics } from '../metrics/base.js';\nimport { AsyncIterableQueue } from '../utils.js';\nimport type { ChatContext, ChatRole } from './chat_context.js';\nimport type { FunctionCallInfo, FunctionContext } from './function_context.js';\n\nexport interface ChoiceDelta {\n role: ChatRole;\n content?: string;\n toolCalls?: FunctionCallInfo[];\n}\n\nexport interface CompletionUsage {\n completionTokens: number;\n promptTokens: number;\n totalTokens: number;\n}\n\nexport interface Choice {\n delta: ChoiceDelta;\n index: number;\n}\n\nexport interface ChatChunk {\n requestId: string;\n choices: Choice[];\n usage?: CompletionUsage;\n}\n\nexport enum LLMEvent {\n METRICS_COLLECTED,\n}\n\nexport type LLMCallbacks = {\n [LLMEvent.METRICS_COLLECTED]: (metrics: LLMMetrics) => void;\n};\n\nexport abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCallbacks>) {\n /**\n * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.\n */\n abstract chat({\n chatCtx,\n fncCtx,\n temperature,\n n,\n parallelToolCalls,\n }: {\n chatCtx: ChatContext;\n fncCtx?: FunctionContext;\n temperature?: number;\n n?: number;\n parallelToolCalls?: boolean;\n }): LLMStream;\n}\n\nexport abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {\n protected output = new AsyncIterableQueue<ChatChunk>();\n protected queue = new AsyncIterableQueue<ChatChunk>();\n protected closed = false;\n protected _functionCalls: FunctionCallInfo[] = [];\n abstract label: string;\n\n #llm: LLM;\n #chatCtx: ChatContext;\n #fncCtx?: FunctionContext;\n\n constructor(llm: LLM, chatCtx: ChatContext, fncCtx?: FunctionContext) {\n this.#llm = llm;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.monitorMetrics();\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let ttft: bigint = BigInt(-1);\n let requestId = '';\n let usage: CompletionUsage | undefined;\n\n for await (const ev of this.queue) {\n this.output.put(ev);\n requestId = ev.requestId;\n if (ttft === BigInt(-1)) {\n ttft = process.hrtime.bigint() - startTime;\n }\n if (ev.usage) {\n usage = ev.usage;\n }\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: LLMMetrics = {\n timestamp: Date.now(),\n requestId,\n ttft: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n cancelled: false, // XXX(nbsp)\n label: this.label,\n completionTokens: usage?.completionTokens || 0,\n promptTokens: usage?.promptTokens || 0,\n totalTokens: usage?.totalTokens || 0,\n tokensPerSecond:\n (usage?.completionTokens || 0) / Math.trunc(Number(duration / BigInt(1000000000))),\n };\n this.#llm.emit(LLMEvent.METRICS_COLLECTED, metrics);\n }\n\n /** List of called functions from this stream. */\n get functionCalls(): FunctionCallInfo[] {\n return this._functionCalls;\n }\n\n /** The function context of this stream. */\n get fncCtx(): FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n /** The initial chat context of this stream. */\n get chatCtx(): ChatContext {\n return this.#chatCtx;\n }\n\n /** Execute all deferred functions of this stream concurrently. */\n executeFunctions(): FunctionCallInfo[] {\n this._functionCalls.forEach(\n (f) =>\n (f.task = f.func.execute(f.params).then(\n (result) => ({ name: f.name, toolCallId: f.toolCallId, result }),\n (error) => ({ name: f.name, toolCallId: f.toolCallId, error }),\n )),\n );\n return this._functionCalls;\n }\n\n next(): Promise<IteratorResult<ChatChunk>> {\n return this.output.next();\n }\n\n close() {\n this.output.close();\n this.queue.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): LLMStream {\n return this;\n }\n}\n"],"mappings":"AAIA,SAAS,oBAAoB;AAE7B,SAAS,0BAA0B;AA2B5B,IAAK,WAAL,kBAAKA,cAAL;AACL,EAAAA,oBAAA;AADU,SAAAA;AAAA,GAAA;AAQL,MAAe,YAAa,aAAsD;AAiBzF;AAEO,MAAe,UAAsD;AAAA,EAChE,SAAS,IAAI,mBAA8B;AAAA,EAC3C,QAAQ,IAAI,mBAA8B;AAAA,EAC1C,SAAS;AAAA,EACT,iBAAqC,CAAC;AAAA,EAGhD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,SAAsB,QAA0B;AACpE,SAAK,OAAO;AACZ,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAChB,QAAI;AAEJ,qBAAiB,MAAM,KAAK,OAAO;AACjC,WAAK,OAAO,IAAI,EAAE;AAClB,kBAAY,GAAG;AACf,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,UAAI,GAAG,OAAO;AACZ,gBAAQ,GAAG;AAAA,MACb;AAAA,IACF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,WAAW;AAAA;AAAA,MACX,OAAO,KAAK;AAAA,MACZ,mBAAkB,+BAAO,qBAAoB;AAAA,MAC7C,eAAc,+BAAO,iBAAgB;AAAA,MACrC,cAAa,+BAAO,gBAAe;AAAA,MACnC,mBACG,+BAAO,qBAAoB,KAAK,KAAK,MAAM,OAAO,WAAW,OAAO,GAAU,CAAC,CAAC;AAAA,IACrF;AACA,SAAK,KAAK,KAAK,2BAA4B,OAAO;AAAA,EACpD;AAAA;AAAA,EAGA,IAAI,gBAAoC;AACtC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,SAAsC;AACxC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,mBAAuC;AACrC,SAAK,eAAe;AAAA,MAClB,CAAC,MACE,EAAE,OAAO,EAAE,KAAK,QAAQ,EAAE,MAAM,EAAE;AAAA,QACjC,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,YAAY,EAAE,YAAY,OAAO;AAAA,QAC9D,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,YAAY,EAAE,YAAY,MAAM;AAAA,MAC9D;AAAA,IACJ;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA,EAEA,QAAQ;AACN,SAAK,OAAO,MAAM;AAClB,SAAK,MAAM,MAAM;AACjB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["LLMEvent"]}
package/dist/log.d.cts ADDED
@@ -0,0 +1,13 @@
1
+ import type { Logger } from 'pino';
2
+ /** @internal */
3
+ export type LoggerOptions = {
4
+ pretty: boolean;
5
+ level?: string;
6
+ };
7
+ /** @internal */
8
+ export declare let loggerOptions: LoggerOptions;
9
+ /** @internal */
10
+ export declare const log: () => Logger;
11
+ /** @internal */
12
+ export declare const initializeLogger: ({ pretty, level }: LoggerOptions) => void;
13
+ //# sourceMappingURL=log.d.ts.map
@@ -0,0 +1,96 @@
1
+ export interface LLMMetrics {
2
+ requestId: string;
3
+ timestamp: number;
4
+ ttft: number;
5
+ duration: number;
6
+ label: string;
7
+ cancelled: boolean;
8
+ completionTokens: number;
9
+ promptTokens: number;
10
+ totalTokens: number;
11
+ tokensPerSecond: number;
12
+ error?: Error;
13
+ }
14
+ export interface STTMetrics {
15
+ requestId: string;
16
+ timestamp: number;
17
+ duration: number;
18
+ label: string;
19
+ audioDuration: number;
20
+ streamed: boolean;
21
+ error?: Error;
22
+ }
23
+ export interface TTSMetrics {
24
+ requestId: string;
25
+ timestamp: number;
26
+ ttfb: number;
27
+ duration: number;
28
+ label: string;
29
+ audioDuration: number;
30
+ cancelled: boolean;
31
+ charactersCount: number;
32
+ streamed: boolean;
33
+ error?: Error;
34
+ }
35
+ export interface VADMetrics {
36
+ timestamp: number;
37
+ idleTime: number;
38
+ inferenceDurationTotal: number;
39
+ inferenceCount: number;
40
+ label: string;
41
+ }
42
+ export interface PipelineEOUMetrics {
43
+ /**
44
+ * Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics
45
+ */
46
+ sequenceId: string;
47
+ /** Timestamp of when the event was recorded */
48
+ timestamp: number;
49
+ /** Amount of time between the end of speech from VAD and the decision to end the user's turn */
50
+ endOfUtteranceDelay: number;
51
+ /**
52
+ * Time taken to obtain the transcript after the end of the user's speech.
53
+ *
54
+ * @remarks
55
+ * May be 0 if the transcript was already available.
56
+ */
57
+ transcriptionDelay: number;
58
+ }
59
+ export interface PipelineLLMMetrics extends LLMMetrics {
60
+ /**
61
+ * Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics
62
+ */
63
+ sequenceId: string;
64
+ }
65
+ export interface PipelineTTSMetrics extends TTSMetrics {
66
+ /**
67
+ * Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics
68
+ */
69
+ sequenceId: string;
70
+ }
71
+ export type PipelineSTTMetrics = STTMetrics;
72
+ export type PipelineVADMetrics = VADMetrics;
73
+ export declare class MultimodalLLMError extends Error {
74
+ type?: string;
75
+ reason?: string;
76
+ code?: string;
77
+ constructor({ type, reason, code, message, }?: {
78
+ type?: string;
79
+ reason?: string;
80
+ code?: string;
81
+ message?: string;
82
+ });
83
+ }
84
+ export interface MultimodalLLMMetrics extends LLMMetrics {
85
+ inputTokenDetails: {
86
+ cachedTokens: number;
87
+ textTokens: number;
88
+ audioTokens: number;
89
+ };
90
+ outputTokenDetails: {
91
+ textTokens: number;
92
+ audioTokens: number;
93
+ };
94
+ }
95
+ export type AgentMetrics = STTMetrics | LLMMetrics | TTSMetrics | VADMetrics | PipelineSTTMetrics | PipelineEOUMetrics | PipelineLLMMetrics | PipelineTTSMetrics | PipelineVADMetrics | MultimodalLLMMetrics;
96
+ //# sourceMappingURL=base.d.ts.map
@@ -0,0 +1,5 @@
1
+ export type { AgentMetrics, STTMetrics, LLMMetrics, TTSMetrics, VADMetrics, PipelineSTTMetrics, PipelineEOUMetrics, PipelineLLMMetrics, PipelineTTSMetrics, PipelineVADMetrics, MultimodalLLMMetrics, } from './base.js';
2
+ export { MultimodalLLMError } from './base.js';
3
+ export { type UsageSummary, UsageCollector } from './usage_collector.js';
4
+ export { logMetrics } from './utils.js';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,14 @@
1
+ import type { AgentMetrics } from './base.js';
2
+ export interface UsageSummary {
3
+ llmPromptTokens: number;
4
+ llmCompletionTokens: number;
5
+ ttsCharactersCount: number;
6
+ sttAudioDuration: number;
7
+ }
8
+ export declare class UsageCollector {
9
+ #private;
10
+ constructor();
11
+ collect(metrics: AgentMetrics): void;
12
+ get summary(): UsageSummary;
13
+ }
14
+ //# sourceMappingURL=usage_collector.d.ts.map