@livekit/agents 1.0.34 → 1.0.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +4 -4
- package/dist/inference/api_protos.d.ts +4 -4
- package/dist/inference/llm.cjs +30 -3
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +3 -1
- package/dist/inference/llm.d.ts +3 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +30 -3
- package/dist/inference/llm.js.map +1 -1
- package/dist/ipc/inference_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +1 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/llm/chat_context.cjs +20 -2
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +9 -0
- package/dist/llm/chat_context.d.ts +9 -0
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +20 -2
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -0
- package/dist/llm/llm.d.ts +1 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/openai.cjs +43 -20
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.d.ts.map +1 -1
- package/dist/llm/provider_format/openai.js +43 -20
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +35 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +35 -0
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/llm/provider_format/utils.cjs +1 -1
- package/dist/llm/provider_format/utils.cjs.map +1 -1
- package/dist/llm/provider_format/utils.d.ts.map +1 -1
- package/dist/llm/provider_format/utils.js +1 -1
- package/dist/llm/provider_format/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +19 -19
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +19 -19
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +64 -25
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +25 -1
- package/dist/voice/agent_session.d.ts +25 -1
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +64 -25
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/background_audio.cjs.map +1 -1
- package/dist/voice/generation.cjs +2 -1
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +2 -1
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +14 -1
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +3 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +1 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +1 -0
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/speech_handle.cjs +12 -3
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +12 -2
- package/dist/voice/speech_handle.d.ts +12 -2
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +10 -2
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/voice/testing/index.cjs +52 -0
- package/dist/voice/testing/index.cjs.map +1 -0
- package/dist/voice/testing/index.d.cts +20 -0
- package/dist/voice/testing/index.d.ts +20 -0
- package/dist/voice/testing/index.d.ts.map +1 -0
- package/dist/voice/testing/index.js +31 -0
- package/dist/voice/testing/index.js.map +1 -0
- package/dist/voice/testing/run_result.cjs +477 -0
- package/dist/voice/testing/run_result.cjs.map +1 -0
- package/dist/voice/testing/run_result.d.cts +226 -0
- package/dist/voice/testing/run_result.d.ts +226 -0
- package/dist/voice/testing/run_result.d.ts.map +1 -0
- package/dist/voice/testing/run_result.js +451 -0
- package/dist/voice/testing/run_result.js.map +1 -0
- package/dist/voice/testing/types.cjs +46 -0
- package/dist/voice/testing/types.cjs.map +1 -0
- package/dist/voice/testing/types.d.cts +83 -0
- package/dist/voice/testing/types.d.ts +83 -0
- package/dist/voice/testing/types.d.ts.map +1 -0
- package/dist/voice/testing/types.js +19 -0
- package/dist/voice/testing/types.js.map +1 -0
- package/package.json +3 -3
- package/src/inference/llm.ts +42 -3
- package/src/ipc/job_proc_lazy_main.ts +1 -1
- package/src/llm/chat_context.ts +32 -2
- package/src/llm/llm.ts +1 -0
- package/src/llm/provider_format/openai.test.ts +40 -0
- package/src/llm/provider_format/openai.ts +46 -19
- package/src/llm/provider_format/utils.ts +5 -1
- package/src/voice/agent_activity.ts +24 -22
- package/src/voice/agent_session.ts +73 -28
- package/src/voice/generation.ts +1 -0
- package/src/voice/index.ts +1 -0
- package/src/voice/room_io/room_io.ts +1 -0
- package/src/voice/speech_handle.ts +24 -4
- package/src/voice/testing/index.ts +49 -0
- package/src/voice/testing/run_result.ts +576 -0
- package/src/voice/testing/types.ts +118 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import type { AgentHandoffItem, ChatMessage, ChatRole, FunctionCall, FunctionCallOutput } from '../../llm/chat_context.js';
|
|
2
|
+
import type { Agent } from '../agent.js';
|
|
3
|
+
/**
|
|
4
|
+
* Event representing an assistant or user message in the conversation.
|
|
5
|
+
*/
|
|
6
|
+
export interface ChatMessageEvent {
|
|
7
|
+
type: 'message';
|
|
8
|
+
item: ChatMessage;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Event representing a function/tool call initiated by the LLM.
|
|
12
|
+
*/
|
|
13
|
+
export interface FunctionCallEvent {
|
|
14
|
+
type: 'function_call';
|
|
15
|
+
item: FunctionCall;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Event representing the output/result of a function call.
|
|
19
|
+
*/
|
|
20
|
+
export interface FunctionCallOutputEvent {
|
|
21
|
+
type: 'function_call_output';
|
|
22
|
+
item: FunctionCallOutput;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Event representing an agent handoff (switching from one agent to another).
|
|
26
|
+
*/
|
|
27
|
+
export interface AgentHandoffEvent {
|
|
28
|
+
type: 'agent_handoff';
|
|
29
|
+
item: AgentHandoffItem;
|
|
30
|
+
oldAgent?: Agent;
|
|
31
|
+
newAgent: Agent;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Union type of all possible run events that can occur during a test run.
|
|
35
|
+
*/
|
|
36
|
+
export type RunEvent = ChatMessageEvent | FunctionCallEvent | FunctionCallOutputEvent | AgentHandoffEvent;
|
|
37
|
+
/**
|
|
38
|
+
* Type guard to check if an event is a ChatMessageEvent.
|
|
39
|
+
*/
|
|
40
|
+
export declare function isChatMessageEvent(event: RunEvent): event is ChatMessageEvent;
|
|
41
|
+
/**
|
|
42
|
+
* Type guard to check if an event is a FunctionCallEvent.
|
|
43
|
+
*/
|
|
44
|
+
export declare function isFunctionCallEvent(event: RunEvent): event is FunctionCallEvent;
|
|
45
|
+
/**
|
|
46
|
+
* Type guard to check if an event is a FunctionCallOutputEvent.
|
|
47
|
+
*/
|
|
48
|
+
export declare function isFunctionCallOutputEvent(event: RunEvent): event is FunctionCallOutputEvent;
|
|
49
|
+
/**
|
|
50
|
+
* Type guard to check if an event is an AgentHandoffEvent.
|
|
51
|
+
*/
|
|
52
|
+
export declare function isAgentHandoffEvent(event: RunEvent): event is AgentHandoffEvent;
|
|
53
|
+
/**
|
|
54
|
+
* Options for message assertion.
|
|
55
|
+
*/
|
|
56
|
+
export interface MessageAssertOptions {
|
|
57
|
+
role?: ChatRole;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Options for function call assertion.
|
|
61
|
+
*/
|
|
62
|
+
export interface FunctionCallAssertOptions {
|
|
63
|
+
name?: string;
|
|
64
|
+
args?: Record<string, unknown>;
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Options for function call output assertion.
|
|
68
|
+
*/
|
|
69
|
+
export interface FunctionCallOutputAssertOptions {
|
|
70
|
+
output?: string;
|
|
71
|
+
isError?: boolean;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Options for agent handoff assertion.
|
|
75
|
+
*/
|
|
76
|
+
export interface AgentHandoffAssertOptions {
|
|
77
|
+
newAgentType?: new (...args: any[]) => Agent;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Event type literals for type-safe event filtering.
|
|
81
|
+
*/
|
|
82
|
+
export type EventType = 'message' | 'function_call' | 'function_call_output' | 'agent_handoff';
|
|
83
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/voice/testing/types.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,gBAAgB,EAChB,WAAW,EACX,QAAQ,EACR,YAAY,EACZ,kBAAkB,EACnB,MAAM,2BAA2B,CAAC;AACnC,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,SAAS,CAAC;IAChB,IAAI,EAAE,WAAW,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,YAAY,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,sBAAsB,CAAC;IAC7B,IAAI,EAAE,kBAAkB,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,gBAAgB,CAAC;IACvB,QAAQ,CAAC,EAAE,KAAK,CAAC;IACjB,QAAQ,EAAE,KAAK,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAChB,gBAAgB,GAChB,iBAAiB,GACjB,uBAAuB,GACvB,iBAAiB,CAAC;AAEtB;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,gBAAgB,CAE7E;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,iBAAiB,CAE/E;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,uBAAuB,CAE3F;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,iBAAiB,CAE/E;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,IAAI,CAAC,EAAE,QAAQ,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACxC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IAExC,YAAY,CAAC,EAAE,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,KAAK,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,eAAe,GAAG,sBAAsB,GAAG,eAAe,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
function isChatMessageEvent(event) {
|
|
2
|
+
return event.type === "message";
|
|
3
|
+
}
|
|
4
|
+
function isFunctionCallEvent(event) {
|
|
5
|
+
return event.type === "function_call";
|
|
6
|
+
}
|
|
7
|
+
function isFunctionCallOutputEvent(event) {
|
|
8
|
+
return event.type === "function_call_output";
|
|
9
|
+
}
|
|
10
|
+
function isAgentHandoffEvent(event) {
|
|
11
|
+
return event.type === "agent_handoff";
|
|
12
|
+
}
|
|
13
|
+
export {
|
|
14
|
+
isAgentHandoffEvent,
|
|
15
|
+
isChatMessageEvent,
|
|
16
|
+
isFunctionCallEvent,
|
|
17
|
+
isFunctionCallOutputEvent
|
|
18
|
+
};
|
|
19
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../src/voice/testing/types.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n AgentHandoffItem,\n ChatMessage,\n ChatRole,\n FunctionCall,\n FunctionCallOutput,\n} from '../../llm/chat_context.js';\nimport type { Agent } from '../agent.js';\n\n/**\n * Event representing an assistant or user message in the conversation.\n */\nexport interface ChatMessageEvent {\n type: 'message';\n item: ChatMessage;\n}\n\n/**\n * Event representing a function/tool call initiated by the LLM.\n */\nexport interface FunctionCallEvent {\n type: 'function_call';\n item: FunctionCall;\n}\n\n/**\n * Event representing the output/result of a function call.\n */\nexport interface FunctionCallOutputEvent {\n type: 'function_call_output';\n item: FunctionCallOutput;\n}\n\n/**\n * Event representing an agent handoff (switching from one agent to another).\n */\nexport interface AgentHandoffEvent {\n type: 'agent_handoff';\n item: AgentHandoffItem;\n oldAgent?: Agent;\n newAgent: Agent;\n}\n\n/**\n * Union type of all possible run events that can occur during a test run.\n */\nexport type RunEvent =\n | ChatMessageEvent\n | FunctionCallEvent\n | FunctionCallOutputEvent\n | AgentHandoffEvent;\n\n/**\n * Type guard to check if an event is a ChatMessageEvent.\n */\nexport function isChatMessageEvent(event: RunEvent): event is ChatMessageEvent {\n return event.type === 'message';\n}\n\n/**\n * Type guard to check if an event is a FunctionCallEvent.\n */\nexport function isFunctionCallEvent(event: RunEvent): event is FunctionCallEvent {\n return event.type === 'function_call';\n}\n\n/**\n * Type guard to check if an event is a FunctionCallOutputEvent.\n */\nexport function isFunctionCallOutputEvent(event: RunEvent): event is FunctionCallOutputEvent {\n return event.type === 'function_call_output';\n}\n\n/**\n * Type guard to check if an event is an AgentHandoffEvent.\n */\nexport function 
isAgentHandoffEvent(event: RunEvent): event is AgentHandoffEvent {\n return event.type === 'agent_handoff';\n}\n\n/**\n * Options for message assertion.\n */\nexport interface MessageAssertOptions {\n role?: ChatRole;\n}\n\n/**\n * Options for function call assertion.\n */\nexport interface FunctionCallAssertOptions {\n name?: string;\n args?: Record<string, unknown>;\n}\n\n/**\n * Options for function call output assertion.\n */\nexport interface FunctionCallOutputAssertOptions {\n output?: string;\n isError?: boolean;\n}\n\n/**\n * Options for agent handoff assertion.\n */\nexport interface AgentHandoffAssertOptions {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n newAgentType?: new (...args: any[]) => Agent;\n}\n\n/**\n * Event type literals for type-safe event filtering.\n */\nexport type EventType = 'message' | 'function_call' | 'function_call_output' | 'agent_handoff';\n"],"mappings":"AA0DO,SAAS,mBAAmB,OAA4C;AAC7E,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,oBAAoB,OAA6C;AAC/E,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,0BAA0B,OAAmD;AAC3F,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,oBAAoB,OAA6C;AAC/E,SAAO,MAAM,SAAS;AACxB;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents",
|
|
3
|
-
"version": "1.0.34",
|
|
3
|
+
"version": "1.0.36",
|
|
4
4
|
"description": "LiveKit Agents - Node.js",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"README.md"
|
|
27
27
|
],
|
|
28
28
|
"devDependencies": {
|
|
29
|
-
"@livekit/rtc-node": "^0.13.
|
|
29
|
+
"@livekit/rtc-node": "^0.13.24",
|
|
30
30
|
"@microsoft/api-extractor": "^7.35.0",
|
|
31
31
|
"@types/fluent-ffmpeg": "^2.1.28",
|
|
32
32
|
"@types/json-schema": "^7.0.15",
|
|
@@ -70,7 +70,7 @@
|
|
|
70
70
|
"zod-to-json-schema": "^3.24.6"
|
|
71
71
|
},
|
|
72
72
|
"peerDependencies": {
|
|
73
|
-
"@livekit/rtc-node": "^0.13.
|
|
73
|
+
"@livekit/rtc-node": "^0.13.24",
|
|
74
74
|
"zod": "^3.25.76 || ^4.1.8"
|
|
75
75
|
},
|
|
76
76
|
"scripts": {
|
package/src/inference/llm.ts
CHANGED
|
@@ -27,7 +27,14 @@ export type OpenAIModels =
|
|
|
27
27
|
| 'openai/gpt-4o-mini'
|
|
28
28
|
| 'openai/gpt-oss-120b';
|
|
29
29
|
|
|
30
|
-
export type GoogleModels =
|
|
30
|
+
export type GoogleModels =
|
|
31
|
+
| 'google/gemini-3-pro-preview'
|
|
32
|
+
| 'google/gemini-3-flash-preview'
|
|
33
|
+
| 'google/gemini-2.5-pro'
|
|
34
|
+
| 'google/gemini-2.5-flash'
|
|
35
|
+
| 'google/gemini-2.5-flash-lite'
|
|
36
|
+
| 'google/gemini-2.0-flash'
|
|
37
|
+
| 'google/gemini-2.0-flash-lite';
|
|
31
38
|
|
|
32
39
|
export type QwenModels = 'qwen/qwen3-235b-a22b-instruct';
|
|
33
40
|
|
|
@@ -235,6 +242,7 @@ export class LLMStream extends llm.LLMStream {
|
|
|
235
242
|
private toolIndex?: number;
|
|
236
243
|
private fncName?: string;
|
|
237
244
|
private fncRawArguments?: string;
|
|
245
|
+
private toolExtra?: Record<string, unknown>;
|
|
238
246
|
|
|
239
247
|
constructor(
|
|
240
248
|
llm: LLM,
|
|
@@ -277,6 +285,7 @@ export class LLMStream extends llm.LLMStream {
|
|
|
277
285
|
// (defined inside the run method to make sure the state is reset for each run/attempt)
|
|
278
286
|
let retryable = true;
|
|
279
287
|
this.toolCallId = this.fncName = this.fncRawArguments = this.toolIndex = undefined;
|
|
288
|
+
this.toolExtra = undefined;
|
|
280
289
|
|
|
281
290
|
try {
|
|
282
291
|
const messages = (await this.chatCtx.toProviderFormat(
|
|
@@ -428,6 +437,7 @@ export class LLMStream extends llm.LLMStream {
|
|
|
428
437
|
if (this.toolCallId && tool.id && tool.index !== this.toolIndex) {
|
|
429
438
|
callChunk = this.createRunningToolCallChunk(id, delta);
|
|
430
439
|
this.toolCallId = this.fncName = this.fncRawArguments = undefined;
|
|
440
|
+
this.toolExtra = undefined;
|
|
431
441
|
}
|
|
432
442
|
|
|
433
443
|
// Start or continue building the current tool call
|
|
@@ -436,6 +446,10 @@ export class LLMStream extends llm.LLMStream {
|
|
|
436
446
|
this.toolCallId = tool.id;
|
|
437
447
|
this.fncName = tool.function.name;
|
|
438
448
|
this.fncRawArguments = tool.function.arguments || '';
|
|
449
|
+
// Extract extra from tool call (e.g., Google thought signatures)
|
|
450
|
+
this.toolExtra =
|
|
451
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
452
|
+
((tool as any).extra_content as Record<string, unknown> | undefined) ?? undefined;
|
|
439
453
|
} else if (tool.function.arguments) {
|
|
440
454
|
this.fncRawArguments = (this.fncRawArguments || '') + tool.function.arguments;
|
|
441
455
|
}
|
|
@@ -454,11 +468,17 @@ export class LLMStream extends llm.LLMStream {
|
|
|
454
468
|
) {
|
|
455
469
|
const callChunk = this.createRunningToolCallChunk(id, delta);
|
|
456
470
|
this.toolCallId = this.fncName = this.fncRawArguments = undefined;
|
|
471
|
+
this.toolExtra = undefined;
|
|
457
472
|
return callChunk;
|
|
458
473
|
}
|
|
459
474
|
|
|
475
|
+
// Extract extra from delta (e.g., Google thought signatures on text parts)
|
|
476
|
+
const deltaExtra =
|
|
477
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
478
|
+
((delta as any).extra_content as Record<string, unknown> | undefined) ?? undefined;
|
|
479
|
+
|
|
460
480
|
// Regular content message
|
|
461
|
-
if (!delta.content) {
|
|
481
|
+
if (!delta.content && !deltaExtra) {
|
|
462
482
|
return undefined;
|
|
463
483
|
}
|
|
464
484
|
|
|
@@ -466,7 +486,8 @@ export class LLMStream extends llm.LLMStream {
|
|
|
466
486
|
id,
|
|
467
487
|
delta: {
|
|
468
488
|
role: 'assistant',
|
|
469
|
-
content: delta.content,
|
|
489
|
+
content: delta.content || undefined,
|
|
490
|
+
extra: deltaExtra,
|
|
470
491
|
},
|
|
471
492
|
};
|
|
472
493
|
}
|
|
@@ -475,19 +496,37 @@ export class LLMStream extends llm.LLMStream {
|
|
|
475
496
|
id: string,
|
|
476
497
|
delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta,
|
|
477
498
|
): llm.ChatChunk {
|
|
499
|
+
const toolExtra = this.toolExtra ? { ...this.toolExtra } : {};
|
|
500
|
+
const thoughtSignature = this.extractThoughtSignature(toolExtra);
|
|
501
|
+
const deltaExtra =
|
|
502
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
503
|
+
((delta as any).extra_content as Record<string, unknown> | undefined) ?? undefined;
|
|
504
|
+
|
|
478
505
|
return {
|
|
479
506
|
id,
|
|
480
507
|
delta: {
|
|
481
508
|
role: 'assistant',
|
|
482
509
|
content: delta.content || undefined,
|
|
510
|
+
extra: deltaExtra,
|
|
483
511
|
toolCalls: [
|
|
484
512
|
llm.FunctionCall.create({
|
|
485
513
|
callId: this.toolCallId || '',
|
|
486
514
|
name: this.fncName || '',
|
|
487
515
|
args: this.fncRawArguments || '',
|
|
516
|
+
extra: toolExtra,
|
|
517
|
+
thoughtSignature,
|
|
488
518
|
}),
|
|
489
519
|
],
|
|
490
520
|
},
|
|
491
521
|
};
|
|
492
522
|
}
|
|
523
|
+
|
|
524
|
+
private extractThoughtSignature(extra?: Record<string, unknown>): string | undefined {
|
|
525
|
+
const googleExtra = extra?.google;
|
|
526
|
+
if (googleExtra && typeof googleExtra === 'object') {
|
|
527
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
528
|
+
return (googleExtra as any).thoughtSignature || (googleExtra as any).thought_signature;
|
|
529
|
+
}
|
|
530
|
+
return undefined;
|
|
531
|
+
}
|
|
493
532
|
}
|
|
@@ -136,7 +136,7 @@ const startJob = (
|
|
|
136
136
|
shutdownTasks.push(callback());
|
|
137
137
|
}
|
|
138
138
|
await Promise.all(shutdownTasks).catch((error) =>
|
|
139
|
-
logger.error('error while shutting down the job'
|
|
139
|
+
logger.error({ error }, 'error while shutting down the job'),
|
|
140
140
|
);
|
|
141
141
|
|
|
142
142
|
process.send!({ case: 'done' });
|
package/src/llm/chat_context.ts
CHANGED
|
@@ -189,6 +189,12 @@ export class FunctionCall {
|
|
|
189
189
|
|
|
190
190
|
createdAt: number;
|
|
191
191
|
|
|
192
|
+
extra: Record<string, unknown>;
|
|
193
|
+
/**
|
|
194
|
+
* Optional grouping identifier for parallel tool calls.
|
|
195
|
+
*/
|
|
196
|
+
groupId?: string;
|
|
197
|
+
|
|
192
198
|
/**
|
|
193
199
|
* Opaque signature for Gemini thinking mode.
|
|
194
200
|
* When using Gemini 3+ models with thinking enabled, this signature must be
|
|
@@ -202,6 +208,8 @@ export class FunctionCall {
|
|
|
202
208
|
args: string;
|
|
203
209
|
id?: string;
|
|
204
210
|
createdAt?: number;
|
|
211
|
+
extra?: Record<string, unknown>;
|
|
212
|
+
groupId?: string;
|
|
205
213
|
thoughtSignature?: string;
|
|
206
214
|
}) {
|
|
207
215
|
const {
|
|
@@ -210,6 +218,8 @@ export class FunctionCall {
|
|
|
210
218
|
args,
|
|
211
219
|
id = shortuuid('item_'),
|
|
212
220
|
createdAt = Date.now(),
|
|
221
|
+
extra = {},
|
|
222
|
+
groupId,
|
|
213
223
|
thoughtSignature,
|
|
214
224
|
} = params;
|
|
215
225
|
this.id = id;
|
|
@@ -217,7 +227,15 @@ export class FunctionCall {
|
|
|
217
227
|
this.args = args;
|
|
218
228
|
this.name = name;
|
|
219
229
|
this.createdAt = createdAt;
|
|
220
|
-
this.thoughtSignature = thoughtSignature;
|
|
230
|
+
this.extra = { ...extra };
|
|
231
|
+
this.groupId = groupId;
|
|
232
|
+
this.thoughtSignature =
|
|
233
|
+
thoughtSignature ??
|
|
234
|
+
(typeof this.extra.google === 'object' && this.extra.google !== null
|
|
235
|
+
? // eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
236
|
+
(this.extra.google as any).thoughtSignature ||
|
|
237
|
+
(this.extra.google as any).thought_signature
|
|
238
|
+
: undefined);
|
|
221
239
|
}
|
|
222
240
|
|
|
223
241
|
static create(params: {
|
|
@@ -226,6 +244,8 @@ export class FunctionCall {
|
|
|
226
244
|
args: string;
|
|
227
245
|
id?: string;
|
|
228
246
|
createdAt?: number;
|
|
247
|
+
extra?: Record<string, unknown>;
|
|
248
|
+
groupId?: string;
|
|
229
249
|
thoughtSignature?: string;
|
|
230
250
|
}) {
|
|
231
251
|
return new FunctionCall(params);
|
|
@@ -241,6 +261,14 @@ export class FunctionCall {
|
|
|
241
261
|
args: this.args,
|
|
242
262
|
};
|
|
243
263
|
|
|
264
|
+
if (Object.keys(this.extra).length > 0) {
|
|
265
|
+
result.extra = this.extra as JSONValue;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
if (this.groupId) {
|
|
269
|
+
result.groupId = this.groupId;
|
|
270
|
+
}
|
|
271
|
+
|
|
244
272
|
if (this.thoughtSignature) {
|
|
245
273
|
result.thoughtSignature = this.thoughtSignature;
|
|
246
274
|
}
|
|
@@ -627,7 +655,9 @@ export class ChatContext {
|
|
|
627
655
|
a.name !== b.name ||
|
|
628
656
|
a.callId !== b.callId ||
|
|
629
657
|
a.args !== b.args ||
|
|
630
|
-
a.thoughtSignature !== b.thoughtSignature
|
|
658
|
+
a.thoughtSignature !== b.thoughtSignature ||
|
|
659
|
+
a.groupId !== b.groupId ||
|
|
660
|
+
JSON.stringify(a.extra) !== JSON.stringify(b.extra)
|
|
631
661
|
) {
|
|
632
662
|
return false;
|
|
633
663
|
}
|
package/src/llm/llm.ts
CHANGED
|
@@ -258,6 +258,46 @@ describe('toChatCtx', () => {
|
|
|
258
258
|
]);
|
|
259
259
|
});
|
|
260
260
|
|
|
261
|
+
it('should include provider-specific extra content on tool calls', async () => {
|
|
262
|
+
const ctx = ChatContext.empty();
|
|
263
|
+
const msg = ctx.addMessage({ role: 'assistant', content: 'Running tool' });
|
|
264
|
+
|
|
265
|
+
const toolCall = FunctionCall.create({
|
|
266
|
+
id: `${msg.id}/tool_1`,
|
|
267
|
+
callId: 'call_789',
|
|
268
|
+
name: 'google_call',
|
|
269
|
+
args: '{}',
|
|
270
|
+
extra: { google: { thoughtSignature: 'sig-123' } },
|
|
271
|
+
});
|
|
272
|
+
const toolOutput = FunctionCallOutput.create({
|
|
273
|
+
callId: 'call_789',
|
|
274
|
+
output: '{"result": "ok"}',
|
|
275
|
+
isError: false,
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
ctx.insert([toolCall, toolOutput]);
|
|
279
|
+
|
|
280
|
+
const result = await toChatCtx(ctx);
|
|
281
|
+
|
|
282
|
+
expect(result[0]).toEqual({
|
|
283
|
+
role: 'assistant',
|
|
284
|
+
content: 'Running tool',
|
|
285
|
+
tool_calls: [
|
|
286
|
+
{
|
|
287
|
+
type: 'function',
|
|
288
|
+
id: 'call_789',
|
|
289
|
+
function: { name: 'google_call', arguments: '{}' },
|
|
290
|
+
extra_content: { google: { thoughtSignature: 'sig-123' } },
|
|
291
|
+
},
|
|
292
|
+
],
|
|
293
|
+
});
|
|
294
|
+
expect(result[1]).toEqual({
|
|
295
|
+
role: 'tool',
|
|
296
|
+
tool_call_id: 'call_789',
|
|
297
|
+
content: '{"result": "ok"}',
|
|
298
|
+
});
|
|
299
|
+
});
|
|
300
|
+
|
|
261
301
|
it('should handle multiple tool calls in one message', async () => {
|
|
262
302
|
const ctx = ChatContext.empty();
|
|
263
303
|
|
|
@@ -17,11 +17,20 @@ export async function toChatCtx(chatCtx: ChatContext, injectDummyUserMessage: bo
|
|
|
17
17
|
? await toChatItem(group.message)
|
|
18
18
|
: { role: 'assistant' };
|
|
19
19
|
|
|
20
|
-
const toolCalls = group.toolCalls.map((toolCall) =>
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
const toolCalls = group.toolCalls.map((toolCall) => {
|
|
21
|
+
const tc: Record<string, any> = {
|
|
22
|
+
type: 'function',
|
|
23
|
+
id: toolCall.callId,
|
|
24
|
+
function: { name: toolCall.name, arguments: toolCall.args },
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
// Include provider-specific extra content (e.g., Google thought signatures)
|
|
28
|
+
const googleExtra = getGoogleExtra(toolCall);
|
|
29
|
+
if (googleExtra) {
|
|
30
|
+
tc.extra_content = { google: googleExtra };
|
|
31
|
+
}
|
|
32
|
+
return tc;
|
|
33
|
+
});
|
|
25
34
|
|
|
26
35
|
if (toolCalls.length > 0) {
|
|
27
36
|
message['tool_calls'] = toolCalls;
|
|
@@ -53,24 +62,33 @@ async function toChatItem(item: ChatItem) {
|
|
|
53
62
|
}
|
|
54
63
|
}
|
|
55
64
|
|
|
56
|
-
const
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
65
|
+
const result: Record<string, any> = { role: item.role };
|
|
66
|
+
if (listContent.length === 0) {
|
|
67
|
+
result.content = textContent;
|
|
68
|
+
} else {
|
|
69
|
+
if (textContent.length > 0) {
|
|
70
|
+
listContent.push({ type: 'text', text: textContent });
|
|
71
|
+
}
|
|
72
|
+
result.content = listContent;
|
|
73
|
+
}
|
|
62
74
|
|
|
63
|
-
return
|
|
75
|
+
return result;
|
|
64
76
|
} else if (item.type === 'function_call') {
|
|
77
|
+
const tc: Record<string, any> = {
|
|
78
|
+
id: item.callId,
|
|
79
|
+
type: 'function',
|
|
80
|
+
function: { name: item.name, arguments: item.args },
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
// Include provider-specific extra content (e.g., Google thought signatures)
|
|
84
|
+
const googleExtra = getGoogleExtra(item);
|
|
85
|
+
if (googleExtra) {
|
|
86
|
+
tc.extra_content = { google: googleExtra };
|
|
87
|
+
}
|
|
88
|
+
|
|
65
89
|
return {
|
|
66
90
|
role: 'assistant',
|
|
67
|
-
tool_calls: [
|
|
68
|
-
{
|
|
69
|
-
id: item.callId,
|
|
70
|
-
type: 'function',
|
|
71
|
-
function: { name: item.name, arguments: item.args },
|
|
72
|
-
},
|
|
73
|
-
],
|
|
91
|
+
tool_calls: [tc],
|
|
74
92
|
};
|
|
75
93
|
} else if (item.type === 'function_call_output') {
|
|
76
94
|
return {
|
|
@@ -84,6 +102,15 @@ async function toChatItem(item: ChatItem) {
|
|
|
84
102
|
throw new Error(`Unsupported item type: ${item['type']}`);
|
|
85
103
|
}
|
|
86
104
|
|
|
105
|
+
function getGoogleExtra(
|
|
106
|
+
item: Partial<{ extra?: Record<string, unknown>; thoughtSignature?: string }>,
|
|
107
|
+
): Record<string, unknown> | undefined {
|
|
108
|
+
const googleExtra =
|
|
109
|
+
(item.extra?.google as Record<string, unknown> | undefined) ||
|
|
110
|
+
(item.thoughtSignature ? { thoughtSignature: item.thoughtSignature } : undefined);
|
|
111
|
+
return googleExtra;
|
|
112
|
+
}
|
|
113
|
+
|
|
87
114
|
async function toImageContent(content: ImageContent) {
|
|
88
115
|
const cacheKey = 'serialized_image'; // TODO: use hash of encoding options if available
|
|
89
116
|
let serialized: SerializedImage;
|
|
@@ -133,7 +133,11 @@ export function groupToolCalls(chatCtx: ChatContext) {
|
|
|
133
133
|
|
|
134
134
|
if (isAssistantMessage || isFunctionCall) {
|
|
135
135
|
// only assistant messages and function calls can be grouped
|
|
136
|
-
|
|
136
|
+
// For function calls, use group_id if available (for parallel function calls),
|
|
137
|
+
// otherwise fall back to id-based grouping for backwards compatibility
|
|
138
|
+
const groupId =
|
|
139
|
+
item.type === 'function_call' && item.groupId ? item.groupId : item.id.split('/')[0]!;
|
|
140
|
+
|
|
137
141
|
if (itemGroups[groupId] === undefined) {
|
|
138
142
|
itemGroups[groupId] = ChatItemGroup.create();
|
|
139
143
|
|
|
@@ -1350,11 +1350,14 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1350
1350
|
);
|
|
1351
1351
|
tasks.push(llmTask);
|
|
1352
1352
|
|
|
1353
|
-
const [ttsTextInput, llmOutput] = llmGenData.textStream.tee();
|
|
1354
|
-
|
|
1355
1353
|
let ttsTask: Task<void> | null = null;
|
|
1356
1354
|
let ttsStream: ReadableStream<AudioFrame> | null = null;
|
|
1355
|
+
let llmOutput: ReadableStream<string>;
|
|
1356
|
+
|
|
1357
1357
|
if (audioOutput) {
|
|
1358
|
+
// Only tee the stream when we need TTS
|
|
1359
|
+
const [ttsTextInput, textOutput] = llmGenData.textStream.tee();
|
|
1360
|
+
llmOutput = textOutput;
|
|
1358
1361
|
[ttsTask, ttsStream] = performTTSInference(
|
|
1359
1362
|
(...args) => this.agent.ttsNode(...args),
|
|
1360
1363
|
ttsTextInput,
|
|
@@ -1362,6 +1365,9 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1362
1365
|
replyAbortController,
|
|
1363
1366
|
);
|
|
1364
1367
|
tasks.push(ttsTask);
|
|
1368
|
+
} else {
|
|
1369
|
+
// No TTS needed, use the stream directly
|
|
1370
|
+
llmOutput = llmGenData.textStream;
|
|
1365
1371
|
}
|
|
1366
1372
|
|
|
1367
1373
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
|
|
@@ -1421,12 +1427,16 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1421
1427
|
//TODO(AJS-272): before executing tools, make sure we generated all the text
|
|
1422
1428
|
// (this ensure everything is kept ordered)
|
|
1423
1429
|
|
|
1424
|
-
const onToolExecutionStarted = (
|
|
1425
|
-
|
|
1430
|
+
const onToolExecutionStarted = (f: FunctionCall) => {
|
|
1431
|
+
speechHandle._itemAdded([f]);
|
|
1432
|
+
this.agent._chatCtx.items.push(f);
|
|
1433
|
+
this.agentSession._toolItemsAdded([f]);
|
|
1426
1434
|
};
|
|
1427
1435
|
|
|
1428
|
-
const onToolExecutionCompleted = (
|
|
1429
|
-
|
|
1436
|
+
const onToolExecutionCompleted = (out: ToolExecutionOutput) => {
|
|
1437
|
+
if (out.toolCallOutput) {
|
|
1438
|
+
speechHandle._itemAdded([out.toolCallOutput]);
|
|
1439
|
+
}
|
|
1430
1440
|
};
|
|
1431
1441
|
|
|
1432
1442
|
const [executeToolsTask, toolOutput] = performToolExecutions({
|
|
@@ -1501,6 +1511,7 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1501
1511
|
});
|
|
1502
1512
|
chatCtx.insert(message);
|
|
1503
1513
|
this.agent._chatCtx.insert(message);
|
|
1514
|
+
speechHandle._itemAdded([message]);
|
|
1504
1515
|
this.agentSession._conversationItemAdded(message);
|
|
1505
1516
|
}
|
|
1506
1517
|
|
|
@@ -1528,6 +1539,7 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1528
1539
|
});
|
|
1529
1540
|
chatCtx.insert(message);
|
|
1530
1541
|
this.agent._chatCtx.insert(message);
|
|
1542
|
+
speechHandle._itemAdded([message]);
|
|
1531
1543
|
this.agentSession._conversationItemAdded(message);
|
|
1532
1544
|
this.logger.info(
|
|
1533
1545
|
{ speech_id: speechHandle.id, message: textOut.text },
|
|
@@ -1612,28 +1624,18 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1612
1624
|
if (shouldGenerateToolReply) {
|
|
1613
1625
|
chatCtx.insert(toolMessages);
|
|
1614
1626
|
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
stepIndex: speechHandle._stepIndex + 1,
|
|
1618
|
-
parent: speechHandle,
|
|
1619
|
-
});
|
|
1620
|
-
this.agentSession.emit(
|
|
1621
|
-
AgentSessionEventTypes.SpeechCreated,
|
|
1622
|
-
createSpeechCreatedEvent({
|
|
1623
|
-
userInitiated: false,
|
|
1624
|
-
source: 'tool_response',
|
|
1625
|
-
speechHandle: handle,
|
|
1626
|
-
}),
|
|
1627
|
-
);
|
|
1627
|
+
// Increment step count on SAME handle (parity with Python agent_activity.py L2081)
|
|
1628
|
+
speechHandle._numSteps += 1;
|
|
1628
1629
|
|
|
1629
1630
|
// Avoid setting tool_choice to "required" or a specific function when
|
|
1630
1631
|
// passing tool response back to the LLM
|
|
1631
1632
|
const respondToolChoice = draining || modelSettings.toolChoice === 'none' ? 'none' : 'auto';
|
|
1632
1633
|
|
|
1634
|
+
// Reuse same speechHandle for tool response (parity with Python agent_activity.py L2122-2140)
|
|
1633
1635
|
const toolResponseTask = this.createSpeechTask({
|
|
1634
1636
|
task: Task.from(() =>
|
|
1635
1637
|
this.pipelineReplyTask(
|
|
1636
|
-
|
|
1638
|
+
speechHandle,
|
|
1637
1639
|
chatCtx,
|
|
1638
1640
|
toolCtx,
|
|
1639
1641
|
{ toolChoice: respondToolChoice },
|
|
@@ -1643,13 +1645,13 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1643
1645
|
toolMessages,
|
|
1644
1646
|
),
|
|
1645
1647
|
),
|
|
1646
|
-
ownedSpeechHandle:
|
|
1648
|
+
ownedSpeechHandle: speechHandle,
|
|
1647
1649
|
name: 'AgentActivity.pipelineReply',
|
|
1648
1650
|
});
|
|
1649
1651
|
|
|
1650
1652
|
toolResponseTask.finally(() => this.onPipelineReplyDone());
|
|
1651
1653
|
|
|
1652
|
-
this.scheduleSpeech(
|
|
1654
|
+
this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
|
|
1653
1655
|
} else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
|
|
1654
1656
|
for (const msg of toolMessages) {
|
|
1655
1657
|
msg.createdAt = replyStartedAt;
|