@livekit/agents 1.0.34 → 1.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/dist/cli.cjs.map +1 -1
  2. package/dist/inference/api_protos.d.cts +4 -4
  3. package/dist/inference/api_protos.d.ts +4 -4
  4. package/dist/inference/llm.cjs +30 -3
  5. package/dist/inference/llm.cjs.map +1 -1
  6. package/dist/inference/llm.d.cts +3 -1
  7. package/dist/inference/llm.d.ts +3 -1
  8. package/dist/inference/llm.d.ts.map +1 -1
  9. package/dist/inference/llm.js +30 -3
  10. package/dist/inference/llm.js.map +1 -1
  11. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  12. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  13. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  14. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  15. package/dist/ipc/job_proc_lazy_main.js +1 -1
  16. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  17. package/dist/llm/chat_context.cjs +20 -2
  18. package/dist/llm/chat_context.cjs.map +1 -1
  19. package/dist/llm/chat_context.d.cts +9 -0
  20. package/dist/llm/chat_context.d.ts +9 -0
  21. package/dist/llm/chat_context.d.ts.map +1 -1
  22. package/dist/llm/chat_context.js +20 -2
  23. package/dist/llm/chat_context.js.map +1 -1
  24. package/dist/llm/llm.cjs.map +1 -1
  25. package/dist/llm/llm.d.cts +1 -0
  26. package/dist/llm/llm.d.ts +1 -0
  27. package/dist/llm/llm.d.ts.map +1 -1
  28. package/dist/llm/llm.js.map +1 -1
  29. package/dist/llm/provider_format/openai.cjs +43 -20
  30. package/dist/llm/provider_format/openai.cjs.map +1 -1
  31. package/dist/llm/provider_format/openai.d.ts.map +1 -1
  32. package/dist/llm/provider_format/openai.js +43 -20
  33. package/dist/llm/provider_format/openai.js.map +1 -1
  34. package/dist/llm/provider_format/openai.test.cjs +35 -0
  35. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  36. package/dist/llm/provider_format/openai.test.js +35 -0
  37. package/dist/llm/provider_format/openai.test.js.map +1 -1
  38. package/dist/llm/provider_format/utils.cjs +1 -1
  39. package/dist/llm/provider_format/utils.cjs.map +1 -1
  40. package/dist/llm/provider_format/utils.d.ts.map +1 -1
  41. package/dist/llm/provider_format/utils.js +1 -1
  42. package/dist/llm/provider_format/utils.js.map +1 -1
  43. package/dist/voice/agent_activity.cjs +19 -19
  44. package/dist/voice/agent_activity.cjs.map +1 -1
  45. package/dist/voice/agent_activity.d.ts.map +1 -1
  46. package/dist/voice/agent_activity.js +19 -19
  47. package/dist/voice/agent_activity.js.map +1 -1
  48. package/dist/voice/agent_session.cjs +64 -25
  49. package/dist/voice/agent_session.cjs.map +1 -1
  50. package/dist/voice/agent_session.d.cts +25 -1
  51. package/dist/voice/agent_session.d.ts +25 -1
  52. package/dist/voice/agent_session.d.ts.map +1 -1
  53. package/dist/voice/agent_session.js +64 -25
  54. package/dist/voice/agent_session.js.map +1 -1
  55. package/dist/voice/background_audio.cjs.map +1 -1
  56. package/dist/voice/generation.cjs +2 -1
  57. package/dist/voice/generation.cjs.map +1 -1
  58. package/dist/voice/generation.d.ts.map +1 -1
  59. package/dist/voice/generation.js +2 -1
  60. package/dist/voice/generation.js.map +1 -1
  61. package/dist/voice/index.cjs +14 -1
  62. package/dist/voice/index.cjs.map +1 -1
  63. package/dist/voice/index.d.cts +1 -0
  64. package/dist/voice/index.d.ts +1 -0
  65. package/dist/voice/index.d.ts.map +1 -1
  66. package/dist/voice/index.js +3 -1
  67. package/dist/voice/index.js.map +1 -1
  68. package/dist/voice/room_io/room_io.cjs +1 -0
  69. package/dist/voice/room_io/room_io.cjs.map +1 -1
  70. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  71. package/dist/voice/room_io/room_io.js +1 -0
  72. package/dist/voice/room_io/room_io.js.map +1 -1
  73. package/dist/voice/speech_handle.cjs +12 -3
  74. package/dist/voice/speech_handle.cjs.map +1 -1
  75. package/dist/voice/speech_handle.d.cts +12 -2
  76. package/dist/voice/speech_handle.d.ts +12 -2
  77. package/dist/voice/speech_handle.d.ts.map +1 -1
  78. package/dist/voice/speech_handle.js +10 -2
  79. package/dist/voice/speech_handle.js.map +1 -1
  80. package/dist/voice/testing/index.cjs +52 -0
  81. package/dist/voice/testing/index.cjs.map +1 -0
  82. package/dist/voice/testing/index.d.cts +20 -0
  83. package/dist/voice/testing/index.d.ts +20 -0
  84. package/dist/voice/testing/index.d.ts.map +1 -0
  85. package/dist/voice/testing/index.js +31 -0
  86. package/dist/voice/testing/index.js.map +1 -0
  87. package/dist/voice/testing/run_result.cjs +477 -0
  88. package/dist/voice/testing/run_result.cjs.map +1 -0
  89. package/dist/voice/testing/run_result.d.cts +226 -0
  90. package/dist/voice/testing/run_result.d.ts +226 -0
  91. package/dist/voice/testing/run_result.d.ts.map +1 -0
  92. package/dist/voice/testing/run_result.js +451 -0
  93. package/dist/voice/testing/run_result.js.map +1 -0
  94. package/dist/voice/testing/types.cjs +46 -0
  95. package/dist/voice/testing/types.cjs.map +1 -0
  96. package/dist/voice/testing/types.d.cts +83 -0
  97. package/dist/voice/testing/types.d.ts +83 -0
  98. package/dist/voice/testing/types.d.ts.map +1 -0
  99. package/dist/voice/testing/types.js +19 -0
  100. package/dist/voice/testing/types.js.map +1 -0
  101. package/package.json +3 -3
  102. package/src/inference/llm.ts +42 -3
  103. package/src/ipc/job_proc_lazy_main.ts +1 -1
  104. package/src/llm/chat_context.ts +32 -2
  105. package/src/llm/llm.ts +1 -0
  106. package/src/llm/provider_format/openai.test.ts +40 -0
  107. package/src/llm/provider_format/openai.ts +46 -19
  108. package/src/llm/provider_format/utils.ts +5 -1
  109. package/src/voice/agent_activity.ts +24 -22
  110. package/src/voice/agent_session.ts +73 -28
  111. package/src/voice/generation.ts +1 -0
  112. package/src/voice/index.ts +1 -0
  113. package/src/voice/room_io/room_io.ts +1 -0
  114. package/src/voice/speech_handle.ts +24 -4
  115. package/src/voice/testing/index.ts +49 -0
  116. package/src/voice/testing/run_result.ts +576 -0
  117. package/src/voice/testing/types.ts +118 -0
@@ -0,0 +1,83 @@
1
+ import type { AgentHandoffItem, ChatMessage, ChatRole, FunctionCall, FunctionCallOutput } from '../../llm/chat_context.js';
2
+ import type { Agent } from '../agent.js';
3
+ /**
4
+ * Event representing an assistant or user message in the conversation.
5
+ */
6
+ export interface ChatMessageEvent {
7
+ type: 'message';
8
+ item: ChatMessage;
9
+ }
10
+ /**
11
+ * Event representing a function/tool call initiated by the LLM.
12
+ */
13
+ export interface FunctionCallEvent {
14
+ type: 'function_call';
15
+ item: FunctionCall;
16
+ }
17
+ /**
18
+ * Event representing the output/result of a function call.
19
+ */
20
+ export interface FunctionCallOutputEvent {
21
+ type: 'function_call_output';
22
+ item: FunctionCallOutput;
23
+ }
24
+ /**
25
+ * Event representing an agent handoff (switching from one agent to another).
26
+ */
27
+ export interface AgentHandoffEvent {
28
+ type: 'agent_handoff';
29
+ item: AgentHandoffItem;
30
+ oldAgent?: Agent;
31
+ newAgent: Agent;
32
+ }
33
+ /**
34
+ * Union type of all possible run events that can occur during a test run.
35
+ */
36
+ export type RunEvent = ChatMessageEvent | FunctionCallEvent | FunctionCallOutputEvent | AgentHandoffEvent;
37
+ /**
38
+ * Type guard to check if an event is a ChatMessageEvent.
39
+ */
40
+ export declare function isChatMessageEvent(event: RunEvent): event is ChatMessageEvent;
41
+ /**
42
+ * Type guard to check if an event is a FunctionCallEvent.
43
+ */
44
+ export declare function isFunctionCallEvent(event: RunEvent): event is FunctionCallEvent;
45
+ /**
46
+ * Type guard to check if an event is a FunctionCallOutputEvent.
47
+ */
48
+ export declare function isFunctionCallOutputEvent(event: RunEvent): event is FunctionCallOutputEvent;
49
+ /**
50
+ * Type guard to check if an event is an AgentHandoffEvent.
51
+ */
52
+ export declare function isAgentHandoffEvent(event: RunEvent): event is AgentHandoffEvent;
53
+ /**
54
+ * Options for message assertion.
55
+ */
56
+ export interface MessageAssertOptions {
57
+ role?: ChatRole;
58
+ }
59
+ /**
60
+ * Options for function call assertion.
61
+ */
62
+ export interface FunctionCallAssertOptions {
63
+ name?: string;
64
+ args?: Record<string, unknown>;
65
+ }
66
+ /**
67
+ * Options for function call output assertion.
68
+ */
69
+ export interface FunctionCallOutputAssertOptions {
70
+ output?: string;
71
+ isError?: boolean;
72
+ }
73
+ /**
74
+ * Options for agent handoff assertion.
75
+ */
76
+ export interface AgentHandoffAssertOptions {
77
+ newAgentType?: new (...args: any[]) => Agent;
78
+ }
79
+ /**
80
+ * Event type literals for type-safe event filtering.
81
+ */
82
+ export type EventType = 'message' | 'function_call' | 'function_call_output' | 'agent_handoff';
83
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/voice/testing/types.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,gBAAgB,EAChB,WAAW,EACX,QAAQ,EACR,YAAY,EACZ,kBAAkB,EACnB,MAAM,2BAA2B,CAAC;AACnC,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,SAAS,CAAC;IAChB,IAAI,EAAE,WAAW,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,YAAY,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,sBAAsB,CAAC;IAC7B,IAAI,EAAE,kBAAkB,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,gBAAgB,CAAC;IACvB,QAAQ,CAAC,EAAE,KAAK,CAAC;IACjB,QAAQ,EAAE,KAAK,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAChB,gBAAgB,GAChB,iBAAiB,GACjB,uBAAuB,GACvB,iBAAiB,CAAC;AAEtB;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,gBAAgB,CAE7E;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,iBAAiB,CAE/E;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,uBAAuB,CAE3F;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,iBAAiB,CAE/E;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,IAAI,CAAC,EAAE,QAAQ,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACxC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IAExC,YAAY,CAAC,EAAE,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,KAAK,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,eAAe,GAAG,sBAAsB,GAAG,eAAe,CAAC"}
@@ -0,0 +1,19 @@
1
+ function isChatMessageEvent(event) {
2
+ return event.type === "message";
3
+ }
4
+ function isFunctionCallEvent(event) {
5
+ return event.type === "function_call";
6
+ }
7
+ function isFunctionCallOutputEvent(event) {
8
+ return event.type === "function_call_output";
9
+ }
10
+ function isAgentHandoffEvent(event) {
11
+ return event.type === "agent_handoff";
12
+ }
13
+ export {
14
+ isAgentHandoffEvent,
15
+ isChatMessageEvent,
16
+ isFunctionCallEvent,
17
+ isFunctionCallOutputEvent
18
+ };
19
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../src/voice/testing/types.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n AgentHandoffItem,\n ChatMessage,\n ChatRole,\n FunctionCall,\n FunctionCallOutput,\n} from '../../llm/chat_context.js';\nimport type { Agent } from '../agent.js';\n\n/**\n * Event representing an assistant or user message in the conversation.\n */\nexport interface ChatMessageEvent {\n type: 'message';\n item: ChatMessage;\n}\n\n/**\n * Event representing a function/tool call initiated by the LLM.\n */\nexport interface FunctionCallEvent {\n type: 'function_call';\n item: FunctionCall;\n}\n\n/**\n * Event representing the output/result of a function call.\n */\nexport interface FunctionCallOutputEvent {\n type: 'function_call_output';\n item: FunctionCallOutput;\n}\n\n/**\n * Event representing an agent handoff (switching from one agent to another).\n */\nexport interface AgentHandoffEvent {\n type: 'agent_handoff';\n item: AgentHandoffItem;\n oldAgent?: Agent;\n newAgent: Agent;\n}\n\n/**\n * Union type of all possible run events that can occur during a test run.\n */\nexport type RunEvent =\n | ChatMessageEvent\n | FunctionCallEvent\n | FunctionCallOutputEvent\n | AgentHandoffEvent;\n\n/**\n * Type guard to check if an event is a ChatMessageEvent.\n */\nexport function isChatMessageEvent(event: RunEvent): event is ChatMessageEvent {\n return event.type === 'message';\n}\n\n/**\n * Type guard to check if an event is a FunctionCallEvent.\n */\nexport function isFunctionCallEvent(event: RunEvent): event is FunctionCallEvent {\n return event.type === 'function_call';\n}\n\n/**\n * Type guard to check if an event is a FunctionCallOutputEvent.\n */\nexport function isFunctionCallOutputEvent(event: RunEvent): event is FunctionCallOutputEvent {\n return event.type === 'function_call_output';\n}\n\n/**\n * Type guard to check if an event is an AgentHandoffEvent.\n */\nexport function 
isAgentHandoffEvent(event: RunEvent): event is AgentHandoffEvent {\n return event.type === 'agent_handoff';\n}\n\n/**\n * Options for message assertion.\n */\nexport interface MessageAssertOptions {\n role?: ChatRole;\n}\n\n/**\n * Options for function call assertion.\n */\nexport interface FunctionCallAssertOptions {\n name?: string;\n args?: Record<string, unknown>;\n}\n\n/**\n * Options for function call output assertion.\n */\nexport interface FunctionCallOutputAssertOptions {\n output?: string;\n isError?: boolean;\n}\n\n/**\n * Options for agent handoff assertion.\n */\nexport interface AgentHandoffAssertOptions {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n newAgentType?: new (...args: any[]) => Agent;\n}\n\n/**\n * Event type literals for type-safe event filtering.\n */\nexport type EventType = 'message' | 'function_call' | 'function_call_output' | 'agent_handoff';\n"],"mappings":"AA0DO,SAAS,mBAAmB,OAA4C;AAC7E,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,oBAAoB,OAA6C;AAC/E,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,0BAA0B,OAAmD;AAC3F,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,oBAAoB,OAA6C;AAC/E,SAAO,MAAM,SAAS;AACxB;","names":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents",
3
- "version": "1.0.34",
3
+ "version": "1.0.36",
4
4
  "description": "LiveKit Agents - Node.js",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -26,7 +26,7 @@
26
26
  "README.md"
27
27
  ],
28
28
  "devDependencies": {
29
- "@livekit/rtc-node": "^0.13.22",
29
+ "@livekit/rtc-node": "^0.13.24",
30
30
  "@microsoft/api-extractor": "^7.35.0",
31
31
  "@types/fluent-ffmpeg": "^2.1.28",
32
32
  "@types/json-schema": "^7.0.15",
@@ -70,7 +70,7 @@
70
70
  "zod-to-json-schema": "^3.24.6"
71
71
  },
72
72
  "peerDependencies": {
73
- "@livekit/rtc-node": "^0.13.22",
73
+ "@livekit/rtc-node": "^0.13.24",
74
74
  "zod": "^3.25.76 || ^4.1.8"
75
75
  },
76
76
  "scripts": {
@@ -27,7 +27,14 @@ export type OpenAIModels =
27
27
  | 'openai/gpt-4o-mini'
28
28
  | 'openai/gpt-oss-120b';
29
29
 
30
- export type GoogleModels = 'google/gemini-2.0-flash-lite';
30
+ export type GoogleModels =
31
+ | 'google/gemini-3-pro-preview'
32
+ | 'google/gemini-3-flash-preview'
33
+ | 'google/gemini-2.5-pro'
34
+ | 'google/gemini-2.5-flash'
35
+ | 'google/gemini-2.5-flash-lite'
36
+ | 'google/gemini-2.0-flash'
37
+ | 'google/gemini-2.0-flash-lite';
31
38
 
32
39
  export type QwenModels = 'qwen/qwen3-235b-a22b-instruct';
33
40
 
@@ -235,6 +242,7 @@ export class LLMStream extends llm.LLMStream {
235
242
  private toolIndex?: number;
236
243
  private fncName?: string;
237
244
  private fncRawArguments?: string;
245
+ private toolExtra?: Record<string, unknown>;
238
246
 
239
247
  constructor(
240
248
  llm: LLM,
@@ -277,6 +285,7 @@ export class LLMStream extends llm.LLMStream {
277
285
  // (defined inside the run method to make sure the state is reset for each run/attempt)
278
286
  let retryable = true;
279
287
  this.toolCallId = this.fncName = this.fncRawArguments = this.toolIndex = undefined;
288
+ this.toolExtra = undefined;
280
289
 
281
290
  try {
282
291
  const messages = (await this.chatCtx.toProviderFormat(
@@ -428,6 +437,7 @@ export class LLMStream extends llm.LLMStream {
428
437
  if (this.toolCallId && tool.id && tool.index !== this.toolIndex) {
429
438
  callChunk = this.createRunningToolCallChunk(id, delta);
430
439
  this.toolCallId = this.fncName = this.fncRawArguments = undefined;
440
+ this.toolExtra = undefined;
431
441
  }
432
442
 
433
443
  // Start or continue building the current tool call
@@ -436,6 +446,10 @@ export class LLMStream extends llm.LLMStream {
436
446
  this.toolCallId = tool.id;
437
447
  this.fncName = tool.function.name;
438
448
  this.fncRawArguments = tool.function.arguments || '';
449
+ // Extract extra from tool call (e.g., Google thought signatures)
450
+ this.toolExtra =
451
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
452
+ ((tool as any).extra_content as Record<string, unknown> | undefined) ?? undefined;
439
453
  } else if (tool.function.arguments) {
440
454
  this.fncRawArguments = (this.fncRawArguments || '') + tool.function.arguments;
441
455
  }
@@ -454,11 +468,17 @@ export class LLMStream extends llm.LLMStream {
454
468
  ) {
455
469
  const callChunk = this.createRunningToolCallChunk(id, delta);
456
470
  this.toolCallId = this.fncName = this.fncRawArguments = undefined;
471
+ this.toolExtra = undefined;
457
472
  return callChunk;
458
473
  }
459
474
 
475
+ // Extract extra from delta (e.g., Google thought signatures on text parts)
476
+ const deltaExtra =
477
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
478
+ ((delta as any).extra_content as Record<string, unknown> | undefined) ?? undefined;
479
+
460
480
  // Regular content message
461
- if (!delta.content) {
481
+ if (!delta.content && !deltaExtra) {
462
482
  return undefined;
463
483
  }
464
484
 
@@ -466,7 +486,8 @@ export class LLMStream extends llm.LLMStream {
466
486
  id,
467
487
  delta: {
468
488
  role: 'assistant',
469
- content: delta.content,
489
+ content: delta.content || undefined,
490
+ extra: deltaExtra,
470
491
  },
471
492
  };
472
493
  }
@@ -475,19 +496,37 @@ export class LLMStream extends llm.LLMStream {
475
496
  id: string,
476
497
  delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta,
477
498
  ): llm.ChatChunk {
499
+ const toolExtra = this.toolExtra ? { ...this.toolExtra } : {};
500
+ const thoughtSignature = this.extractThoughtSignature(toolExtra);
501
+ const deltaExtra =
502
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
503
+ ((delta as any).extra_content as Record<string, unknown> | undefined) ?? undefined;
504
+
478
505
  return {
479
506
  id,
480
507
  delta: {
481
508
  role: 'assistant',
482
509
  content: delta.content || undefined,
510
+ extra: deltaExtra,
483
511
  toolCalls: [
484
512
  llm.FunctionCall.create({
485
513
  callId: this.toolCallId || '',
486
514
  name: this.fncName || '',
487
515
  args: this.fncRawArguments || '',
516
+ extra: toolExtra,
517
+ thoughtSignature,
488
518
  }),
489
519
  ],
490
520
  },
491
521
  };
492
522
  }
523
+
524
+ private extractThoughtSignature(extra?: Record<string, unknown>): string | undefined {
525
+ const googleExtra = extra?.google;
526
+ if (googleExtra && typeof googleExtra === 'object') {
527
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
528
+ return (googleExtra as any).thoughtSignature || (googleExtra as any).thought_signature;
529
+ }
530
+ return undefined;
531
+ }
493
532
  }
@@ -136,7 +136,7 @@ const startJob = (
136
136
  shutdownTasks.push(callback());
137
137
  }
138
138
  await Promise.all(shutdownTasks).catch((error) =>
139
- logger.error('error while shutting down the job', error),
139
+ logger.error({ error }, 'error while shutting down the job'),
140
140
  );
141
141
 
142
142
  process.send!({ case: 'done' });
@@ -189,6 +189,12 @@ export class FunctionCall {
189
189
 
190
190
  createdAt: number;
191
191
 
192
+ extra: Record<string, unknown>;
193
+ /**
194
+ * Optional grouping identifier for parallel tool calls.
195
+ */
196
+ groupId?: string;
197
+
192
198
  /**
193
199
  * Opaque signature for Gemini thinking mode.
194
200
  * When using Gemini 3+ models with thinking enabled, this signature must be
@@ -202,6 +208,8 @@ export class FunctionCall {
202
208
  args: string;
203
209
  id?: string;
204
210
  createdAt?: number;
211
+ extra?: Record<string, unknown>;
212
+ groupId?: string;
205
213
  thoughtSignature?: string;
206
214
  }) {
207
215
  const {
@@ -210,6 +218,8 @@ export class FunctionCall {
210
218
  args,
211
219
  id = shortuuid('item_'),
212
220
  createdAt = Date.now(),
221
+ extra = {},
222
+ groupId,
213
223
  thoughtSignature,
214
224
  } = params;
215
225
  this.id = id;
@@ -217,7 +227,15 @@ export class FunctionCall {
217
227
  this.args = args;
218
228
  this.name = name;
219
229
  this.createdAt = createdAt;
220
- this.thoughtSignature = thoughtSignature;
230
+ this.extra = { ...extra };
231
+ this.groupId = groupId;
232
+ this.thoughtSignature =
233
+ thoughtSignature ??
234
+ (typeof this.extra.google === 'object' && this.extra.google !== null
235
+ ? // eslint-disable-next-line @typescript-eslint/no-explicit-any
236
+ (this.extra.google as any).thoughtSignature ||
237
+ (this.extra.google as any).thought_signature
238
+ : undefined);
221
239
  }
222
240
 
223
241
  static create(params: {
@@ -226,6 +244,8 @@ export class FunctionCall {
226
244
  args: string;
227
245
  id?: string;
228
246
  createdAt?: number;
247
+ extra?: Record<string, unknown>;
248
+ groupId?: string;
229
249
  thoughtSignature?: string;
230
250
  }) {
231
251
  return new FunctionCall(params);
@@ -241,6 +261,14 @@ export class FunctionCall {
241
261
  args: this.args,
242
262
  };
243
263
 
264
+ if (Object.keys(this.extra).length > 0) {
265
+ result.extra = this.extra as JSONValue;
266
+ }
267
+
268
+ if (this.groupId) {
269
+ result.groupId = this.groupId;
270
+ }
271
+
244
272
  if (this.thoughtSignature) {
245
273
  result.thoughtSignature = this.thoughtSignature;
246
274
  }
@@ -627,7 +655,9 @@ export class ChatContext {
627
655
  a.name !== b.name ||
628
656
  a.callId !== b.callId ||
629
657
  a.args !== b.args ||
630
- a.thoughtSignature !== b.thoughtSignature
658
+ a.thoughtSignature !== b.thoughtSignature ||
659
+ a.groupId !== b.groupId ||
660
+ JSON.stringify(a.extra) !== JSON.stringify(b.extra)
631
661
  ) {
632
662
  return false;
633
663
  }
package/src/llm/llm.ts CHANGED
@@ -17,6 +17,7 @@ export interface ChoiceDelta {
17
17
  role: ChatRole;
18
18
  content?: string;
19
19
  toolCalls?: FunctionCall[];
20
+ extra?: Record<string, unknown>;
20
21
  }
21
22
 
22
23
  export interface CompletionUsage {
@@ -258,6 +258,46 @@ describe('toChatCtx', () => {
258
258
  ]);
259
259
  });
260
260
 
261
+ it('should include provider-specific extra content on tool calls', async () => {
262
+ const ctx = ChatContext.empty();
263
+ const msg = ctx.addMessage({ role: 'assistant', content: 'Running tool' });
264
+
265
+ const toolCall = FunctionCall.create({
266
+ id: `${msg.id}/tool_1`,
267
+ callId: 'call_789',
268
+ name: 'google_call',
269
+ args: '{}',
270
+ extra: { google: { thoughtSignature: 'sig-123' } },
271
+ });
272
+ const toolOutput = FunctionCallOutput.create({
273
+ callId: 'call_789',
274
+ output: '{"result": "ok"}',
275
+ isError: false,
276
+ });
277
+
278
+ ctx.insert([toolCall, toolOutput]);
279
+
280
+ const result = await toChatCtx(ctx);
281
+
282
+ expect(result[0]).toEqual({
283
+ role: 'assistant',
284
+ content: 'Running tool',
285
+ tool_calls: [
286
+ {
287
+ type: 'function',
288
+ id: 'call_789',
289
+ function: { name: 'google_call', arguments: '{}' },
290
+ extra_content: { google: { thoughtSignature: 'sig-123' } },
291
+ },
292
+ ],
293
+ });
294
+ expect(result[1]).toEqual({
295
+ role: 'tool',
296
+ tool_call_id: 'call_789',
297
+ content: '{"result": "ok"}',
298
+ });
299
+ });
300
+
261
301
  it('should handle multiple tool calls in one message', async () => {
262
302
  const ctx = ChatContext.empty();
263
303
 
@@ -17,11 +17,20 @@ export async function toChatCtx(chatCtx: ChatContext, injectDummyUserMessage: bo
17
17
  ? await toChatItem(group.message)
18
18
  : { role: 'assistant' };
19
19
 
20
- const toolCalls = group.toolCalls.map((toolCall) => ({
21
- type: 'function',
22
- id: toolCall.callId,
23
- function: { name: toolCall.name, arguments: toolCall.args },
24
- }));
20
+ const toolCalls = group.toolCalls.map((toolCall) => {
21
+ const tc: Record<string, any> = {
22
+ type: 'function',
23
+ id: toolCall.callId,
24
+ function: { name: toolCall.name, arguments: toolCall.args },
25
+ };
26
+
27
+ // Include provider-specific extra content (e.g., Google thought signatures)
28
+ const googleExtra = getGoogleExtra(toolCall);
29
+ if (googleExtra) {
30
+ tc.extra_content = { google: googleExtra };
31
+ }
32
+ return tc;
33
+ });
25
34
 
26
35
  if (toolCalls.length > 0) {
27
36
  message['tool_calls'] = toolCalls;
@@ -53,24 +62,33 @@ async function toChatItem(item: ChatItem) {
53
62
  }
54
63
  }
55
64
 
56
- const content =
57
- listContent.length == 0
58
- ? textContent
59
- : textContent.length == 0
60
- ? listContent
61
- : [...listContent, { type: 'text', text: textContent }];
65
+ const result: Record<string, any> = { role: item.role };
66
+ if (listContent.length === 0) {
67
+ result.content = textContent;
68
+ } else {
69
+ if (textContent.length > 0) {
70
+ listContent.push({ type: 'text', text: textContent });
71
+ }
72
+ result.content = listContent;
73
+ }
62
74
 
63
- return { role: item.role, content };
75
+ return result;
64
76
  } else if (item.type === 'function_call') {
77
+ const tc: Record<string, any> = {
78
+ id: item.callId,
79
+ type: 'function',
80
+ function: { name: item.name, arguments: item.args },
81
+ };
82
+
83
+ // Include provider-specific extra content (e.g., Google thought signatures)
84
+ const googleExtra = getGoogleExtra(item);
85
+ if (googleExtra) {
86
+ tc.extra_content = { google: googleExtra };
87
+ }
88
+
65
89
  return {
66
90
  role: 'assistant',
67
- tool_calls: [
68
- {
69
- id: item.callId,
70
- type: 'function',
71
- function: { name: item.name, arguments: item.args },
72
- },
73
- ],
91
+ tool_calls: [tc],
74
92
  };
75
93
  } else if (item.type === 'function_call_output') {
76
94
  return {
@@ -84,6 +102,15 @@ async function toChatItem(item: ChatItem) {
84
102
  throw new Error(`Unsupported item type: ${item['type']}`);
85
103
  }
86
104
 
105
+ function getGoogleExtra(
106
+ item: Partial<{ extra?: Record<string, unknown>; thoughtSignature?: string }>,
107
+ ): Record<string, unknown> | undefined {
108
+ const googleExtra =
109
+ (item.extra?.google as Record<string, unknown> | undefined) ||
110
+ (item.thoughtSignature ? { thoughtSignature: item.thoughtSignature } : undefined);
111
+ return googleExtra;
112
+ }
113
+
87
114
  async function toImageContent(content: ImageContent) {
88
115
  const cacheKey = 'serialized_image'; // TODO: use hash of encoding options if available
89
116
  let serialized: SerializedImage;
@@ -133,7 +133,11 @@ export function groupToolCalls(chatCtx: ChatContext) {
133
133
 
134
134
  if (isAssistantMessage || isFunctionCall) {
135
135
  // only assistant messages and function calls can be grouped
136
- const groupId = item.id.split('/')[0]!;
136
+ // For function calls, use group_id if available (for parallel function calls),
137
+ // otherwise fall back to id-based grouping for backwards compatibility
138
+ const groupId =
139
+ item.type === 'function_call' && item.groupId ? item.groupId : item.id.split('/')[0]!;
140
+
137
141
  if (itemGroups[groupId] === undefined) {
138
142
  itemGroups[groupId] = ChatItemGroup.create();
139
143
 
@@ -1350,11 +1350,14 @@ export class AgentActivity implements RecognitionHooks {
1350
1350
  );
1351
1351
  tasks.push(llmTask);
1352
1352
 
1353
- const [ttsTextInput, llmOutput] = llmGenData.textStream.tee();
1354
-
1355
1353
  let ttsTask: Task<void> | null = null;
1356
1354
  let ttsStream: ReadableStream<AudioFrame> | null = null;
1355
+ let llmOutput: ReadableStream<string>;
1356
+
1357
1357
  if (audioOutput) {
1358
+ // Only tee the stream when we need TTS
1359
+ const [ttsTextInput, textOutput] = llmGenData.textStream.tee();
1360
+ llmOutput = textOutput;
1358
1361
  [ttsTask, ttsStream] = performTTSInference(
1359
1362
  (...args) => this.agent.ttsNode(...args),
1360
1363
  ttsTextInput,
@@ -1362,6 +1365,9 @@ export class AgentActivity implements RecognitionHooks {
1362
1365
  replyAbortController,
1363
1366
  );
1364
1367
  tasks.push(ttsTask);
1368
+ } else {
1369
+ // No TTS needed, use the stream directly
1370
+ llmOutput = llmGenData.textStream;
1365
1371
  }
1366
1372
 
1367
1373
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
@@ -1421,12 +1427,16 @@ export class AgentActivity implements RecognitionHooks {
1421
1427
  //TODO(AJS-272): before executing tools, make sure we generated all the text
1422
1428
  // (this ensure everything is kept ordered)
1423
1429
 
1424
- const onToolExecutionStarted = (_: FunctionCall) => {
1425
- // TODO(brian): handle speech_handle item_added
1430
+ const onToolExecutionStarted = (f: FunctionCall) => {
1431
+ speechHandle._itemAdded([f]);
1432
+ this.agent._chatCtx.items.push(f);
1433
+ this.agentSession._toolItemsAdded([f]);
1426
1434
  };
1427
1435
 
1428
- const onToolExecutionCompleted = (_: ToolExecutionOutput) => {
1429
- // TODO(brian): handle speech_handle item_added
1436
+ const onToolExecutionCompleted = (out: ToolExecutionOutput) => {
1437
+ if (out.toolCallOutput) {
1438
+ speechHandle._itemAdded([out.toolCallOutput]);
1439
+ }
1430
1440
  };
1431
1441
 
1432
1442
  const [executeToolsTask, toolOutput] = performToolExecutions({
@@ -1501,6 +1511,7 @@ export class AgentActivity implements RecognitionHooks {
1501
1511
  });
1502
1512
  chatCtx.insert(message);
1503
1513
  this.agent._chatCtx.insert(message);
1514
+ speechHandle._itemAdded([message]);
1504
1515
  this.agentSession._conversationItemAdded(message);
1505
1516
  }
1506
1517
 
@@ -1528,6 +1539,7 @@ export class AgentActivity implements RecognitionHooks {
1528
1539
  });
1529
1540
  chatCtx.insert(message);
1530
1541
  this.agent._chatCtx.insert(message);
1542
+ speechHandle._itemAdded([message]);
1531
1543
  this.agentSession._conversationItemAdded(message);
1532
1544
  this.logger.info(
1533
1545
  { speech_id: speechHandle.id, message: textOut.text },
@@ -1612,28 +1624,18 @@ export class AgentActivity implements RecognitionHooks {
1612
1624
  if (shouldGenerateToolReply) {
1613
1625
  chatCtx.insert(toolMessages);
1614
1626
 
1615
- const handle = SpeechHandle.create({
1616
- allowInterruptions: speechHandle.allowInterruptions,
1617
- stepIndex: speechHandle._stepIndex + 1,
1618
- parent: speechHandle,
1619
- });
1620
- this.agentSession.emit(
1621
- AgentSessionEventTypes.SpeechCreated,
1622
- createSpeechCreatedEvent({
1623
- userInitiated: false,
1624
- source: 'tool_response',
1625
- speechHandle: handle,
1626
- }),
1627
- );
1627
+ // Increment step count on SAME handle (parity with Python agent_activity.py L2081)
1628
+ speechHandle._numSteps += 1;
1628
1629
 
1629
1630
  // Avoid setting tool_choice to "required" or a specific function when
1630
1631
  // passing tool response back to the LLM
1631
1632
  const respondToolChoice = draining || modelSettings.toolChoice === 'none' ? 'none' : 'auto';
1632
1633
 
1634
+ // Reuse same speechHandle for tool response (parity with Python agent_activity.py L2122-2140)
1633
1635
  const toolResponseTask = this.createSpeechTask({
1634
1636
  task: Task.from(() =>
1635
1637
  this.pipelineReplyTask(
1636
- handle,
1638
+ speechHandle,
1637
1639
  chatCtx,
1638
1640
  toolCtx,
1639
1641
  { toolChoice: respondToolChoice },
@@ -1643,13 +1645,13 @@ export class AgentActivity implements RecognitionHooks {
1643
1645
  toolMessages,
1644
1646
  ),
1645
1647
  ),
1646
- ownedSpeechHandle: handle,
1648
+ ownedSpeechHandle: speechHandle,
1647
1649
  name: 'AgentActivity.pipelineReply',
1648
1650
  });
1649
1651
 
1650
1652
  toolResponseTask.finally(() => this.onPipelineReplyDone());
1651
1653
 
1652
- this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1654
+ this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1653
1655
  } else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
1654
1656
  for (const msg of toolMessages) {
1655
1657
  msg.createdAt = replyStartedAt;