lite-agent 0.6.0-py3-none-any.whl → 0.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


lite_agent/agent.py CHANGED
@@ -7,9 +7,21 @@ from funcall import Funcall
 from jinja2 import Environment, FileSystemLoader
 
 from lite_agent.client import BaseLLMClient, LiteLLMClient, ReasoningConfig
+from lite_agent.constants import CompletionMode, ToolName
 from lite_agent.loggers import logger
 from lite_agent.response_handlers import CompletionResponseHandler, ResponsesAPIHandler
-from lite_agent.types import AgentChunk, FunctionCallEvent, FunctionCallOutputEvent, RunnerMessages, ToolCall, message_to_llm_dict, system_message_to_llm_dict
+from lite_agent.types import (
+    AgentChunk,
+    AssistantTextContent,
+    AssistantToolCall,
+    AssistantToolCallResult,
+    FunctionCallEvent,
+    FunctionCallOutputEvent,
+    RunnerMessages,
+    ToolCall,
+    message_to_llm_dict,
+    system_message_to_llm_dict,
+)
 from lite_agent.types.messages import NewAssistantMessage, NewSystemMessage, NewUserMessage
 
 TEMPLATES_DIR = Path(__file__).parent / "templates"
@@ -32,10 +44,24 @@ class Agent:
         message_transfer: Callable[[RunnerMessages], RunnerMessages] | None = None,
         completion_condition: str = "stop",
         reasoning: ReasoningConfig = None,
+        stop_before_tools: list[str] | list[Callable] | None = None,
     ) -> None:
         self.name = name
         self.instructions = instructions
         self.reasoning = reasoning
+        # Convert stop_before_tools entries to function names
+        if stop_before_tools:
+            self.stop_before_functions = set()
+            for func in stop_before_tools:
+                if isinstance(func, str):
+                    self.stop_before_functions.add(func)
+                elif callable(func):
+                    self.stop_before_functions.add(func.__name__)
+                else:
+                    msg = f"stop_before_tools must contain strings or callables, got {type(func)}"
+                    raise TypeError(msg)
+        else:
+            self.stop_before_functions = set()
 
         if isinstance(model, BaseLLMClient):
             # If model is a BaseLLMClient instance, use it directly
@@ -54,7 +80,7 @@ class Agent:
         self.fc = Funcall(tools)
 
         # Add wait_for_user tool if completion condition is "call"
-        if completion_condition == "call":
+        if completion_condition == CompletionMode.CALL:
             self._add_wait_for_user_tool()
 
         # Set parent for handoff agents
@@ -99,7 +125,7 @@ class Agent:
 
         # Add single dynamic tool for all transfers
         self.fc.add_dynamic_tool(
-            name="transfer_to_agent",
+            name=ToolName.TRANSFER_TO_AGENT,
             description="Transfer conversation to another agent.",
             parameters={
                 "name": {
@@ -129,7 +155,7 @@ class Agent:
 
         # Add dynamic tool for parent transfer
         self.fc.add_dynamic_tool(
-            name="transfer_to_parent",
+            name=ToolName.TRANSFER_TO_PARENT,
             description="Transfer conversation back to parent agent when current task is completed or cannot be solved by current agent",
             parameters={},
             required=[],
@@ -160,7 +186,7 @@ class Agent:
         try:
             # Try to remove the existing transfer tool
             if hasattr(self.fc, "remove_dynamic_tool"):
-                self.fc.remove_dynamic_tool("transfer_to_agent")
+                self.fc.remove_dynamic_tool(ToolName.TRANSFER_TO_AGENT)
         except Exception as e:
             # If removal fails, log and continue anyway
             logger.debug(f"Failed to remove existing transfer tool: {e}")
@@ -205,31 +231,30 @@ class Agent:
         for message in messages:
             if isinstance(message, NewAssistantMessage):
                 for item in message.content:
-                    match item.type:
-                        case "text":
-                            res.append(
-                                {
-                                    "role": "assistant",
-                                    "content": item.text,
-                                },
-                            )
-                        case "tool_call":
-                            res.append(
-                                {
-                                    "type": "function_call",
-                                    "call_id": item.call_id,
-                                    "name": item.name,
-                                    "arguments": item.arguments,
-                                },
-                            )
-                        case "tool_call_result":
-                            res.append(
-                                {
-                                    "type": "function_call_output",
-                                    "call_id": item.call_id,
-                                    "output": item.output,
-                                },
-                            )
+                    if isinstance(item, AssistantTextContent):
+                        res.append(
+                            {
+                                "role": "assistant",
+                                "content": item.text,
+                            },
+                        )
+                    elif isinstance(item, AssistantToolCall):
+                        res.append(
+                            {
+                                "type": "function_call",
+                                "call_id": item.call_id,
+                                "name": item.name,
+                                "arguments": item.arguments,
+                            },
+                        )
+                    elif isinstance(item, AssistantToolCallResult):
+                        res.append(
+                            {
+                                "type": "function_call_output",
+                                "call_id": item.call_id,
+                                "output": item.output,
+                            },
+                        )
             elif isinstance(message, NewSystemMessage):
                 res.append(
                     {
@@ -269,9 +294,6 @@ class Agent:
                         "content": contents,
                     },
                 )
-            # Handle dict messages (legacy format)
-            elif isinstance(message, dict):
-                res.append(message)
         return res
 
     async def completion(
@@ -279,6 +301,7 @@ class Agent:
         messages: RunnerMessages,
         record_to_file: Path | None = None,
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
     ) -> AsyncGenerator[AgentChunk, None]:
         # Apply message transfer callback if provided - always use legacy format for LLM compatibility
@@ -301,13 +324,14 @@ class Agent:
 
         # Use response handler for unified processing
         handler = CompletionResponseHandler()
-        return handler.handle(resp, streaming, record_to_file)
+        return handler.handle(resp, streaming=streaming, record_to=record_to_file)
 
     async def responses(
         self,
         messages: RunnerMessages,
         record_to_file: Path | None = None,
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
     ) -> AsyncGenerator[AgentChunk, None]:
         # Apply message transfer callback if provided - always use legacy format for LLM compatibility
@@ -328,20 +352,29 @@ class Agent:
         )
         # Use response handler for unified processing
         handler = ResponsesAPIHandler()
-        return handler.handle(resp, streaming, record_to_file)
+        return handler.handle(resp, streaming=streaming, record_to=record_to_file)
 
     async def list_require_confirm_tools(self, tool_calls: Sequence[ToolCall] | None) -> Sequence[ToolCall]:
         if not tool_calls:
             return []
         results = []
         for tool_call in tool_calls:
-            tool_func = self.fc.function_registry.get(tool_call.function.name)
+            function_name = tool_call.function.name
+
+            # Check if function is in dynamic stop_before_functions list
+            if function_name in self.stop_before_functions:
+                logger.debug('Tool call "%s" requires confirmation (stop_before_functions)', tool_call.id)
+                results.append(tool_call)
+                continue
+
+            # Check decorator-based require_confirmation
+            tool_func = self.fc.function_registry.get(function_name)
             if not tool_func:
-                logger.warning("Tool function %s not found in registry", tool_call.function.name)
+                logger.warning("Tool function %s not found in registry", function_name)
                 continue
-            tool_meta = self.fc.get_tool_meta(tool_call.function.name)
+            tool_meta = self.fc.get_tool_meta(function_name)
             if tool_meta["require_confirm"]:
-                logger.debug('Tool call "%s" requires confirmation', tool_call.id)
+                logger.debug('Tool call "%s" requires confirmation (decorator)', tool_call.id)
                 results.append(tool_call)
         return results
 
@@ -396,10 +429,42 @@ class Agent:
             role = message_dict.get("role")
 
             if role == "assistant":
-                # Look ahead for function_call messages
+                # Extract tool_calls from content if present
                 tool_calls = []
+                content = message_dict.get("content", [])
+
+                # Handle both string and array content
+                if isinstance(content, list):
+                    # Extract tool_calls from content array and filter out non-text content
+                    filtered_content = []
+                    for item in content:
+                        if isinstance(item, dict):
+                            if item.get("type") == "tool_call":
+                                tool_call = {
+                                    "id": item.get("call_id", ""),
+                                    "type": "function",
+                                    "function": {
+                                        "name": item.get("name", ""),
+                                        "arguments": item.get("arguments", "{}"),
+                                    },
+                                    "index": len(tool_calls),
+                                }
+                                tool_calls.append(tool_call)
+                            elif item.get("type") == "text":
+                                filtered_content.append(item)
+                            # Skip tool_call_result - they should be handled by separate function_call_output messages
+
+                    # Update content to only include text items
+                    if filtered_content:
+                        message_dict = message_dict.copy()
+                        message_dict["content"] = filtered_content
+                    elif tool_calls:
+                        # If we have tool_calls but no text content, set content to None per OpenAI API spec
+                        message_dict = message_dict.copy()
+                        message_dict["content"] = None
+
+                # Look ahead for function_call messages (legacy support)
                 j = i + 1
-
                 while j < len(messages):
                     next_message = messages[j]
                     next_dict = message_to_llm_dict(next_message) if isinstance(next_message, (NewUserMessage, NewSystemMessage, NewAssistantMessage)) else next_message
@@ -424,6 +489,13 @@ class Agent:
             if tool_calls:
                 assistant_msg["tool_calls"] = tool_calls  # type: ignore
 
+            # Convert content format for OpenAI API compatibility
+            content = assistant_msg.get("content", [])
+            if isinstance(content, list):
+                # Extract text content and join it into a single string
+                text_parts = [item.get("text", "") for item in content if isinstance(item, dict) and item.get("type") == "text"]
+                assistant_msg["content"] = " ".join(text_parts) if text_parts else None
+
             converted_messages.append(assistant_msg)
             i = j  # Skip the function_call messages we've processed
 
@@ -536,10 +608,73 @@ class Agent:
 
         # Add dynamic tool for task completion
         self.fc.add_dynamic_tool(
-            name="wait_for_user",
+            name=ToolName.WAIT_FOR_USER,
             description="Call this function when you have completed your assigned task or need more information from the user.",
             parameters={},
             required=[],
             handler=wait_for_user_handler,
         )
 
+    def set_stop_before_functions(self, functions: list[str] | list[Callable]) -> None:
+        """Set the list of functions that require confirmation before execution.
+
+        Args:
+            functions: List of function names (str) or callable objects
+        """
+        self.stop_before_functions = set()
+        for func in functions:
+            if isinstance(func, str):
+                self.stop_before_functions.add(func)
+            elif callable(func):
+                self.stop_before_functions.add(func.__name__)
+            else:
+                msg = f"stop_before_functions must contain strings or callables, got {type(func)}"
+                raise TypeError(msg)
+        logger.debug(f"Set stop_before_functions to: {self.stop_before_functions}")
+
+    def add_stop_before_function(self, function: str | Callable) -> None:
+        """Add a function to the stop_before_functions list.
+
+        Args:
+            function: Function name (str) or callable object to add
+        """
+        if isinstance(function, str):
+            function_name = function
+        elif callable(function):
+            function_name = function.__name__
+        else:
+            msg = f"function must be a string or callable, got {type(function)}"
+            raise TypeError(msg)
+
+        self.stop_before_functions.add(function_name)
+        logger.debug(f"Added '{function_name}' to stop_before_functions")
+
+    def remove_stop_before_function(self, function: str | Callable) -> None:
+        """Remove a function from the stop_before_functions list.
+
+        Args:
+            function: Function name (str) or callable object to remove
+        """
+        if isinstance(function, str):
+            function_name = function
+        elif callable(function):
+            function_name = function.__name__
+        else:
+            msg = f"function must be a string or callable, got {type(function)}"
+            raise TypeError(msg)
+
+        self.stop_before_functions.discard(function_name)
+        logger.debug(f"Removed '{function_name}' from stop_before_functions")
+
+    def clear_stop_before_functions(self) -> None:
+        """Clear all function names from the stop_before_functions list."""
+        self.stop_before_functions.clear()
+        logger.debug("Cleared all stop_before_functions")
+
+    def get_stop_before_functions(self) -> set[str]:
+        """Get the current set of function names that require confirmation.
+
+        Returns:
+            Set of function names
+        """
+        return self.stop_before_functions.copy()
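
Taken together, the hooks above let callers pause a run before selected tools execute without decorating the tools themselves. A minimal usage sketch, using only constructor arguments visible in this diff; the tool and the model name are hypothetical:

    from lite_agent.agent import Agent


    def delete_user(user_id: str) -> str:
        """Hypothetical example tool."""
        return f"deleted {user_id}"


    agent = Agent(
        name="admin",
        instructions="You manage user accounts.",
        model="gpt-4o-mini",  # assumed: a model name; the diff shows a BaseLLMClient instance is also accepted
        tools=[delete_user],
        stop_before_tools=[delete_user],  # strings and callables are both accepted
    )

    # The confirmation set can also be edited after construction:
    agent.add_stop_before_function("send_email")
    agent.remove_stop_before_function("send_email")
    print(agent.get_stop_before_functions())  # {'delete_user'}
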
@@ -26,6 +26,8 @@ from lite_agent.types import (
     AgentSystemMessage,
     AgentUserMessage,
     AssistantMessageMeta,
+    AssistantToolCall,
+    AssistantToolCallResult,
     BasicMessageMeta,
     FlexibleRunnerMessage,
     LLMResponseMeta,
@@ -228,9 +230,9 @@ def _update_message_counts(message: FlexibleRunnerMessage, counts: dict[str, int
         counts["Assistant"] += 1
         # Count tool calls and outputs within the assistant message
         for content_item in message.content:
-            if content_item.type == "tool_call":
+            if isinstance(content_item, AssistantToolCall):
                 counts["Function Call"] += 1
-            elif content_item.type == "tool_call_result":
+            elif isinstance(content_item, AssistantToolCallResult):
                 counts["Function Output"] += 1
     elif isinstance(message, NewSystemMessage):
         counts["System"] += 1
@@ -295,10 +297,18 @@ def _process_object_meta(meta: BasicMessageMeta | LLMResponseMeta | AssistantMes
     """Process object-style meta data."""
     # LLMResponseMeta and AssistantMessageMeta both carry these fields
     if isinstance(meta, (LLMResponseMeta, AssistantMessageMeta)):
-        if hasattr(meta, "input_tokens") and meta.input_tokens is not None:
-            total_input += int(meta.input_tokens)
-        if hasattr(meta, "output_tokens") and meta.output_tokens is not None:
-            total_output += int(meta.output_tokens)
+        # For AssistantMessageMeta, use the structured usage field
+        if isinstance(meta, AssistantMessageMeta) and meta.usage is not None:
+            if meta.usage.input_tokens is not None:
+                total_input += int(meta.usage.input_tokens)
+            if meta.usage.output_tokens is not None:
+                total_output += int(meta.usage.output_tokens)
+        # For LLMResponseMeta, use the flat fields
+        elif isinstance(meta, LLMResponseMeta):
+            if hasattr(meta, "input_tokens") and meta.input_tokens is not None:
+                total_input += int(meta.input_tokens)
+            if hasattr(meta, "output_tokens") and meta.output_tokens is not None:
+                total_output += int(meta.output_tokens)
     if hasattr(meta, "latency_ms") and meta.latency_ms is not None:
         total_latency += int(meta.latency_ms)
     if hasattr(meta, "output_time_ms") and meta.output_time_ms is not None:
@@ -363,11 +373,9 @@ def display_chat_summary(messages: RunnerMessages, *, console: Console | None =
         messages: the list of messages to summarize
         console: Rich Console instance; a new one is created when None
     """
-    if console is None:
-        console = Console()
-
+    active_console = console or Console()
     summary_table = build_chat_summary_table(messages)
-    console.print(summary_table)
+    active_console.print(summary_table)
 
 
 def display_messages(
@@ -577,9 +585,9 @@ def _display_assistant_message_compact_v2(message: AgentAssistantMessage, contex
             meta_parts.append(f"Latency:{message.meta.latency_ms}ms")
         if message.meta.output_time_ms is not None:
             meta_parts.append(f"Output:{message.meta.output_time_ms}ms")
-        if message.meta.input_tokens is not None and message.meta.output_tokens is not None:
-            total_tokens = message.meta.input_tokens + message.meta.output_tokens
-            meta_parts.append(f"Tokens:↑{message.meta.input_tokens}↓{message.meta.output_tokens}={total_tokens}")
+        if message.meta.usage and message.meta.usage.input_tokens is not None and message.meta.usage.output_tokens is not None:
+            total_tokens = message.meta.usage.input_tokens + message.meta.usage.output_tokens
+            meta_parts.append(f"Tokens:↑{message.meta.usage.input_tokens}↓{message.meta.usage.output_tokens}={total_tokens}")
 
         if meta_parts:
             meta_info = f" [dim]({' | '.join(meta_parts)})[/dim]"
lite_agent/client.py CHANGED
@@ -100,6 +100,7 @@ class BaseLLMClient(abc.ABC):
         tools: list[ChatCompletionToolParam] | None = None,
         tool_choice: str = "auto",
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
         **kwargs: Any,  # noqa: ANN401
     ) -> Any:  # noqa: ANN401
@@ -112,6 +113,7 @@ class BaseLLMClient(abc.ABC):
         tools: list[FunctionToolParam] | None = None,
         tool_choice: Literal["none", "auto", "required"] = "auto",
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
         **kwargs: Any,  # noqa: ANN401
     ) -> Any:  # noqa: ANN401
@@ -136,6 +138,7 @@ class LiteLLMClient(BaseLLMClient):
         tools: list[ChatCompletionToolParam] | None = None,
         tool_choice: str = "auto",
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
         **kwargs: Any,  # noqa: ANN401
     ) -> Any:  # noqa: ANN401
@@ -187,6 +190,7 @@ class LiteLLMClient(BaseLLMClient):
         tools: list[FunctionToolParam] | None = None,
         tool_choice: Literal["none", "auto", "required"] = "auto",
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
         **kwargs: Any,  # noqa: ANN401
     ) -> Any:  # type: ignore[return]  # noqa: ANN401
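
Every `streaming` parameter now sits behind a bare `*`, so positional call sites written against 0.6.0 raise a TypeError under 0.8.0. A sketch of the required call-site change; the wrapper function and variable names are hypothetical:

    from typing import Any


    async def fetch_completion(client: Any, messages: list[dict]) -> Any:
        # 0.6.0 accepted: await client.completion(messages, None, "auto", None, False)
        # 0.8.0 requires streaming (and anything after the bare *) to be passed by keyword:
        return await client.completion(messages, streaming=False)
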
lite_agent/constants.py ADDED

@@ -0,0 +1,30 @@
+from typing import Literal
+
+
+class CompletionMode:
+    """Agent completion modes."""
+
+    STOP: Literal["stop"] = "stop"  # Traditional completion until the model decides to stop
+    CALL: Literal["call"] = "call"  # Completion continues until a specific tool is called
+
+
+class ToolName:
+    """System tool names."""
+
+    TRANSFER_TO_AGENT = "transfer_to_agent"
+    TRANSFER_TO_PARENT = "transfer_to_parent"
+    WAIT_FOR_USER = "wait_for_user"
+
+
+class StreamIncludes:
+    """Default stream includes configuration."""
+
+    DEFAULT_INCLUDES = (
+        "completion_raw",
+        "usage",
+        "function_call",
+        "function_call_output",
+        "content_delta",
+        "function_call_delta",
+        "assistant_message",
+    )
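
The new constants are plain string-valued class attributes, so they compare equal to the literals they replace and existing string-based configuration keeps working. A small sketch; the model name is assumed:

    from lite_agent.agent import Agent
    from lite_agent.constants import CompletionMode, ToolName

    assert CompletionMode.CALL == "call"  # drop-in for the old literal
    assert ToolName.WAIT_FOR_USER == "wait_for_user"

    agent = Agent(
        name="planner",
        instructions="Plan tasks step by step.",
        model="gpt-4o-mini",  # assumed
        completion_condition=CompletionMode.CALL,  # finish only when wait_for_user is called
    )
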
@@ -5,7 +5,7 @@ This module provides common message transfer functions that can be used
 with agents to preprocess messages before sending them to the API.
 """
 
-from lite_agent.types import RunnerMessages
+from lite_agent.types import NewUserMessage, RunnerMessages, UserTextContent
 
 
 def consolidate_history_transfer(messages: RunnerMessages) -> RunnerMessages:
@@ -43,8 +43,8 @@ def consolidate_history_transfer(messages: RunnerMessages) -> RunnerMessages:
     # Create the consolidated message
     consolidated_content = "以下是目前发生的所有交互:\n\n" + "\n".join(xml_content) + "\n\n接下来该做什么?"
 
-    # Return a single user message
-    return [{"role": "user", "content": consolidated_content}]
+    # Return a single user message using NewMessage format
+    return [NewUserMessage(content=[UserTextContent(text=consolidated_content)])]
 
 
 def _process_message_to_xml(message: dict | object) -> list[str]:
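
Because the function now returns a NewMessage-format list, it can still be handed straight to an agent's message_transfer hook. A sketch; the module path is assumed and the model name is hypothetical:

    from lite_agent.agent import Agent
    from lite_agent.message_transfers import consolidate_history_transfer  # assumed module path

    agent = Agent(
        name="support",
        instructions="Answer the user's questions.",
        model="gpt-4o-mini",  # assumed
        message_transfer=consolidate_history_transfer,  # history collapses to a single user message
    )
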
@@ -26,6 +26,7 @@ from lite_agent.types import (
     ToolCallFunction,
     UsageEvent,
 )
+from lite_agent.utils.metrics import TimingMetrics
 
 
 class CompletionEventProcessor:
@@ -71,21 +72,18 @@ class CompletionEventProcessor:
         if not self.yielded_content:
             self.yielded_content = True
             end_time = datetime.now(timezone.utc)
-            latency_ms = None
-            output_time_ms = None
-            # latency_ms: time from starting to prepare output to the LLM's first output character
-            if self._start_time and self._first_output_time:
-                latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            # output_time_ms: time from the first output character to output completion
-            if self._first_output_time and self._output_complete_time:
-                output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
+            latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+            output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
 
             usage = MessageUsage(
                 input_tokens=self._usage_data.get("input_tokens"),
                 output_tokens=self._usage_data.get("output_tokens"),
             )
+            # Extract model information from chunk
+            model_name = getattr(chunk, "model", None)
             meta = AssistantMessageMeta(
                 sent_at=end_time,
+                model=model_name,
                 latency_ms=latency_ms,
                 total_time_ms=output_time_ms,
                 usage=usage,
@@ -152,21 +150,18 @@ class CompletionEventProcessor:
         if not self.yielded_content:
             self.yielded_content = True
             end_time = datetime.now(timezone.utc)
-            latency_ms = None
-            output_time_ms = None
-            # latency_ms: time from starting to prepare output to the LLM's first output character
-            if self._start_time and self._first_output_time:
-                latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            # output_time_ms: time from the first output character to output completion
-            if self._first_output_time and self._output_complete_time:
-                output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
+            latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+            output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
 
             usage = MessageUsage(
                 input_tokens=self._usage_data.get("input_tokens"),
                 output_tokens=self._usage_data.get("output_tokens"),
             )
+            # Extract model information from chunk
+            model_name = getattr(chunk, "model", None)
             meta = AssistantMessageMeta(
                 sent_at=end_time,
+                model=model_name,
                 latency_ms=latency_ms,
                 total_time_ms=output_time_ms,
                 usage=usage,
@@ -199,10 +194,9 @@ class CompletionEventProcessor:
             results.append(UsageEvent(usage=EventUsage(input_tokens=usage["prompt_tokens"], output_tokens=usage["completion_tokens"])))
 
         # Then yield timing event if we have timing data
-        if self._start_time and self._first_output_time and self._output_complete_time:
-            latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
-
+        latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+        output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+        if latency_ms is not None and output_time_ms is not None:
             results.append(
                 TimingEvent(
                     timing=Timing(
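
The `lite_agent.utils.metrics` module these processors now import is not included in this diff. Judging from the call sites and the inline arithmetic they replace, the helpers presumably look like the following sketch (an inference, not the released implementation):

    from datetime import datetime


    class TimingMetrics:
        """Reconstructed timing helpers; signatures inferred from the call sites above."""

        @staticmethod
        def calculate_latency_ms(start: datetime | None, first_output: datetime | None) -> int | None:
            # Time from starting to prepare output to the first output character.
            if start and first_output:
                return int((first_output - start).total_seconds() * 1000)
            return None

        @staticmethod
        def calculate_output_time_ms(first_output: datetime | None, complete: datetime | None) -> int | None:
            # Time from the first output character to output completion.
            if first_output and complete:
                return int((complete - first_output).total_seconds() * 1000)
            return None
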
@@ -22,12 +22,14 @@ from lite_agent.types import (
     ContentDeltaEvent,
     EventUsage,
     FunctionCallEvent,
+    MessageUsage,
     NewAssistantMessage,
     ResponseRawEvent,
     Timing,
     TimingEvent,
     UsageEvent,
 )
+from lite_agent.utils.metrics import TimingMetrics
 
 
 class ResponseEventProcessor:
@@ -111,21 +113,26 @@ class ResponseEventProcessor:
             content = item.get("content", [])
             if content and isinstance(content, list) and len(content) > 0:
                 end_time = datetime.now(timezone.utc)
-                latency_ms = None
-                output_time_ms = None
-                # latency_ms: time from starting to prepare output to the LLM's first output character
-                if self._start_time and self._first_output_time:
-                    latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-                # output_time_ms: time from the first output character to output completion
-                if self._first_output_time and self._output_complete_time:
-                    output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
-
+                latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+                output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+
+                # Extract model information from event
+                model_name = getattr(event, "model", None)
+                # Fall back to the nested response object if it carries the model info
+                if hasattr(event, "response") and hasattr(event.response, "model"):
+                    model_name = getattr(event.response, "model", None)
+                # Create usage information
+                usage = MessageUsage(
+                    input_tokens=self._usage_data.get("input_tokens"),
+                    output_tokens=self._usage_data.get("output_tokens"),
+                    total_tokens=(self._usage_data.get("input_tokens") or 0) + (self._usage_data.get("output_tokens") or 0),
+                )
                 meta = AssistantMessageMeta(
                     sent_at=end_time,
+                    model=model_name,
                     latency_ms=latency_ms,
                     output_time_ms=output_time_ms,
-                    input_tokens=self._usage_data.get("input_tokens"),
-                    output_tokens=self._usage_data.get("output_tokens"),
+                    usage=usage,
                 )
                 return [
                     AssistantMessageEvent(
@@ -173,10 +180,9 @@ class ResponseEventProcessor:
                 )
 
         # Then yield timing event if we have timing data
-        if self._start_time and self._first_output_time and self._output_complete_time:
-            latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
-
+        latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+        output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+        if latency_ms is not None and output_time_ms is not None:
             results.append(
                 TimingEvent(
                     timing=Timing(
lite_agent/response_handlers/__init__.py CHANGED

@@ -1,4 +1,5 @@
 """Response handlers for unified streaming and non-streaming processing."""
+
 from lite_agent.response_handlers.base import ResponseHandler
 from lite_agent.response_handlers.completion import CompletionResponseHandler
 from lite_agent.response_handlers.responses import ResponsesAPIHandler