lite-agent 0.6.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lite-agent might be problematic. Click here for more details.

@@ -22,12 +22,14 @@ from lite_agent.types import (
22
22
  ContentDeltaEvent,
23
23
  EventUsage,
24
24
  FunctionCallEvent,
25
+ MessageUsage,
25
26
  NewAssistantMessage,
26
27
  ResponseRawEvent,
27
28
  Timing,
28
29
  TimingEvent,
29
30
  UsageEvent,
30
31
  )
32
+ from lite_agent.utils.metrics import TimingMetrics
31
33
 
32
34
 
33
35
  class ResponseEventProcessor:
@@ -111,21 +113,28 @@ class ResponseEventProcessor:
111
113
  content = item.get("content", [])
112
114
  if content and isinstance(content, list) and len(content) > 0:
113
115
  end_time = datetime.now(timezone.utc)
114
- latency_ms = None
115
- output_time_ms = None
116
- # latency_ms: 从开始准备输出到 LLM 输出第一个字符的时间差
117
- if self._start_time and self._first_output_time:
118
- latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
119
- # output_time_ms: 从输出第一个字符到输出完成的时间差
120
- if self._first_output_time and self._output_complete_time:
121
- output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
122
-
116
+ latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
117
+ output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
118
+
119
+ # Extract model information from event
120
+ model_name = getattr(event, "model", None)
121
+ # Debug: check if event has model info in different location
122
+ if hasattr(event, "response"):
123
+ response = getattr(event, "response", None)
124
+ if response and hasattr(response, "model"):
125
+ model_name = getattr(response, "model", None)
126
+ # Create usage information
127
+ usage = MessageUsage(
128
+ input_tokens=self._usage_data.get("input_tokens"),
129
+ output_tokens=self._usage_data.get("output_tokens"),
130
+ total_tokens=(self._usage_data.get("input_tokens") or 0) + (self._usage_data.get("output_tokens") or 0),
131
+ )
123
132
  meta = AssistantMessageMeta(
124
133
  sent_at=end_time,
134
+ model=model_name,
125
135
  latency_ms=latency_ms,
126
136
  output_time_ms=output_time_ms,
127
- input_tokens=self._usage_data.get("input_tokens"),
128
- output_tokens=self._usage_data.get("output_tokens"),
137
+ usage=usage,
129
138
  )
130
139
  return [
131
140
  AssistantMessageEvent(
@@ -173,10 +182,9 @@ class ResponseEventProcessor:
173
182
  )
174
183
 
175
184
  # Then yield timing event if we have timing data
176
- if self._start_time and self._first_output_time and self._output_complete_time:
177
- latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
178
- output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
179
-
185
+ latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
186
+ output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
187
+ if latency_ms is not None and output_time_ms is not None:
180
188
  results.append(
181
189
  TimingEvent(
182
190
  timing=Timing(
@@ -1,4 +1,5 @@
1
1
  """Response handlers for unified streaming and non-streaming processing."""
2
+
2
3
  from lite_agent.response_handlers.base import ResponseHandler
3
4
  from lite_agent.response_handlers.completion import CompletionResponseHandler
4
5
  from lite_agent.response_handlers.responses import ResponsesAPIHandler
@@ -1,4 +1,5 @@
1
1
  """Base response handler for unified streaming and non-streaming response processing."""
2
+
2
3
  from abc import ABC, abstractmethod
3
4
  from collections.abc import AsyncGenerator
4
5
  from pathlib import Path
@@ -12,35 +13,42 @@ class ResponseHandler(ABC):
12
13
 
13
14
  async def handle(
14
15
  self,
15
- response: Any,
16
+ response: Any, # noqa: ANN401
17
+ *,
16
18
  streaming: bool,
17
19
  record_to: Path | None = None,
18
20
  ) -> AsyncGenerator[AgentChunk, None]:
19
21
  """Handle a response in either streaming or non-streaming mode.
20
-
22
+
21
23
  Args:
22
24
  response: The LLM response object
23
25
  streaming: Whether to process as streaming or non-streaming
24
26
  record_to: Optional file path to record the conversation
25
-
27
+
26
28
  Yields:
27
29
  AgentChunk: Processed chunks from the response
28
30
  """
29
31
  if streaming:
30
- async for chunk in self._handle_streaming(response, record_to):
32
+ stream = self._handle_streaming(response, record_to)
33
+ async for chunk in stream:
31
34
  yield chunk
32
35
  else:
33
- async for chunk in self._handle_non_streaming(response, record_to):
36
+ stream = self._handle_non_streaming(response, record_to)
37
+ async for chunk in stream:
34
38
  yield chunk
35
39
 
36
40
  @abstractmethod
37
- async def _handle_streaming(
38
- self, response: Any, record_to: Path | None = None,
41
+ def _handle_streaming(
42
+ self,
43
+ response: Any, # noqa: ANN401
44
+ record_to: Path | None = None,
39
45
  ) -> AsyncGenerator[AgentChunk, None]:
40
46
  """Handle streaming response."""
41
47
 
42
48
  @abstractmethod
43
- async def _handle_non_streaming(
44
- self, response: Any, record_to: Path | None = None,
49
+ def _handle_non_streaming(
50
+ self,
51
+ response: Any, # noqa: ANN401
52
+ record_to: Path | None = None,
45
53
  ) -> AsyncGenerator[AgentChunk, None]:
46
54
  """Handle non-streaming response."""
@@ -1,4 +1,5 @@
1
1
  """Completion API response handler."""
2
+
2
3
  from collections.abc import AsyncGenerator
3
4
  from datetime import datetime, timezone
4
5
  from pathlib import Path
@@ -9,15 +10,17 @@ from litellm import CustomStreamWrapper
9
10
  from lite_agent.response_handlers.base import ResponseHandler
10
11
  from lite_agent.stream_handlers import litellm_completion_stream_handler
11
12
  from lite_agent.types import AgentChunk
12
- from lite_agent.types.events import AssistantMessageEvent
13
- from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, NewAssistantMessage
13
+ from lite_agent.types.events import AssistantMessageEvent, Usage, UsageEvent
14
+ from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, AssistantToolCall, NewAssistantMessage
14
15
 
15
16
 
16
17
  class CompletionResponseHandler(ResponseHandler):
17
18
  """Handler for Completion API responses."""
18
19
 
19
20
  async def _handle_streaming(
20
- self, response: Any, record_to: Path | None = None,
21
+ self,
22
+ response: Any, # noqa: ANN401
23
+ record_to: Path | None = None,
21
24
  ) -> AsyncGenerator[AgentChunk, None]:
22
25
  """Handle streaming completion response."""
23
26
  if isinstance(response, CustomStreamWrapper):
@@ -28,7 +31,9 @@ class CompletionResponseHandler(ResponseHandler):
28
31
  raise TypeError(msg)
29
32
 
30
33
  async def _handle_non_streaming(
31
- self, response: Any, record_to: Path | None = None,
34
+ self,
35
+ response: Any, # noqa: ANN401
36
+ record_to: Path | None = None, # noqa: ARG002
32
37
  ) -> AsyncGenerator[AgentChunk, None]:
33
38
  """Handle non-streaming completion response."""
34
39
  # Convert completion response to chunks
@@ -40,11 +45,34 @@ class CompletionResponseHandler(ResponseHandler):
40
45
  if choice.message and choice.message.content:
41
46
  content_items.append(AssistantTextContent(text=choice.message.content))
42
47
 
43
- # TODO: Handle tool calls in the future
48
+ # Handle tool calls
49
+ if choice.message and choice.message.tool_calls:
50
+ for tool_call in choice.message.tool_calls:
51
+ content_items.append( # noqa: PERF401
52
+ AssistantToolCall(
53
+ call_id=tool_call.id,
54
+ name=tool_call.function.name,
55
+ arguments=tool_call.function.arguments,
56
+ ),
57
+ )
44
58
 
45
- if content_items:
59
+ # Always yield assistant message, even if content is empty for tool calls
60
+ if choice.message and (content_items or choice.message.tool_calls):
61
+ # Extract model information from response
62
+ model_name = getattr(response, "model", None)
46
63
  message = NewAssistantMessage(
47
64
  content=content_items,
48
- meta=AssistantMessageMeta(sent_at=datetime.now(timezone.utc)),
65
+ meta=AssistantMessageMeta(
66
+ sent_at=datetime.now(timezone.utc),
67
+ model=model_name,
68
+ ),
49
69
  )
50
70
  yield AssistantMessageEvent(message=message)
71
+
72
+ # Yield usage information if available
73
+ if hasattr(response, "usage") and response.usage:
74
+ usage = Usage(
75
+ input_tokens=response.usage.prompt_tokens,
76
+ output_tokens=response.usage.completion_tokens,
77
+ )
78
+ yield UsageEvent(usage=usage)
@@ -1,4 +1,5 @@
1
1
  """Responses API response handler."""
2
+
2
3
  from collections.abc import AsyncGenerator
3
4
  from datetime import datetime, timezone
4
5
  from pathlib import Path
@@ -7,36 +8,69 @@ from typing import Any
7
8
  from lite_agent.response_handlers.base import ResponseHandler
8
9
  from lite_agent.stream_handlers import litellm_response_stream_handler
9
10
  from lite_agent.types import AgentChunk
10
- from lite_agent.types.events import AssistantMessageEvent
11
- from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, NewAssistantMessage
11
+ from lite_agent.types.events import AssistantMessageEvent, Usage, UsageEvent
12
+ from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, AssistantToolCall, NewAssistantMessage
12
13
 
13
14
 
14
15
  class ResponsesAPIHandler(ResponseHandler):
15
16
  """Handler for Responses API responses."""
16
17
 
17
18
  async def _handle_streaming(
18
- self, response: Any, record_to: Path | None = None,
19
+ self,
20
+ response: Any, # noqa: ANN401
21
+ record_to: Path | None = None,
19
22
  ) -> AsyncGenerator[AgentChunk, None]:
20
23
  """Handle streaming responses API response."""
21
24
  async for chunk in litellm_response_stream_handler(response, record_to):
22
25
  yield chunk
23
26
 
24
27
  async def _handle_non_streaming(
25
- self, response: Any, record_to: Path | None = None,
28
+ self,
29
+ response: Any, # noqa: ANN401
30
+ record_to: Path | None = None, # noqa: ARG002
26
31
  ) -> AsyncGenerator[AgentChunk, None]:
27
32
  """Handle non-streaming responses API response."""
28
33
  # Convert ResponsesAPIResponse to chunks
29
34
  if hasattr(response, "output") and response.output:
30
- for output_message in response.output:
31
- if hasattr(output_message, "content") and output_message.content:
35
+ content_items = []
36
+
37
+ for output_item in response.output:
38
+ # Handle function tool calls
39
+ if hasattr(output_item, "type") and output_item.type == "function_call":
40
+ content_items.append(
41
+ AssistantToolCall(
42
+ call_id=output_item.call_id,
43
+ name=output_item.name,
44
+ arguments=output_item.arguments,
45
+ ),
46
+ )
47
+ # Handle text content (if exists)
48
+ elif hasattr(output_item, "content") and output_item.content:
32
49
  content_text = ""
33
- for content_item in output_message.content:
50
+ for content_item in output_item.content:
34
51
  if hasattr(content_item, "text"):
35
52
  content_text += content_item.text
36
53
 
37
54
  if content_text:
38
- message = NewAssistantMessage(
39
- content=[AssistantTextContent(text=content_text)],
40
- meta=AssistantMessageMeta(sent_at=datetime.now(timezone.utc)),
41
- )
42
- yield AssistantMessageEvent(message=message)
55
+ content_items.append(AssistantTextContent(text=content_text))
56
+
57
+ # Create assistant message if we have any content
58
+ if content_items:
59
+ # Extract model information from response
60
+ model_name = getattr(response, "model", None)
61
+ message = NewAssistantMessage(
62
+ content=content_items,
63
+ meta=AssistantMessageMeta(
64
+ sent_at=datetime.now(timezone.utc),
65
+ model=model_name,
66
+ ),
67
+ )
68
+ yield AssistantMessageEvent(message=message)
69
+
70
+ # Yield usage information if available
71
+ if hasattr(response, "usage") and response.usage:
72
+ usage = Usage(
73
+ input_tokens=response.usage.input_tokens,
74
+ output_tokens=response.usage.output_tokens,
75
+ )
76
+ yield UsageEvent(usage=usage)