lite-agent 0.5.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lite-agent might be problematic.

@@ -26,6 +26,7 @@ from lite_agent.types import (
     ToolCallFunction,
     UsageEvent,
 )
+from lite_agent.utils.metrics import TimingMetrics
 
 
 class CompletionEventProcessor:
@@ -71,21 +72,18 @@ class CompletionEventProcessor:
         if not self.yielded_content:
             self.yielded_content = True
             end_time = datetime.now(timezone.utc)
-            latency_ms = None
-            output_time_ms = None
-            # latency_ms: time from starting to prepare output until the LLM emits its first character
-            if self._start_time and self._first_output_time:
-                latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            # output_time_ms: time from the first emitted character until output completes
-            if self._first_output_time and self._output_complete_time:
-                output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
+            latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+            output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
 
             usage = MessageUsage(
                 input_tokens=self._usage_data.get("input_tokens"),
                 output_tokens=self._usage_data.get("output_tokens"),
             )
+            # Extract model information from chunk
+            model_name = getattr(chunk, "model", None)
             meta = AssistantMessageMeta(
                 sent_at=end_time,
+                model=model_name,
                 latency_ms=latency_ms,
                 total_time_ms=output_time_ms,
                 usage=usage,
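
The lite_agent.utils.metrics module itself is not part of this diff, but the removed inline code above pins down what the two helpers must do. A minimal sketch, reconstructed from those removed lines (method names taken from the call sites; everything else is inferred):

from datetime import datetime


class TimingMetrics:
    """Hypothetical reconstruction of lite_agent/utils/metrics.py; not shown in this diff."""

    @staticmethod
    def calculate_latency_ms(start_time: datetime | None, first_output_time: datetime | None) -> int | None:
        # Milliseconds from starting to prepare output until the first
        # character arrives, or None if either timestamp is missing.
        if start_time and first_output_time:
            return int((first_output_time - start_time).total_seconds() * 1000)
        return None

    @staticmethod
    def calculate_output_time_ms(first_output_time: datetime | None, output_complete_time: datetime | None) -> int | None:
        # Milliseconds from the first output character until output
        # completes, or None if either timestamp is missing.
        if first_output_time and output_complete_time:
            return int((output_complete_time - first_output_time).total_seconds() * 1000)
        return None

Returning None when a timestamp is missing is what lets the later hunks collapse the old three-timestamp check into a single "if latency_ms is not None and output_time_ms is not None" guard.
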
@@ -152,21 +150,18 @@ class CompletionEventProcessor:
         if not self.yielded_content:
             self.yielded_content = True
             end_time = datetime.now(timezone.utc)
-            latency_ms = None
-            output_time_ms = None
-            # latency_ms: time from starting to prepare output until the LLM emits its first character
-            if self._start_time and self._first_output_time:
-                latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            # output_time_ms: time from the first emitted character until output completes
-            if self._first_output_time and self._output_complete_time:
-                output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
+            latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+            output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
 
             usage = MessageUsage(
                 input_tokens=self._usage_data.get("input_tokens"),
                 output_tokens=self._usage_data.get("output_tokens"),
             )
+            # Extract model information from chunk
+            model_name = getattr(chunk, "model", None)
             meta = AssistantMessageMeta(
                 sent_at=end_time,
+                model=model_name,
                 latency_ms=latency_ms,
                 total_time_ms=output_time_ms,
                 usage=usage,
@@ -199,10 +194,9 @@ class CompletionEventProcessor:
         results.append(UsageEvent(usage=EventUsage(input_tokens=usage["prompt_tokens"], output_tokens=usage["completion_tokens"])))
 
         # Then yield timing event if we have timing data
-        if self._start_time and self._first_output_time and self._output_complete_time:
-            latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
-
+        latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+        output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+        if latency_ms is not None and output_time_ms is not None:
             results.append(
                 TimingEvent(
                     timing=Timing(
@@ -22,12 +22,14 @@ from lite_agent.types import (
     ContentDeltaEvent,
     EventUsage,
     FunctionCallEvent,
+    MessageUsage,
     NewAssistantMessage,
     ResponseRawEvent,
     Timing,
     TimingEvent,
     UsageEvent,
 )
+from lite_agent.utils.metrics import TimingMetrics
 
 
 class ResponseEventProcessor:
@@ -111,21 +113,26 @@ class ResponseEventProcessor:
             content = item.get("content", [])
             if content and isinstance(content, list) and len(content) > 0:
                 end_time = datetime.now(timezone.utc)
-                latency_ms = None
-                output_time_ms = None
-                # latency_ms: time from starting to prepare output until the LLM emits its first character
-                if self._start_time and self._first_output_time:
-                    latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-                # output_time_ms: time from the first emitted character until output completes
-                if self._first_output_time and self._output_complete_time:
-                    output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
-
+                latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+                output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+
+                # Extract model information from event
+                model_name = getattr(event, "model", None)
+                # Debug: check if event has model info in different location
+                if hasattr(event, "response") and hasattr(event.response, "model"):
+                    model_name = getattr(event.response, "model", None)
+                # Create usage information
+                usage = MessageUsage(
+                    input_tokens=self._usage_data.get("input_tokens"),
+                    output_tokens=self._usage_data.get("output_tokens"),
+                    total_tokens=(self._usage_data.get("input_tokens") or 0) + (self._usage_data.get("output_tokens") or 0),
+                )
                 meta = AssistantMessageMeta(
                     sent_at=end_time,
+                    model=model_name,
                     latency_ms=latency_ms,
                     output_time_ms=output_time_ms,
-                    input_tokens=self._usage_data.get("input_tokens"),
-                    output_tokens=self._usage_data.get("output_tokens"),
+                    usage=usage,
                 )
                 return [
                     AssistantMessageEvent(
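
Note the "or 0" coalescing in the new total_tokens computation: a missing count degrades to zero instead of raising a TypeError on None + int. For example:

# Behavior of the total_tokens expression when one count is missing:
usage_data = {"input_tokens": None, "output_tokens": 42}
total = (usage_data.get("input_tokens") or 0) + (usage_data.get("output_tokens") or 0)
assert total == 42  # None + 42 would have raised a TypeError
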
@@ -173,10 +180,9 @@ class ResponseEventProcessor:
         )
 
         # Then yield timing event if we have timing data
-        if self._start_time and self._first_output_time and self._output_complete_time:
-            latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
-
+        latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+        output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+        if latency_ms is not None and output_time_ms is not None:
             results.append(
                 TimingEvent(
                     timing=Timing(
@@ -0,0 +1,11 @@
+"""Response handlers for unified streaming and non-streaming processing."""
+
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.response_handlers.completion import CompletionResponseHandler
+from lite_agent.response_handlers.responses import ResponsesAPIHandler
+
+__all__ = [
+    "CompletionResponseHandler",
+    "ResponseHandler",
+    "ResponsesAPIHandler",
+]
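
Since __all__ re-exports all three classes, callers can import handlers from the package root rather than the individual submodules:

# Public imports enabled by this __init__.py.
from lite_agent.response_handlers import (
    CompletionResponseHandler,
    ResponseHandler,
    ResponsesAPIHandler,
)
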
@@ -0,0 +1,54 @@
+"""Base response handler for unified streaming and non-streaming response processing."""
+
+from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator
+from pathlib import Path
+from typing import Any
+
+from lite_agent.types import AgentChunk
+
+
+class ResponseHandler(ABC):
+    """Base class for handling both streaming and non-streaming responses."""
+
+    async def handle(
+        self,
+        response: Any,  # noqa: ANN401
+        *,
+        streaming: bool,
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle a response in either streaming or non-streaming mode.
+
+        Args:
+            response: The LLM response object
+            streaming: Whether to process as streaming or non-streaming
+            record_to: Optional file path to record the conversation
+
+        Yields:
+            AgentChunk: Processed chunks from the response
+        """
+        if streaming:
+            stream = self._handle_streaming(response, record_to)
+            async for chunk in stream:
+                yield chunk
+        else:
+            stream = self._handle_non_streaming(response, record_to)
+            async for chunk in stream:
+                yield chunk
+
+    @abstractmethod
+    def _handle_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming response."""
+
+    @abstractmethod
+    def _handle_non_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming response."""
@@ -0,0 +1,78 @@
+"""Completion API response handler."""
+
+from collections.abc import AsyncGenerator
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from litellm import CustomStreamWrapper
+
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.stream_handlers import litellm_completion_stream_handler
+from lite_agent.types import AgentChunk
+from lite_agent.types.events import AssistantMessageEvent, Usage, UsageEvent
+from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, AssistantToolCall, NewAssistantMessage
+
+
+class CompletionResponseHandler(ResponseHandler):
+    """Handler for Completion API responses."""
+
+    async def _handle_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming completion response."""
+        if isinstance(response, CustomStreamWrapper):
+            async for chunk in litellm_completion_stream_handler(response, record_to):
+                yield chunk
+        else:
+            msg = "Response is not a CustomStreamWrapper, cannot stream chunks."
+            raise TypeError(msg)
+
+    async def _handle_non_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,  # noqa: ARG002
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming completion response."""
+        # Convert completion response to chunks
+        if hasattr(response, "choices") and response.choices:
+            choice = response.choices[0]
+            content_items = []
+
+            # Add text content
+            if choice.message and choice.message.content:
+                content_items.append(AssistantTextContent(text=choice.message.content))
+
+            # Handle tool calls
+            if choice.message and choice.message.tool_calls:
+                for tool_call in choice.message.tool_calls:
+                    content_items.append(  # noqa: PERF401
+                        AssistantToolCall(
+                            call_id=tool_call.id,
+                            name=tool_call.function.name,
+                            arguments=tool_call.function.arguments,
+                        ),
+                    )
+
+            # Always yield assistant message, even if content is empty for tool calls
+            if choice.message and (content_items or choice.message.tool_calls):
+                # Extract model information from response
+                model_name = getattr(response, "model", None)
+                message = NewAssistantMessage(
+                    content=content_items,
+                    meta=AssistantMessageMeta(
+                        sent_at=datetime.now(timezone.utc),
+                        model=model_name,
+                    ),
+                )
+                yield AssistantMessageEvent(message=message)
+
+        # Yield usage information if available
+        if hasattr(response, "usage") and response.usage:
+            usage = Usage(
+                input_tokens=response.usage.prompt_tokens,
+                output_tokens=response.usage.completion_tokens,
+            )
+            yield UsageEvent(usage=usage)
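
A caller now picks the mode per call via the keyword-only streaming flag. A sketch of driving the handler, assuming response came from litellm.acompletion with stream=False:

# Hypothetical driver; the chunk types follow the events yielded above.
handler = CompletionResponseHandler()

async def consume(response) -> None:
    async for chunk in handler.handle(response, streaming=False):
        print(type(chunk).__name__)  # AssistantMessageEvent, then UsageEvent if usage is present
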
@@ -0,0 +1,76 @@
+"""Responses API response handler."""
+
+from collections.abc import AsyncGenerator
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.stream_handlers import litellm_response_stream_handler
+from lite_agent.types import AgentChunk
+from lite_agent.types.events import AssistantMessageEvent, Usage, UsageEvent
+from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, AssistantToolCall, NewAssistantMessage
+
+
+class ResponsesAPIHandler(ResponseHandler):
+    """Handler for Responses API responses."""
+
+    async def _handle_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming responses API response."""
+        async for chunk in litellm_response_stream_handler(response, record_to):
+            yield chunk
+
+    async def _handle_non_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,  # noqa: ARG002
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming responses API response."""
+        # Convert ResponsesAPIResponse to chunks
+        if hasattr(response, "output") and response.output:
+            content_items = []
+
+            for output_item in response.output:
+                # Handle function tool calls
+                if hasattr(output_item, "type") and output_item.type == "function_call":
+                    content_items.append(
+                        AssistantToolCall(
+                            call_id=output_item.call_id,
+                            name=output_item.name,
+                            arguments=output_item.arguments,
+                        ),
+                    )
+                # Handle text content (if exists)
+                elif hasattr(output_item, "content") and output_item.content:
+                    content_text = ""
+                    for content_item in output_item.content:
+                        if hasattr(content_item, "text"):
+                            content_text += content_item.text
+
+                    if content_text:
+                        content_items.append(AssistantTextContent(text=content_text))
+
+            # Create assistant message if we have any content
+            if content_items:
+                # Extract model information from response
+                model_name = getattr(response, "model", None)
+                message = NewAssistantMessage(
+                    content=content_items,
+                    meta=AssistantMessageMeta(
+                        sent_at=datetime.now(timezone.utc),
+                        model=model_name,
+                    ),
+                )
+                yield AssistantMessageEvent(message=message)
+
+        # Yield usage information if available
+        if hasattr(response, "usage") and response.usage:
+            usage = Usage(
+                input_tokens=response.usage.input_tokens,
+                output_tokens=response.usage.output_tokens,
+            )
+            yield UsageEvent(usage=usage)
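
For reference, the non-streaming path above relies only on attribute access, so the expected Responses API shape can be sketched with stand-in objects (illustrative only; real responses come from litellm):

from types import SimpleNamespace

# Minimal stand-in mirroring the attributes _handle_non_streaming reads.
fake_response = SimpleNamespace(
    model="example-model",  # hypothetical model name
    output=[
        SimpleNamespace(type="function_call", call_id="call_1", name="get_weather", arguments='{"city": "Paris"}'),
        SimpleNamespace(type="message", content=[SimpleNamespace(text="Checking the weather...")]),
    ],
    usage=SimpleNamespace(input_tokens=10, output_tokens=5),
)
# ResponsesAPIHandler().handle(fake_response, streaming=False) would yield one
# AssistantMessageEvent (a tool call plus text) followed by one UsageEvent.
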