lite-agent 0.5.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lite-agent might be problematic.
- lite_agent/agent.py +188 -49
- lite_agent/chat_display.py +22 -14
- lite_agent/client.py +65 -3
- lite_agent/constants.py +30 -0
- lite_agent/message_transfers.py +3 -3
- lite_agent/processors/completion_event_processor.py +14 -20
- lite_agent/processors/response_event_processor.py +21 -15
- lite_agent/response_handlers/__init__.py +11 -0
- lite_agent/response_handlers/base.py +54 -0
- lite_agent/response_handlers/completion.py +78 -0
- lite_agent/response_handlers/responses.py +76 -0
- lite_agent/runner.py +312 -247
- lite_agent/types/__init__.py +2 -0
- lite_agent/types/messages.py +6 -5
- lite_agent/utils/__init__.py +0 -0
- lite_agent/utils/message_builder.py +211 -0
- lite_agent/utils/metrics.py +50 -0
- {lite_agent-0.5.0.dist-info → lite_agent-0.8.0.dist-info}/METADATA +2 -1
- lite_agent-0.8.0.dist-info/RECORD +31 -0
- lite_agent-0.5.0.dist-info/RECORD +0 -23
- {lite_agent-0.5.0.dist-info → lite_agent-0.8.0.dist-info}/WHEEL +0 -0
lite_agent/processors/completion_event_processor.py

@@ -26,6 +26,7 @@ from lite_agent.types import (
     ToolCallFunction,
     UsageEvent,
 )
+from lite_agent.utils.metrics import TimingMetrics


 class CompletionEventProcessor:
@@ -71,21 +72,18 @@ class CompletionEventProcessor:
         if not self.yielded_content:
             self.yielded_content = True
             end_time = datetime.now(timezone.utc)
-            latency_ms = None
-            output_time_ms = None
-            # latency_ms: time from starting to prepare output until the LLM emits its first character
-            if self._start_time and self._first_output_time:
-                latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            # output_time_ms: time from the first output character until output completes
-            if self._first_output_time and self._output_complete_time:
-                output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
+            latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+            output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)

             usage = MessageUsage(
                 input_tokens=self._usage_data.get("input_tokens"),
                 output_tokens=self._usage_data.get("output_tokens"),
             )
+            # Extract model information from chunk
+            model_name = getattr(chunk, "model", None)
             meta = AssistantMessageMeta(
                 sent_at=end_time,
+                model=model_name,
                 latency_ms=latency_ms,
                 total_time_ms=output_time_ms,
                 usage=usage,
@@ -152,21 +150,18 @@ class CompletionEventProcessor:
         if not self.yielded_content:
             self.yielded_content = True
             end_time = datetime.now(timezone.utc)
-            latency_ms = None
-            output_time_ms = None
-            # latency_ms: time from starting to prepare output until the LLM emits its first character
-            if self._start_time and self._first_output_time:
-                latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            # output_time_ms: time from the first output character until output completes
-            if self._first_output_time and self._output_complete_time:
-                output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
+            latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+            output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)

             usage = MessageUsage(
                 input_tokens=self._usage_data.get("input_tokens"),
                 output_tokens=self._usage_data.get("output_tokens"),
             )
+            # Extract model information from chunk
+            model_name = getattr(chunk, "model", None)
             meta = AssistantMessageMeta(
                 sent_at=end_time,
+                model=model_name,
                 latency_ms=latency_ms,
                 total_time_ms=output_time_ms,
                 usage=usage,
@@ -199,10 +194,9 @@ class CompletionEventProcessor:
             results.append(UsageEvent(usage=EventUsage(input_tokens=usage["prompt_tokens"], output_tokens=usage["completion_tokens"])))

         # Then yield timing event if we have timing data
-
-
-
-
+        latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+        output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+        if latency_ms is not None and output_time_ms is not None:
             results.append(
                 TimingEvent(
                     timing=Timing(
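The helpers these hunks switch to live in the new lite_agent/utils/metrics.py (+50 lines), which this view does not expand. Based on the call sites above and the inline arithmetic they replace, a minimal sketch of TimingMetrics might look like the following; the bodies are inferred from the removed code, not copied from the package:

from datetime import datetime


class TimingMetrics:
    """Sketch of the timing helpers (inferred from call sites, not verbatim)."""

    @staticmethod
    def calculate_latency_ms(start_time: datetime | None, first_output_time: datetime | None) -> int | None:
        # Elapsed ms from starting to prepare output until the LLM emits its first character.
        if start_time and first_output_time:
            return int((first_output_time - start_time).total_seconds() * 1000)
        return None

    @staticmethod
    def calculate_output_time_ms(first_output_time: datetime | None, output_complete_time: datetime | None) -> int | None:
        # Elapsed ms from the first output character until output completes.
        if first_output_time and output_complete_time:
            return int((output_complete_time - first_output_time).total_seconds() * 1000)
        return None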
lite_agent/processors/response_event_processor.py

@@ -22,12 +22,14 @@ from lite_agent.types import (
     ContentDeltaEvent,
     EventUsage,
     FunctionCallEvent,
+    MessageUsage,
     NewAssistantMessage,
     ResponseRawEvent,
     Timing,
     TimingEvent,
     UsageEvent,
 )
+from lite_agent.utils.metrics import TimingMetrics


 class ResponseEventProcessor:
@@ -111,21 +113,26 @@ class ResponseEventProcessor:
             content = item.get("content", [])
             if content and isinstance(content, list) and len(content) > 0:
                 end_time = datetime.now(timezone.utc)
-                latency_ms = None
-                output_time_ms = None
-
-
-
-                #
-                if
-
-
+                latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+                output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+
+                # Extract model information from event
+                model_name = getattr(event, "model", None)
+                # Debug: check if event has model info in different location
+                if hasattr(event, "response") and hasattr(event.response, "model"):
+                    model_name = getattr(event.response, "model", None)
+                # Create usage information
+                usage = MessageUsage(
+                    input_tokens=self._usage_data.get("input_tokens"),
+                    output_tokens=self._usage_data.get("output_tokens"),
+                    total_tokens=(self._usage_data.get("input_tokens") or 0) + (self._usage_data.get("output_tokens") or 0),
+                )
                 meta = AssistantMessageMeta(
                     sent_at=end_time,
+                    model=model_name,
                     latency_ms=latency_ms,
                     output_time_ms=output_time_ms,
-
-                    output_tokens=self._usage_data.get("output_tokens"),
+                    usage=usage,
                 )
                 return [
                     AssistantMessageEvent(
@@ -173,10 +180,9 @@ class ResponseEventProcessor:
         )

         # Then yield timing event if we have timing data
-
-
-
-
+        latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+        output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+        if latency_ms is not None and output_time_ms is not None:
             results.append(
                 TimingEvent(
                     timing=Timing(
lite_agent/response_handlers/__init__.py (new file)

@@ -0,0 +1,11 @@
+"""Response handlers for unified streaming and non-streaming processing."""
+
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.response_handlers.completion import CompletionResponseHandler
+from lite_agent.response_handlers.responses import ResponsesAPIHandler
+
+__all__ = [
+    "CompletionResponseHandler",
+    "ResponseHandler",
+    "ResponsesAPIHandler",
+]
lite_agent/response_handlers/base.py (new file)

@@ -0,0 +1,54 @@
+"""Base response handler for unified streaming and non-streaming response processing."""
+
+from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator
+from pathlib import Path
+from typing import Any
+
+from lite_agent.types import AgentChunk
+
+
+class ResponseHandler(ABC):
+    """Base class for handling both streaming and non-streaming responses."""
+
+    async def handle(
+        self,
+        response: Any,  # noqa: ANN401
+        *,
+        streaming: bool,
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle a response in either streaming or non-streaming mode.
+
+        Args:
+            response: The LLM response object
+            streaming: Whether to process as streaming or non-streaming
+            record_to: Optional file path to record the conversation
+
+        Yields:
+            AgentChunk: Processed chunks from the response
+        """
+        if streaming:
+            stream = self._handle_streaming(response, record_to)
+            async for chunk in stream:
+                yield chunk
+        else:
+            stream = self._handle_non_streaming(response, record_to)
+            async for chunk in stream:
+                yield chunk
+
+    @abstractmethod
+    def _handle_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming response."""
+
+    @abstractmethod
+    def _handle_non_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming response."""
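For orientation, here is a hedged usage sketch of the handler interface defined above: a concrete handler is constructed and handle() dispatches to the streaming or non-streaming path while yielding the same AgentChunk events either way. The consume function and variable names are illustrative, not part of the package:

from pathlib import Path

from lite_agent.response_handlers import ResponsesAPIHandler


async def consume(response) -> None:
    handler = ResponsesAPIHandler()
    # handle() picks _handle_streaming or _handle_non_streaming based on the flag
    # and yields AgentChunk events in both modes.
    async for chunk in handler.handle(response, streaming=True, record_to=Path("run.jsonl")):
        print(type(chunk).__name__)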
lite_agent/response_handlers/completion.py (new file)

@@ -0,0 +1,78 @@
+"""Completion API response handler."""
+
+from collections.abc import AsyncGenerator
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from litellm import CustomStreamWrapper
+
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.stream_handlers import litellm_completion_stream_handler
+from lite_agent.types import AgentChunk
+from lite_agent.types.events import AssistantMessageEvent, Usage, UsageEvent
+from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, AssistantToolCall, NewAssistantMessage
+
+
+class CompletionResponseHandler(ResponseHandler):
+    """Handler for Completion API responses."""
+
+    async def _handle_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming completion response."""
+        if isinstance(response, CustomStreamWrapper):
+            async for chunk in litellm_completion_stream_handler(response, record_to):
+                yield chunk
+        else:
+            msg = "Response is not a CustomStreamWrapper, cannot stream chunks."
+            raise TypeError(msg)
+
+    async def _handle_non_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,  # noqa: ARG002
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming completion response."""
+        # Convert completion response to chunks
+        if hasattr(response, "choices") and response.choices:
+            choice = response.choices[0]
+            content_items = []
+
+            # Add text content
+            if choice.message and choice.message.content:
+                content_items.append(AssistantTextContent(text=choice.message.content))
+
+            # Handle tool calls
+            if choice.message and choice.message.tool_calls:
+                for tool_call in choice.message.tool_calls:
+                    content_items.append(  # noqa: PERF401
+                        AssistantToolCall(
+                            call_id=tool_call.id,
+                            name=tool_call.function.name,
+                            arguments=tool_call.function.arguments,
+                        ),
+                    )
+
+            # Always yield assistant message, even if content is empty for tool calls
+            if choice.message and (content_items or choice.message.tool_calls):
+                # Extract model information from response
+                model_name = getattr(response, "model", None)
+                message = NewAssistantMessage(
+                    content=content_items,
+                    meta=AssistantMessageMeta(
+                        sent_at=datetime.now(timezone.utc),
+                        model=model_name,
+                    ),
+                )
+                yield AssistantMessageEvent(message=message)
+
+        # Yield usage information if available
+        if hasattr(response, "usage") and response.usage:
+            usage = Usage(
+                input_tokens=response.usage.prompt_tokens,
+                output_tokens=response.usage.completion_tokens,
+            )
+            yield UsageEvent(usage=usage)
lite_agent/response_handlers/responses.py (new file)

@@ -0,0 +1,76 @@
+"""Responses API response handler."""
+
+from collections.abc import AsyncGenerator
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.stream_handlers import litellm_response_stream_handler
+from lite_agent.types import AgentChunk
+from lite_agent.types.events import AssistantMessageEvent, Usage, UsageEvent
+from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, AssistantToolCall, NewAssistantMessage
+
+
+class ResponsesAPIHandler(ResponseHandler):
+    """Handler for Responses API responses."""
+
+    async def _handle_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming responses API response."""
+        async for chunk in litellm_response_stream_handler(response, record_to):
+            yield chunk
+
+    async def _handle_non_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,  # noqa: ARG002
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming responses API response."""
+        # Convert ResponsesAPIResponse to chunks
+        if hasattr(response, "output") and response.output:
+            content_items = []
+
+            for output_item in response.output:
+                # Handle function tool calls
+                if hasattr(output_item, "type") and output_item.type == "function_call":
+                    content_items.append(
+                        AssistantToolCall(
+                            call_id=output_item.call_id,
+                            name=output_item.name,
+                            arguments=output_item.arguments,
+                        ),
+                    )
+                # Handle text content (if exists)
+                elif hasattr(output_item, "content") and output_item.content:
+                    content_text = ""
+                    for content_item in output_item.content:
+                        if hasattr(content_item, "text"):
+                            content_text += content_item.text
+
+                    if content_text:
+                        content_items.append(AssistantTextContent(text=content_text))
+
+            # Create assistant message if we have any content
+            if content_items:
+                # Extract model information from response
+                model_name = getattr(response, "model", None)
+                message = NewAssistantMessage(
+                    content=content_items,
+                    meta=AssistantMessageMeta(
+                        sent_at=datetime.now(timezone.utc),
+                        model=model_name,
+                    ),
+                )
+                yield AssistantMessageEvent(message=message)
+
+        # Yield usage information if available
+        if hasattr(response, "usage") and response.usage:
+            usage = Usage(
+                input_tokens=response.usage.input_tokens,
+                output_tokens=response.usage.output_tokens,
+            )
+            yield UsageEvent(usage=usage)
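Both handlers normalize their API's non-streaming responses into the same event types the streaming path produces (AssistantMessageEvent, then UsageEvent). A hedged consumption sketch, assuming a non-streaming Completion API response object is already in hand; the collect helper is illustrative, not part of the package:

from lite_agent.response_handlers import CompletionResponseHandler
from lite_agent.types.events import AssistantMessageEvent, UsageEvent


async def collect(response) -> tuple[list, list]:
    """Gather assistant messages and usage records from a non-streaming response."""
    handler = CompletionResponseHandler()
    messages, usages = [], []
    async for chunk in handler.handle(response, streaming=False):
        if isinstance(chunk, AssistantMessageEvent):
            messages.append(chunk.message)
        elif isinstance(chunk, UsageEvent):
            usages.append(chunk.usage)
    return messages, usages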