lite-agent 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lite-agent might be problematic.
- lite_agent/agent.py +27 -7
- lite_agent/client.py +135 -26
- lite_agent/runner.py +153 -137
- lite_agent/stream_handlers/litellm.py +16 -7
- {lite_agent-0.4.0.dist-info → lite_agent-0.5.0.dist-info}/METADATA +1 -1
- {lite_agent-0.4.0.dist-info → lite_agent-0.5.0.dist-info}/RECORD +7 -7
- {lite_agent-0.4.0.dist-info → lite_agent-0.5.0.dist-info}/WHEEL +0 -0
lite_agent/agent.py
CHANGED

@@ -7,7 +7,7 @@ from funcall import Funcall
 from jinja2 import Environment, FileSystemLoader
 from litellm import CustomStreamWrapper

-from lite_agent.client import BaseLLMClient, LiteLLMClient
+from lite_agent.client import BaseLLMClient, LiteLLMClient, ReasoningConfig
 from lite_agent.loggers import logger
 from lite_agent.stream_handlers import litellm_completion_stream_handler, litellm_response_stream_handler
 from lite_agent.types import AgentChunk, FunctionCallEvent, FunctionCallOutputEvent, RunnerMessages, ToolCall, message_to_llm_dict, system_message_to_llm_dict
@@ -32,15 +32,21 @@ class Agent:
         handoffs: list["Agent"] | None = None,
         message_transfer: Callable[[RunnerMessages], RunnerMessages] | None = None,
         completion_condition: str = "stop",
+        reasoning: ReasoningConfig = None,
     ) -> None:
         self.name = name
         self.instructions = instructions
+        self.reasoning = reasoning
+
         if isinstance(model, BaseLLMClient):
             # If model is a BaseLLMClient instance, use it directly
             self.client = model
         else:
             # Otherwise, create a LitellmClient instance
-            self.client = LiteLLMClient(
+            self.client = LiteLLMClient(
+                model=model,
+                reasoning=reasoning,
+            )
         self.completion_condition = completion_condition
         self.handoffs = handoffs if handoffs else []
         self._parent: Agent | None = None
@@ -174,9 +180,11 @@ class Agent:
         if self.completion_condition == "call":
             instructions = WAIT_FOR_USER_INSTRUCTIONS_TEMPLATE.render(extra_instructions=None) + "\n\n" + instructions
         return [
-            system_message_to_llm_dict(
+            system_message_to_llm_dict(
+                NewSystemMessage(
+                    content=f"You are {self.name}. {instructions}",
+                ),
+            ),
             *converted_messages,
         ]

@@ -267,7 +275,12 @@ class Agent:
             res.append(message)
         return res

-    async def completion(
+    async def completion(
+        self,
+        messages: RunnerMessages,
+        record_to_file: Path | None = None,
+        reasoning: ReasoningConfig = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
         # Apply message transfer callback if provided - always use legacy format for LLM compatibility
         processed_messages = messages
         if self.message_transfer:
@@ -282,6 +295,7 @@ class Agent:
             messages=self.message_histories,
             tools=tools,
             tool_choice="auto",  # TODO: make this configurable
+            reasoning=reasoning,
         )

         # Ensure resp is a CustomStreamWrapper
@@ -290,7 +304,12 @@ class Agent:
             msg = "Response is not a CustomStreamWrapper, cannot stream chunks."
             raise TypeError(msg)

-    async def responses(
+    async def responses(
+        self,
+        messages: RunnerMessages,
+        record_to_file: Path | None = None,
+        reasoning: ReasoningConfig = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
         # Apply message transfer callback if provided - always use legacy format for LLM compatibility
         processed_messages = messages
         if self.message_transfer:
@@ -304,6 +323,7 @@ class Agent:
             messages=self.message_histories,
             tools=tools,
             tool_choice="auto",  # TODO: make this configurable
+            reasoning=reasoning,
         )
         return litellm_response_stream_handler(resp, record_to=record_to_file)
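Based on the constructor change above, the new reasoning option can be passed directly to Agent. A minimal sketch, assuming placeholder name, instructions, and model identifier (none of these values come from this release):

from lite_agent.agent import Agent

# Minimal sketch: "openai/gpt-4o-mini" and the instructions are placeholder values.
# A string reasoning value is forwarded to the underlying LiteLLMClient and used as reasoning_effort.
agent = Agent(
    name="Assistant",
    instructions="Answer concisely.",
    model="openai/gpt-4o-mini",
    reasoning="medium",
)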
lite_agent/client.py
CHANGED

@@ -1,25 +1,81 @@
 import abc
 import os
-from collections.abc import AsyncGenerator
 from typing import Any, Literal

 import litellm
-from litellm.types.llms.openai import ResponsesAPIStreamingResponse
 from openai.types.chat import ChatCompletionToolParam
 from openai.types.responses import FunctionToolParam

+ReasoningEffort = Literal["minimal", "low", "medium", "high"]
+ThinkingConfig = dict[str, Any] | None
+
+# Unified reasoning configuration type
+ReasoningConfig = (
+    str
+    | dict[str, Any]  # {"type": "enabled", "budget_tokens": 2048} or other provider-specific config
+    | bool  # True/False as a simple on/off switch
+    | None  # reasoning disabled
+)
+
+
+def parse_reasoning_config(reasoning: ReasoningConfig) -> tuple[ReasoningEffort | None, ThinkingConfig]:
+    """
+    Parse the unified reasoning config and return reasoning_effort and thinking_config.
+
+    Args:
+        reasoning: the unified reasoning configuration
+            - str: "minimal", "low", "medium", "high" -> reasoning_effort
+            - dict: {"type": "enabled", "budget_tokens": N} -> thinking_config
+            - bool: True -> "medium", False -> None
+            - None: reasoning disabled
+
+    Returns:
+        tuple: (reasoning_effort, thinking_config)
+    """
+    if reasoning is None:
+        return None, None
+    if isinstance(reasoning, str):
+        # String: use reasoning_effort
+        return reasoning, None
+    if isinstance(reasoning, dict):
+        # Dict: use thinking_config
+        return None, reasoning
+    if isinstance(reasoning, bool):
+        # Bool: True uses the default "medium", False disables reasoning
+        return "medium" if reasoning else None, None
+    # Any other type: reasoning disabled by default
+    return None, None
+

 class BaseLLMClient(abc.ABC):
     """Base class for LLM clients."""

-    def __init__(
+    def __init__(
+        self,
+        *,
+        model: str,
+        api_key: str | None = None,
+        api_base: str | None = None,
+        api_version: str | None = None,
+        reasoning: ReasoningConfig = None,
+    ):
         self.model = model
         self.api_key = api_key
         self.api_base = api_base
         self.api_version = api_version

+        # Parse the reasoning configuration
+        self.reasoning_effort, self.thinking_config = parse_reasoning_config(reasoning)
+
     @abc.abstractmethod
-    async def completion(
+    async def completion(
+        self,
+        messages: list[Any],
+        tools: list[ChatCompletionToolParam] | None = None,
+        tool_choice: str = "auto",
+        reasoning: ReasoningConfig = None,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # noqa: ANN401
         """Perform a completion request to the LLM."""

     @abc.abstractmethod
@@ -28,42 +84,95 @@ class BaseLLMClient(abc.ABC):
         messages: list[dict[str, Any]],  # Changed from ResponseInputParam
         tools: list[FunctionToolParam] | None = None,
         tool_choice: Literal["none", "auto", "required"] = "auto",
+        reasoning: ReasoningConfig = None,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # noqa: ANN401
         """Perform a response request to the LLM."""


 class LiteLLMClient(BaseLLMClient):
+    def _resolve_reasoning_params(
+        self,
+        reasoning: ReasoningConfig,
+    ) -> tuple[ReasoningEffort | None, ThinkingConfig]:
+        """Resolve the reasoning configuration parameters."""
+        if reasoning is not None:
+            return parse_reasoning_config(reasoning)
+
+        # Fall back to the instance defaults
+        return self.reasoning_effort, self.thinking_config
+
+    async def completion(
+        self,
+        messages: list[Any],
+        tools: list[ChatCompletionToolParam] | None = None,
+        tool_choice: str = "auto",
+        reasoning: ReasoningConfig = None,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # noqa: ANN401
         """Perform a completion request to the Litellm API."""
-            tool_choice=tool_choice,
-            api_version=self.api_version,
-            api_key=self.api_key,
-            api_base=self.api_base,
-            stream=True,
+
+        # Resolve the reasoning configuration parameters
+        final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
+            reasoning,
         )

+        # Prepare completion parameters
+        completion_params = {
+            "model": self.model,
+            "messages": messages,
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "api_version": self.api_version,
+            "api_key": self.api_key,
+            "api_base": self.api_base,
+            "stream": True,
+            **kwargs,
+        }
+
+        # Add reasoning parameters if specified
+        if final_reasoning_effort is not None:
+            completion_params["reasoning_effort"] = final_reasoning_effort
+        if final_thinking_config is not None:
+            completion_params["thinking"] = final_thinking_config
+
+        return await litellm.acompletion(**completion_params)
+
     async def responses(
         self,
         messages: list[dict[str, Any]],  # Changed from ResponseInputParam
         tools: list[FunctionToolParam] | None = None,
         tool_choice: Literal["none", "auto", "required"] = "auto",
+        reasoning: ReasoningConfig = None,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # type: ignore[return]  # noqa: ANN401
         """Perform a response request to the Litellm API."""

         os.environ["DISABLE_AIOHTTP_TRANSPORT"] = "True"

-            tools=tools,
-            tool_choice=tool_choice,
-            api_version=self.api_version,
-            api_key=self.api_key,
-            api_base=self.api_base,
-            stream=True,
-            store=False,
+        # Resolve the reasoning configuration parameters
+        final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
+            reasoning,
         )
+
+        # Prepare response parameters
+        response_params = {
+            "model": self.model,
+            "input": messages,  # type: ignore[arg-type]
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "api_version": self.api_version,
+            "api_key": self.api_key,
+            "api_base": self.api_base,
+            "stream": True,
+            "store": False,
+            **kwargs,
+        }
+
+        # Add reasoning parameters if specified
+        if final_reasoning_effort is not None:
+            response_params["reasoning_effort"] = final_reasoning_effort
+        if final_thinking_config is not None:
+            response_params["thinking"] = final_thinking_config
+
+        return await litellm.aresponses(**response_params)  # type: ignore[return-value]
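The mapping performed by parse_reasoning_config follows directly from the branches shown above; a small sketch of the expected return values, assuming the function is imported from lite_agent.client as defined in this diff:

from lite_agent.client import parse_reasoning_config

# Each call returns a (reasoning_effort, thinking_config) tuple.
assert parse_reasoning_config(None) == (None, None)          # reasoning disabled
assert parse_reasoning_config("high") == ("high", None)      # string -> reasoning_effort
assert parse_reasoning_config(True) == ("medium", None)      # True -> default "medium" effort
assert parse_reasoning_config(False) == (None, None)         # False -> disabled
thinking = {"type": "enabled", "budget_tokens": 2048}
assert parse_reasoning_config(thinking) == (None, thinking)  # dict -> thinking config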
lite_agent/runner.py
CHANGED

@@ -30,6 +30,7 @@ from lite_agent.types import (
     UserMessageContent,
     UserTextContent,
 )
+from lite_agent.types.events import AssistantMessageEvent

 DEFAULT_INCLUDES: tuple[AgentChunkType, ...] = (
     "completion_raw",
@@ -56,38 +57,31 @@ class Runner:

     def _start_assistant_message(self, content: str = "", meta: AssistantMessageMeta | None = None) -> None:
         """Start a new assistant message."""
-        if meta is None:
-            meta = AssistantMessageMeta()
-
-        # Always add text content, even if empty (we can update it later)
-        assistant_content_items: list[AssistantMessageContent] = [AssistantTextContent(text=content)]
         self._current_assistant_message = NewAssistantMessage(
-            content=
-            meta=meta,
+            content=[AssistantTextContent(text=content)],
+            meta=meta or AssistantMessageMeta(),
         )

-    def
-        """
+    def _ensure_current_assistant_message(self) -> NewAssistantMessage:
+        """Ensure current assistant message exists and return it."""
         if self._current_assistant_message is None:
             self._start_assistant_message()
+        return self._current_assistant_message  # type: ignore[return-value]

+    def _add_to_current_assistant_message(self, content_item: AssistantTextContent | AssistantToolCall | AssistantToolCallResult) -> None:
+        """Add content to the current assistant message."""
+        self._ensure_current_assistant_message().content.append(content_item)

     def _add_text_content_to_current_assistant_message(self, delta: str) -> None:
         """Add text delta to the current assistant message's text content."""
-                return
-        # If no text content found, add new text content
-        new_content = AssistantTextContent(text=delta)
-        self._current_assistant_message.content.append(new_content)
+        message = self._ensure_current_assistant_message()
+        # Find the first text content item and append the delta
+        for content_item in message.content:
+            if content_item.type == "text":
+                content_item.text += delta
+                return
+        # If no text content found, add new text content
+        message.content.append(AssistantTextContent(text=delta))

     def _finalize_assistant_message(self) -> None:
         """Finalize the current assistant message and add it to messages."""
@@ -131,7 +125,7 @@ class Runner:
         for i, tool_call in enumerate(transfer_calls):
             if i == 0:
                 # Execute the first transfer
-                await self._handle_agent_transfer(tool_call
+                await self._handle_agent_transfer(tool_call)
             else:
                 # Add response for additional transfer calls without executing them
                 self._add_tool_call_result(
@@ -146,7 +140,7 @@ class Runner:
         for i, tool_call in enumerate(return_parent_calls):
             if i == 0:
                 # Execute the first transfer
-                await self._handle_parent_transfer(tool_call
+                await self._handle_parent_transfer(tool_call)
             else:
                 # Add response for additional transfer calls without executing them
                 self._add_tool_call_result(
@@ -174,30 +168,37 @@ class Runner:
         """Collect all chunks from an async generator into a list."""
         return [chunk async for chunk in stream]

-    def run(
+    def run(  # noqa: PLR0913
         self,
         user_input: UserInput,
         max_steps: int = 20,
         includes: Sequence[AgentChunkType] | None = None,
         context: "Any | None" = None,  # noqa: ANN401
         record_to: PathLike | str | None = None,
+        agent_kwargs: dict[str, Any] | None = None,
     ) -> AsyncGenerator[AgentChunk, None]:
         """Run the agent and return a RunResponse object that can be asynchronously iterated for each chunk."""
         includes = self._normalize_includes(includes)
+        match user_input:
+            case str():
+                self.messages.append(NewUserMessage(content=[UserTextContent(text=user_input)]))
+            case list() | tuple():
+                # Handle sequence of messages
+                for message in user_input:
+                    self.append_message(message)
+            case _:
+                # Handle single message (BaseModel, TypedDict, or dict)
+                self.append_message(user_input)  # type: ignore[arg-type]
+        return self._run(max_steps, includes, self._normalize_record_path(record_to), context=context, agent_kwargs=agent_kwargs)
+
+    async def _run(
+        self,
+        max_steps: int,
+        includes: Sequence[AgentChunkType],
+        record_to: Path | None = None,
+        context: Any | None = None,  # noqa: ANN401
+        agent_kwargs: dict[str, Any] | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
         """Run the agent and return a RunResponse object that can be asynchronously iterated for each chunk."""
         logger.debug(f"Running agent with messages: {self.messages}")
         steps = 0
@@ -220,71 +221,101 @@ class Runner:
             logger.debug(f"Step {steps}: finish_reason={finish_reason}, is_finish()={is_finish()}")
             # Convert to legacy format only when needed for LLM communication
             # This allows us to keep the new format internally but ensures compatibility
+            # Extract agent kwargs for reasoning configuration
+            reasoning = None
+            if agent_kwargs:
+                reasoning = agent_kwargs.get("reasoning")
+
             match self.api:
                 case "completion":
-                    resp = await self.agent.completion(
+                    resp = await self.agent.completion(
+                        self.messages,
+                        record_to_file=record_to,
+                        reasoning=reasoning,
+                    )
                 case "responses":
-                    resp = await self.agent.responses(
+                    resp = await self.agent.responses(
+                        self.messages,
+                        record_to_file=record_to,
+                        reasoning=reasoning,
+                    )
                 case _:
                     msg = f"Unknown API type: {self.api}"
                     raise ValueError(msg)
             async for chunk in resp:
-                #
-                current_message.meta.
+                match chunk.type:
+                    case "assistant_message":
+                        # Start or update assistant message in new format
+                        meta = AssistantMessageMeta(
+                            sent_at=chunk.message.meta.sent_at,
+                            latency_ms=getattr(chunk.message.meta, "latency_ms", None),
+                            total_time_ms=getattr(chunk.message.meta, "output_time_ms", None),
+                        )
+                        # If we already have a current assistant message, just update its metadata
+                        if self._current_assistant_message is not None:
+                            self._current_assistant_message.meta = meta
+                        else:
+                            # Extract text content from the new message format
+                            text_content = ""
+                            if chunk.message.content:
+                                for item in chunk.message.content:
+                                    if hasattr(item, "type") and item.type == "text":
+                                        text_content = item.text
+                                        break
+                            self._start_assistant_message(text_content, meta)
+                        # Only yield assistant_message chunk if it's in includes and has content
+                        if chunk.type in includes and self._current_assistant_message is not None:
+                            # Create a new chunk with the current assistant message content
+                            updated_chunk = AssistantMessageEvent(
+                                message=self._current_assistant_message,
+                            )
+                            yield updated_chunk
+                    case "content_delta":
+                        # Accumulate text content to current assistant message
+                        self._add_text_content_to_current_assistant_message(chunk.delta)
+                        # Always yield content_delta chunk if it's in includes
+                        if chunk.type in includes:
+                            yield chunk
+                    case "function_call":
+                        # Add tool call to current assistant message
+                        # Keep arguments as string for compatibility with funcall library
+                        tool_call = AssistantToolCall(
+                            call_id=chunk.call_id,
+                            name=chunk.name,
+                            arguments=chunk.arguments or "{}",
+                        )
+                        self._add_to_current_assistant_message(tool_call)
+                        # Always yield function_call chunk if it's in includes
+                        if chunk.type in includes:
+                            yield chunk
+                    case "usage":
+                        # Update the last assistant message with usage data and output_time_ms
+                        usage_time = datetime.now(timezone.utc)
+                        for i in range(len(self.messages) - 1, -1, -1):
+                            current_message = self.messages[i]
+                            if isinstance(current_message, NewAssistantMessage):
+                                # Update usage information
+                                if current_message.meta.usage is None:
+                                    current_message.meta.usage = MessageUsage()
+                                current_message.meta.usage.input_tokens = chunk.usage.input_tokens
+                                current_message.meta.usage.output_tokens = chunk.usage.output_tokens
+                                current_message.meta.usage.total_tokens = (chunk.usage.input_tokens or 0) + (chunk.usage.output_tokens or 0)

+                                # Calculate output_time_ms if latency_ms is available
+                                if current_message.meta.latency_ms is not None:
+                                    # We need to calculate from first output to usage time
+                                    # We'll calculate: usage_time - (sent_at - latency_ms)
+                                    # This gives us the time from first output to usage completion
+                                    # sent_at is when the message was completed, so sent_at - latency_ms approximates first output time
+                                    first_output_time_approx = current_message.meta.sent_at - timedelta(milliseconds=current_message.meta.latency_ms)
+                                    output_time_ms = int((usage_time - first_output_time_approx).total_seconds() * 1000)
+                                    current_message.meta.total_time_ms = max(0, output_time_ms)
+                                break
+                        # Always yield usage chunk if it's in includes
+                        if chunk.type in includes:
+                            yield chunk
+                    case _ if chunk.type in includes:
+                        yield chunk

             # Finalize assistant message so it can be found in pending function calls
             self._finalize_assistant_message()
@@ -357,11 +388,6 @@ class Runner:
             msg = "Cannot continue running without a valid last message from the assistant."
             raise ValueError(msg)

-        last_message = self.messages[-1]
-        if not (isinstance(last_message, NewAssistantMessage) or (hasattr(last_message, "role") and getattr(last_message, "role", None) == "assistant")):
-            msg = "Cannot continue running without a valid last message from the assistant."
-            raise ValueError(msg)
-
         resp = self._run(max_steps=max_steps, includes=includes, record_to=self._normalize_record_path(record_to), context=context)
         async for chunk in resp:
             yield chunk
@@ -377,58 +403,50 @@ class Runner:
         resp = self.run(user_input, max_steps, includes, record_to=record_to)
         return await self._collect_all_chunks(resp)

-    def
-        """
-        # Look at the last assistant message for pending tool calls
-        if not self.messages:
-            return pending_calls
-
-        last_message = self.messages[-1]
-        if not isinstance(last_message, NewAssistantMessage):
-            return pending_calls
+    def _analyze_last_assistant_message(self) -> tuple[list[AssistantToolCall], dict[str, str]]:
+        """Analyze the last assistant message and return pending tool calls and tool call map."""
+        if not self.messages or not isinstance(self.messages[-1], NewAssistantMessage):
+            return [], {}

-        # Collect tool calls and results from the last assistant message
         tool_calls = {}
         tool_results = set()
+        tool_call_names = {}

-        for content_item in
+        for content_item in self.messages[-1].content:
             if content_item.type == "tool_call":
                 tool_calls[content_item.call_id] = content_item
+                tool_call_names[content_item.call_id] = content_item.name
             elif content_item.type == "tool_call_result":
                 tool_results.add(content_item.call_id)

-        # Return tool calls
+        # Return pending tool calls and tool call names map
+        pending_calls = [call for call_id, call in tool_calls.items() if call_id not in tool_results]
+        return pending_calls, tool_call_names
+
+    def _find_pending_tool_calls(self) -> list[AssistantToolCall]:
+        """Find tool calls that don't have corresponding results yet."""
+        pending_calls, _ = self._analyze_last_assistant_message()
+        return pending_calls

     def _get_tool_call_name_by_id(self, call_id: str) -> str | None:
         """Get the tool name for a given call_id from the last assistant message."""
-        for content_item in self.messages[-1].content:
-            if content_item.type == "tool_call" and content_item.call_id == call_id:
-                return content_item.name
-        return None
+        _, tool_call_names = self._analyze_last_assistant_message()
+        return tool_call_names.get(call_id)

     def _convert_tool_calls_to_tool_calls(self, tool_calls: list[AssistantToolCall]) -> list[ToolCall]:
         """Convert AssistantToolCall objects to ToolCall objects for compatibility."""
-        for tc in tool_calls:
-            tool_call = ToolCall(
+        return [
+            ToolCall(
                 id=tc.call_id,
                 type="function",
                 function=ToolCallFunction(
                     name=tc.name,
                     arguments=tc.arguments if isinstance(tc.arguments, str) else str(tc.arguments),
                 ),
-                index=
+                index=i,
             )
+            for i, tc in enumerate(tool_calls)
+        ]

     def set_chat_history(self, messages: Sequence[FlexibleRunnerMessage], root_agent: Agent | None = None) -> None:
         """Set the entire chat history and track the current agent based on function calls.
@@ -691,12 +709,11 @@ class Runner:
             msg = f"Unsupported message type: {type(message)}"
             raise TypeError(msg)

-    async def _handle_agent_transfer(self, tool_call: ToolCall
+    async def _handle_agent_transfer(self, tool_call: ToolCall) -> None:
         """Handle agent transfer when transfer_to_agent tool is called.

         Args:
             tool_call: The transfer_to_agent tool call
-            _includes: The types of chunks to include in output (unused)
         """

         # Parse the arguments to get the target agent name
@@ -771,12 +788,11 @@ class Runner:
                 output=f"Transfer failed: {e!s}",
             )

-    async def _handle_parent_transfer(self, tool_call: ToolCall
+    async def _handle_parent_transfer(self, tool_call: ToolCall) -> None:
         """Handle parent transfer when transfer_to_parent tool is called.

         Args:
             tool_call: The transfer_to_parent tool call
-            _includes: The types of chunks to include in output (unused)
         """

         # Check if current agent has a parent
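Runner.run now also accepts agent_kwargs, and _run forwards agent_kwargs["reasoning"] to the agent's completion/responses call. A minimal sketch, assuming a Runner instance configured elsewhere (the user input text is a placeholder):

# Minimal sketch: `runner` is assumed to be an already-configured Runner instance.
async def stream_with_reasoning(runner) -> None:
    chunks = runner.run(
        "Summarize the latest report.",  # plain-string input is wrapped in a NewUserMessage
        agent_kwargs={"reasoning": {"type": "enabled", "budget_tokens": 2048}},
    )
    async for chunk in chunks:
        print(chunk.type)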
lite_agent/stream_handlers/litellm.py
CHANGED

@@ -16,18 +16,27 @@ if TYPE_CHECKING:
     from aiofiles.threadpool.text import AsyncTextIOWrapper


-def ensure_record_file(record_to: Path | None) -> Path | None:
+def ensure_record_file(record_to: Path | str | None) -> Path | None:
     if not record_to:
         return None
+
+    path = Path(record_to) if isinstance(record_to, str) else record_to
+
+    # If the path is a directory, generate a filename
+    if path.is_dir():
+        path = path / "conversation.jsonl"
+
+    # Ensure parent directory exists
+    if not path.parent.exists():
+        logger.warning('Record directory "%s" does not exist, creating it.', path.parent)
+        path.parent.mkdir(parents=True, exist_ok=True)
+
+    return path


 async def litellm_completion_stream_handler(
     resp: litellm.CustomStreamWrapper,
-    record_to: Path | None = None,
+    record_to: Path | str | None = None,
 ) -> AsyncGenerator[AgentChunk, None]:
     """
     Optimized chunk handler
@@ -52,7 +61,7 @@ async def litellm_completion_stream_handler(

 async def litellm_response_stream_handler(
     resp: AsyncGenerator[ResponsesAPIStreamingResponse, None],
-    record_to: Path | None = None,
+    record_to: Path | str | None = None,
 ) -> AsyncGenerator[AgentChunk, None]:
     """
     Response API stream handler for processing ResponsesAPIStreamingResponse chunks
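Since record_to now also accepts a plain string or an existing directory, a short sketch of the new path handling, assuming a local logs/ directory and an out/ subdirectory as hypothetical locations:

from pathlib import Path

from lite_agent.stream_handlers.litellm import ensure_record_file

# Assuming "logs" exists as a directory, the default file name is appended.
print(ensure_record_file("logs"))                 # logs/conversation.jsonl
# A missing parent directory is created (with a warning) before the path is returned.
print(ensure_record_file(Path("out/run.jsonl")))  # out/run.jsonl
print(ensure_record_file(None))                   # None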
{lite_agent-0.4.0.dist-info → lite_agent-0.5.0.dist-info}/RECORD
CHANGED

@@ -1,16 +1,16 @@
 lite_agent/__init__.py,sha256=Swuefee0etSiaDnn30K2hBNV9UI3hIValW3A-pRE7e0,338
-lite_agent/agent.py,sha256=
+lite_agent/agent.py,sha256=M0U59KpMy6OGFje6yZuQCYVGr4oBboRwbtImPF59o2w,23314
 lite_agent/chat_display.py,sha256=b0sUH3fkutc4e_KAKH7AtPu2msyLloNIAiWqCNavdds,30533
-lite_agent/client.py,sha256=
+lite_agent/client.py,sha256=HG-NbTIUSFAUAPjRow3TFYJxvTc6Y4bdT2oJWIJNEEk,5963
 lite_agent/loggers.py,sha256=XkNkdqwD_nQGfhQJ-bBWT7koci_mMkNw3aBpyMhOICw,57
 lite_agent/message_transfers.py,sha256=9qucjc-uSIXvVfhcmVRC_0lp0Q8sWp99dV4ReCh6ZlI,4428
 lite_agent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lite_agent/runner.py,sha256=
+lite_agent/runner.py,sha256=U7eVNAJ_VLwgbPPpn-vggSgvBmFl8wMMFWn3mWCsDow,40423
 lite_agent/processors/__init__.py,sha256=ybpAzpMBIE9v5I24wIBZRXeaOaPNTmoKH13aofgNI6Q,234
 lite_agent/processors/completion_event_processor.py,sha256=8fQYRofgBd8t0V3oUakTOmZdv5Q9tCuzADGCGvVgy0k,13442
 lite_agent/processors/response_event_processor.py,sha256=CElJMUzLs8mklVqJtoLiVu-NTq0Dz2NNd9YdAKpjgE0,8088
 lite_agent/stream_handlers/__init__.py,sha256=a5s1GZr42uvndtcQqEhK2cnjGkK8ZFTAZCj3J61Bb5E,209
-lite_agent/stream_handlers/litellm.py,sha256=
+lite_agent/stream_handlers/litellm.py,sha256=3D0u7R2ADA8kDwpFImZlw20o-CsmFXVLvq4nvwwD0Rk,2922
 lite_agent/templates/handoffs_source_instructions.xml.j2,sha256=2XsXQlBzk38qbxGrfyt8y2b0KlZmsV_1xavLufcdkHc,428
 lite_agent/templates/handoffs_target_instructions.xml.j2,sha256=gSbWVYYcovPKbGpFc0kqGSJ5Y5UC3fOHyUmZfcrDgSE,356
 lite_agent/templates/wait_for_user_instructions.xml.j2,sha256=wXbcYD5Q1FaCGVBm3Hz_Cp7nnoK7KzloP0ao-jYMwPk,231
@@ -18,6 +18,6 @@ lite_agent/types/__init__.py,sha256=QKuhjFWRcpAlsBK9JYgoCABpoQExwhuyGudJoiiqQfs,
 lite_agent/types/events.py,sha256=mFMqV55WWJbPDyb_P61nd3qMLpEnwZgVY6NTKFkINkg,2389
 lite_agent/types/messages.py,sha256=c7nTIWqXNo562het_vaWcZvsoy-adkARwAYn4JNqm0c,9897
 lite_agent/types/tool_calls.py,sha256=Xnut8-2-Ld9vgA2GKJY6BbFlBaAv_n4W7vo7Jx21A-E,260
-lite_agent-0.
-lite_agent-0.
-lite_agent-0.
+lite_agent-0.5.0.dist-info/METADATA,sha256=20K2Xirnyawl1uN_I8TLcuGlgRjNhs04hz2BtDDRnbM,3456
+lite_agent-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+lite_agent-0.5.0.dist-info/RECORD,,

{lite_agent-0.4.0.dist-info → lite_agent-0.5.0.dist-info}/WHEEL
File without changes