lite-agent 0.4.1__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lite-agent might be problematic.
- lite_agent/agent.py +35 -13
- lite_agent/chat_display.py +1 -1
- lite_agent/client.py +193 -26
- lite_agent/response_handlers/__init__.py +10 -0
- lite_agent/response_handlers/base.py +46 -0
- lite_agent/response_handlers/completion.py +50 -0
- lite_agent/response_handlers/responses.py +42 -0
- lite_agent/runner.py +35 -10
- {lite_agent-0.4.1.dist-info → lite_agent-0.6.0.dist-info}/METADATA +1 -1
- {lite_agent-0.4.1.dist-info → lite_agent-0.6.0.dist-info}/RECORD +11 -7
- {lite_agent-0.4.1.dist-info → lite_agent-0.6.0.dist-info}/WHEEL +0 -0
lite_agent/agent.py
CHANGED
@@ -5,11 +5,10 @@ from typing import Any, Optional
 
 from funcall import Funcall
 from jinja2 import Environment, FileSystemLoader
-from litellm import CustomStreamWrapper
 
-from lite_agent.client import BaseLLMClient, LiteLLMClient
+from lite_agent.client import BaseLLMClient, LiteLLMClient, ReasoningConfig
 from lite_agent.loggers import logger
-from lite_agent.
+from lite_agent.response_handlers import CompletionResponseHandler, ResponsesAPIHandler
 from lite_agent.types import AgentChunk, FunctionCallEvent, FunctionCallOutputEvent, RunnerMessages, ToolCall, message_to_llm_dict, system_message_to_llm_dict
 from lite_agent.types.messages import NewAssistantMessage, NewSystemMessage, NewUserMessage
 
@@ -22,7 +21,7 @@ WAIT_FOR_USER_INSTRUCTIONS_TEMPLATE = jinja_env.get_template("wait_for_user_inst
 
 
 class Agent:
-    def __init__(
+    def __init__(
         self,
         *,
         model: str | BaseLLMClient,
@@ -32,15 +31,21 @@ class Agent:
         handoffs: list["Agent"] | None = None,
         message_transfer: Callable[[RunnerMessages], RunnerMessages] | None = None,
         completion_condition: str = "stop",
+        reasoning: ReasoningConfig = None,
     ) -> None:
         self.name = name
        self.instructions = instructions
+        self.reasoning = reasoning
+
         if isinstance(model, BaseLLMClient):
             # If model is a BaseLLMClient instance, use it directly
             self.client = model
         else:
             # Otherwise, create a LitellmClient instance
-            self.client = LiteLLMClient(
+            self.client = LiteLLMClient(
+                model=model,
+                reasoning=reasoning,
+            )
         self.completion_condition = completion_condition
         self.handoffs = handoffs if handoffs else []
         self._parent: Agent | None = None
@@ -269,7 +274,13 @@ class Agent:
             res.append(message)
         return res
 
-    async def completion(
+    async def completion(
+        self,
+        messages: RunnerMessages,
+        record_to_file: Path | None = None,
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+    ) -> AsyncGenerator[AgentChunk, None]:
         # Apply message transfer callback if provided - always use legacy format for LLM compatibility
         processed_messages = messages
         if self.message_transfer:
@@ -284,15 +295,21 @@ class Agent:
             messages=self.message_histories,
             tools=tools,
             tool_choice="auto",  # TODO: make this configurable
+            reasoning=reasoning,
+            streaming=streaming,
         )
 
-        #
-
-
-        msg = "Response is not a CustomStreamWrapper, cannot stream chunks."
-        raise TypeError(msg)
+        # Use response handler for unified processing
+        handler = CompletionResponseHandler()
+        return handler.handle(resp, streaming, record_to_file)
 
-    async def responses(
+    async def responses(
+        self,
+        messages: RunnerMessages,
+        record_to_file: Path | None = None,
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+    ) -> AsyncGenerator[AgentChunk, None]:
         # Apply message transfer callback if provided - always use legacy format for LLM compatibility
         processed_messages = messages
         if self.message_transfer:
@@ -306,8 +323,12 @@ class Agent:
             messages=self.message_histories,
             tools=tools,
             tool_choice="auto",  # TODO: make this configurable
+            reasoning=reasoning,
+            streaming=streaming,
         )
-
+        # Use response handler for unified processing
+        handler = ResponsesAPIHandler()
+        return handler.handle(resp, streaming, record_to_file)
 
     async def list_require_confirm_tools(self, tool_calls: Sequence[ToolCall] | None) -> Sequence[ToolCall]:
         if not tool_calls:
@@ -521,3 +542,4 @@ class Agent:
             required=[],
             handler=wait_for_user_handler,
         )
+
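The reasoning configuration added here is stored on the agent, forwarded to the LiteLLMClient it constructs, and completion()/responses() accept per-call reasoning and streaming overrides. A minimal construction sketch, assuming the remaining keyword arguments keep their existing defaults; the model name and instructions below are placeholders, not values from this release:

    from lite_agent.agent import Agent

    agent = Agent(
        model="gpt-4o-mini",  # placeholder model name
        name="assistant",
        instructions="You are a helpful assistant.",
        reasoning="low",  # effort string, {"type": "enabled", "budget_tokens": N}, True/False, or None
    )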
lite_agent/chat_display.py
CHANGED
lite_agent/client.py
CHANGED
@@ -1,25 +1,108 @@
 import abc
 import os
-from collections.abc import AsyncGenerator
 from typing import Any, Literal
 
 import litellm
-from litellm.types.llms.openai import ResponsesAPIStreamingResponse
 from openai.types.chat import ChatCompletionToolParam
 from openai.types.responses import FunctionToolParam
+from pydantic import BaseModel
+
+ReasoningEffort = Literal["minimal", "low", "medium", "high"]
+ThinkingConfig = dict[str, Any] | None
+
+# Unified reasoning configuration type
+ReasoningConfig = (
+    str
+    | dict[str, Any]  # {"type": "enabled", "budget_tokens": 2048} or other provider-specific configs
+    | bool  # True/False simple on/off switch
+    | None  # reasoning disabled
+)
+
+
+class LLMConfig(BaseModel):
+    """LLM generation parameters configuration."""
+
+    temperature: float | None = None
+    max_tokens: int | None = None
+    top_p: float | None = None
+    frequency_penalty: float | None = None
+    presence_penalty: float | None = None
+    stop: list[str] | str | None = None
+
+
+def parse_reasoning_config(reasoning: ReasoningConfig) -> tuple[ReasoningEffort | None, ThinkingConfig]:
+    """
+    Parse the unified reasoning configuration and return reasoning_effort and thinking_config.
+
+    Args:
+        reasoning: the unified reasoning configuration
+            - str: "minimal", "low", "medium", "high" -> reasoning_effort
+            - dict: {"type": "enabled", "budget_tokens": N} -> thinking_config
+            - bool: True -> "medium", False -> None
+            - None: reasoning disabled
+
+    Returns:
+        tuple: (reasoning_effort, thinking_config)
+    """
+    if reasoning is None:
+        return None, None
+    if isinstance(reasoning, str):
+        # String: use reasoning_effort
+        # Make sure the string is a valid ReasoningEffort value
+        if reasoning in ("minimal", "low", "medium", "high"):
+            return reasoning, None  # type: ignore[return-value]
+        return None, None
+    if isinstance(reasoning, dict):
+        # Dict: use thinking_config
+        return None, reasoning
+    if isinstance(reasoning, bool):
+        # Bool: True uses the default "medium", False disables reasoning
+        return "medium" if reasoning else None, None
+    # Any other type: reasoning disabled by default
+    return None, None
 
 
 class BaseLLMClient(abc.ABC):
     """Base class for LLM clients."""
 
-    def __init__(
+    def __init__(
+        self,
+        *,
+        model: str,
+        api_key: str | None = None,
+        api_base: str | None = None,
+        api_version: str | None = None,
+        reasoning: ReasoningConfig = None,
+        llm_config: LLMConfig | None = None,
+        **llm_params: Any,  # noqa: ANN401
+    ):
         self.model = model
         self.api_key = api_key
         self.api_base = api_base
         self.api_version = api_version
 
+        # Handle LLM generation parameters
+        if llm_config is not None:
+            self.llm_config = llm_config
+        else:
+            # Build the config from **llm_params
+            self.llm_config = LLMConfig(**llm_params)
+
+        # Handle reasoning configuration
+        self.reasoning_effort: ReasoningEffort | None
+        self.thinking_config: ThinkingConfig
+        self.reasoning_effort, self.thinking_config = parse_reasoning_config(reasoning)
+
     @abc.abstractmethod
-    async def completion(
+    async def completion(
+        self,
+        messages: list[Any],
+        tools: list[ChatCompletionToolParam] | None = None,
+        tool_choice: str = "auto",
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # noqa: ANN401
         """Perform a completion request to the LLM."""
 
     @abc.abstractmethod
@@ -28,42 +111,126 @@ class BaseLLMClient(abc.ABC):
         messages: list[dict[str, Any]],  # Changed from ResponseInputParam
         tools: list[FunctionToolParam] | None = None,
         tool_choice: Literal["none", "auto", "required"] = "auto",
-
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # noqa: ANN401
         """Perform a response request to the LLM."""
 
 
 class LiteLLMClient(BaseLLMClient):
-
+    def _resolve_reasoning_params(
+        self,
+        reasoning: ReasoningConfig,
+    ) -> tuple[ReasoningEffort | None, ThinkingConfig]:
+        """Resolve the reasoning configuration parameters."""
+        if reasoning is not None:
+            return parse_reasoning_config(reasoning)
+
+        # Fall back to the instance defaults
+        return self.reasoning_effort, self.thinking_config
+
+    async def completion(
+        self,
+        messages: list[Any],
+        tools: list[ChatCompletionToolParam] | None = None,
+        tool_choice: str = "auto",
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # noqa: ANN401
         """Perform a completion request to the Litellm API."""
-
-
-
-
-            tool_choice=tool_choice,
-            api_version=self.api_version,
-            api_key=self.api_key,
-            api_base=self.api_base,
-            stream=True,
+
+        # Resolve the reasoning configuration parameters
+        final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
+            reasoning,
         )
 
+        # Prepare completion parameters
+        completion_params = {
+            "model": self.model,
+            "messages": messages,
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "api_version": self.api_version,
+            "api_key": self.api_key,
+            "api_base": self.api_base,
+            "stream": streaming,
+            **kwargs,
+        }
+
+        # Add LLM generation parameters if specified
+        if self.llm_config.temperature is not None:
+            completion_params["temperature"] = self.llm_config.temperature
+        if self.llm_config.max_tokens is not None:
+            completion_params["max_tokens"] = self.llm_config.max_tokens
+        if self.llm_config.top_p is not None:
+            completion_params["top_p"] = self.llm_config.top_p
+        if self.llm_config.frequency_penalty is not None:
+            completion_params["frequency_penalty"] = self.llm_config.frequency_penalty
+        if self.llm_config.presence_penalty is not None:
+            completion_params["presence_penalty"] = self.llm_config.presence_penalty
+        if self.llm_config.stop is not None:
+            completion_params["stop"] = self.llm_config.stop
+
+        # Add reasoning parameters if specified
+        if final_reasoning_effort is not None:
+            completion_params["reasoning_effort"] = final_reasoning_effort
+        if final_thinking_config is not None:
+            completion_params["thinking"] = final_thinking_config
+
+        return await litellm.acompletion(**completion_params)
+
     async def responses(
         self,
         messages: list[dict[str, Any]],  # Changed from ResponseInputParam
         tools: list[FunctionToolParam] | None = None,
         tool_choice: Literal["none", "auto", "required"] = "auto",
-
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # type: ignore[return]  # noqa: ANN401
         """Perform a response request to the Litellm API."""
 
         os.environ["DISABLE_AIOHTTP_TRANSPORT"] = "True"
 
-
-
-
-            tools=tools,
-            tool_choice=tool_choice,
-            api_version=self.api_version,
-            api_key=self.api_key,
-            api_base=self.api_base,
-            stream=True,
-            store=False,
+        # Resolve the reasoning configuration parameters
+        final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
+            reasoning,
         )
+
+        # Prepare response parameters
+        response_params = {
+            "model": self.model,
+            "input": messages,  # type: ignore[arg-type]
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "api_version": self.api_version,
+            "api_key": self.api_key,
+            "api_base": self.api_base,
+            "stream": streaming,
+            "store": False,
+            **kwargs,
+        }
+
+        # Add LLM generation parameters if specified
+        if self.llm_config.temperature is not None:
+            response_params["temperature"] = self.llm_config.temperature
+        if self.llm_config.max_tokens is not None:
+            response_params["max_tokens"] = self.llm_config.max_tokens
+        if self.llm_config.top_p is not None:
+            response_params["top_p"] = self.llm_config.top_p
+        if self.llm_config.frequency_penalty is not None:
+            response_params["frequency_penalty"] = self.llm_config.frequency_penalty
+        if self.llm_config.presence_penalty is not None:
+            response_params["presence_penalty"] = self.llm_config.presence_penalty
+        if self.llm_config.stop is not None:
+            response_params["stop"] = self.llm_config.stop
+
+        # Add reasoning parameters if specified
+        if final_reasoning_effort is not None:
+            response_params["reasoning_effort"] = final_reasoning_effort
+        if final_thinking_config is not None:
+            response_params["thinking"] = final_thinking_config
+
+        return await litellm.aresponses(**response_params)  # type: ignore[return-value]
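parse_reasoning_config normalizes the unified ReasoningConfig into either a reasoning_effort string or a thinking dict, which the client then forwards to litellm as reasoning_effort / thinking. Restating the documented cases as calls:

    from lite_agent.client import parse_reasoning_config

    parse_reasoning_config("high")                                      # ("high", None)
    parse_reasoning_config({"type": "enabled", "budget_tokens": 2048})  # (None, {"type": "enabled", "budget_tokens": 2048})
    parse_reasoning_config(True)                                        # ("medium", None)
    parse_reasoning_config(False)                                       # (None, None)
    parse_reasoning_config("invalid")                                   # (None, None) - unknown strings are dropped
    parse_reasoning_config(None)                                        # (None, None)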
lite_agent/response_handlers/__init__.py
ADDED
@@ -0,0 +1,10 @@
+"""Response handlers for unified streaming and non-streaming processing."""
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.response_handlers.completion import CompletionResponseHandler
+from lite_agent.response_handlers.responses import ResponsesAPIHandler
+
+__all__ = [
+    "CompletionResponseHandler",
+    "ResponseHandler",
+    "ResponsesAPIHandler",
+]
lite_agent/response_handlers/base.py
ADDED
@@ -0,0 +1,46 @@
+"""Base response handler for unified streaming and non-streaming response processing."""
+from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator
+from pathlib import Path
+from typing import Any
+
+from lite_agent.types import AgentChunk
+
+
+class ResponseHandler(ABC):
+    """Base class for handling both streaming and non-streaming responses."""
+
+    async def handle(
+        self,
+        response: Any,
+        streaming: bool,
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle a response in either streaming or non-streaming mode.
+
+        Args:
+            response: The LLM response object
+            streaming: Whether to process as streaming or non-streaming
+            record_to: Optional file path to record the conversation
+
+        Yields:
+            AgentChunk: Processed chunks from the response
+        """
+        if streaming:
+            async for chunk in self._handle_streaming(response, record_to):
+                yield chunk
+        else:
+            async for chunk in self._handle_non_streaming(response, record_to):
+                yield chunk
+
+    @abstractmethod
+    async def _handle_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming response."""
+
+    @abstractmethod
+    async def _handle_non_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming response."""
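Both concrete handlers below inherit this dispatch, so callers iterate the same async generator whether the underlying response was streamed or not. A small consumption sketch, assuming resp is whatever the client returned for the chosen mode:

    handler = CompletionResponseHandler()  # or ResponsesAPIHandler()
    async for event in handler.handle(resp, streaming=False):
        # AgentChunk instances; the non-streaming paths yield AssistantMessageEvent
        ...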
lite_agent/response_handlers/completion.py
ADDED
@@ -0,0 +1,50 @@
+"""Completion API response handler."""
+from collections.abc import AsyncGenerator
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from litellm import CustomStreamWrapper
+
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.stream_handlers import litellm_completion_stream_handler
+from lite_agent.types import AgentChunk
+from lite_agent.types.events import AssistantMessageEvent
+from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, NewAssistantMessage
+
+
+class CompletionResponseHandler(ResponseHandler):
+    """Handler for Completion API responses."""
+
+    async def _handle_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming completion response."""
+        if isinstance(response, CustomStreamWrapper):
+            async for chunk in litellm_completion_stream_handler(response, record_to):
+                yield chunk
+        else:
+            msg = "Response is not a CustomStreamWrapper, cannot stream chunks."
+            raise TypeError(msg)
+
+    async def _handle_non_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming completion response."""
+        # Convert completion response to chunks
+        if hasattr(response, "choices") and response.choices:
+            choice = response.choices[0]
+            content_items = []
+
+            # Add text content
+            if choice.message and choice.message.content:
+                content_items.append(AssistantTextContent(text=choice.message.content))
+
+            # TODO: Handle tool calls in the future
+
+            if content_items:
+                message = NewAssistantMessage(
+                    content=content_items,
+                    meta=AssistantMessageMeta(sent_at=datetime.now(timezone.utc)),
+                )
+                yield AssistantMessageEvent(message=message)
lite_agent/response_handlers/responses.py
ADDED
@@ -0,0 +1,42 @@
+"""Responses API response handler."""
+from collections.abc import AsyncGenerator
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.stream_handlers import litellm_response_stream_handler
+from lite_agent.types import AgentChunk
+from lite_agent.types.events import AssistantMessageEvent
+from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, NewAssistantMessage
+
+
+class ResponsesAPIHandler(ResponseHandler):
+    """Handler for Responses API responses."""
+
+    async def _handle_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming responses API response."""
+        async for chunk in litellm_response_stream_handler(response, record_to):
+            yield chunk
+
+    async def _handle_non_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming responses API response."""
+        # Convert ResponsesAPIResponse to chunks
+        if hasattr(response, "output") and response.output:
+            for output_message in response.output:
+                if hasattr(output_message, "content") and output_message.content:
+                    content_text = ""
+                    for content_item in output_message.content:
+                        if hasattr(content_item, "text"):
+                            content_text += content_item.text
+
+                    if content_text:
+                        message = NewAssistantMessage(
+                            content=[AssistantTextContent(text=content_text)],
+                            meta=AssistantMessageMeta(sent_at=datetime.now(timezone.utc)),
+                        )
+                        yield AssistantMessageEvent(message=message)
lite_agent/runner.py
CHANGED
@@ -44,10 +44,11 @@ DEFAULT_INCLUDES: tuple[AgentChunkType, ...] = (
 
 
 class Runner:
-    def __init__(self, agent: Agent, api: Literal["completion", "responses"] = "responses") -> None:
+    def __init__(self, agent: Agent, api: Literal["completion", "responses"] = "responses", streaming: bool = True) -> None:
         self.agent = agent
         self.messages: list[NewMessage] = []
         self.api = api
+        self.streaming = streaming
         self._current_assistant_message: NewAssistantMessage | None = None
 
     @property
@@ -175,8 +176,10 @@ class Runner:
         includes: Sequence[AgentChunkType] | None = None,
         context: "Any | None" = None,  # noqa: ANN401
         record_to: PathLike | str | None = None,
+        agent_kwargs: dict[str, Any] | None = None,
     ) -> AsyncGenerator[AgentChunk, None]:
         """Run the agent and return a RunResponse object that can be asynchronously iterated for each chunk."""
+        logger.debug(f"Runner.run called with streaming={self.streaming}, api={self.api}")
         includes = self._normalize_includes(includes)
         match user_input:
             case str():
@@ -188,9 +191,17 @@ class Runner:
             case _:
                 # Handle single message (BaseModel, TypedDict, or dict)
                 self.append_message(user_input)  # type: ignore[arg-type]
-
+        logger.debug("Messages prepared, calling _run")
+        return self._run(max_steps, includes, self._normalize_record_path(record_to), context=context, agent_kwargs=agent_kwargs)
 
-    async def _run(
+    async def _run(
+        self,
+        max_steps: int,
+        includes: Sequence[AgentChunkType],
+        record_to: Path | None = None,
+        context: Any | None = None,  # noqa: ANN401
+        agent_kwargs: dict[str, Any] | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
         """Run the agent and return a RunResponse object that can be asynchronously iterated for each chunk."""
         logger.debug(f"Running agent with messages: {self.messages}")
         steps = 0
@@ -213,14 +224,33 @@ class Runner:
             logger.debug(f"Step {steps}: finish_reason={finish_reason}, is_finish()={is_finish()}")
             # Convert to legacy format only when needed for LLM communication
             # This allows us to keep the new format internally but ensures compatibility
+            # Extract agent kwargs for reasoning configuration
+            reasoning = None
+            if agent_kwargs:
+                reasoning = agent_kwargs.get("reasoning")
+
+            logger.debug(f"Using API: {self.api}, streaming: {self.streaming}")
             match self.api:
                 case "completion":
-
+                    logger.debug("Calling agent.completion")
+                    resp = await self.agent.completion(
+                        self.messages,
+                        record_to_file=record_to,
+                        reasoning=reasoning,
+                        streaming=self.streaming,
+                    )
                 case "responses":
-
+                    logger.debug("Calling agent.responses")
+                    resp = await self.agent.responses(
+                        self.messages,
+                        record_to_file=record_to,
+                        reasoning=reasoning,
+                        streaming=self.streaming,
+                    )
                 case _:
                     msg = f"Unknown API type: {self.api}"
                     raise ValueError(msg)
+            logger.debug(f"Received response from agent: {type(resp)}")
             async for chunk in resp:
                 match chunk.type:
                     case "assistant_message":
@@ -367,11 +397,6 @@ class Runner:
             msg = "Cannot continue running without a valid last message from the assistant."
             raise ValueError(msg)
 
-        last_message = self.messages[-1]
-        if not (isinstance(last_message, NewAssistantMessage) or (hasattr(last_message, "role") and getattr(last_message, "role", None) == "assistant")):
-            msg = "Cannot continue running without a valid last message from the assistant."
-            raise ValueError(msg)
-
         resp = self._run(max_steps=max_steps, includes=includes, record_to=self._normalize_record_path(record_to), context=context)
         async for chunk in resp:
             yield chunk
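Taken together with the agent-side changes, the runner now fixes the streaming mode at construction time and can pass a reasoning override through agent_kwargs. An illustrative sketch; the prompt is a placeholder, and whether run() needs an await depends on its unchanged definition, which this diff does not show:

    runner = Runner(agent, api="completion", streaming=False)
    resp = runner.run("Summarize the report", agent_kwargs={"reasoning": "high"})
    async for chunk in resp:
        if chunk.type == "assistant_message":
            ...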
{lite_agent-0.4.1.dist-info → lite_agent-0.6.0.dist-info}/RECORD
CHANGED
@@ -1,14 +1,18 @@
 lite_agent/__init__.py,sha256=Swuefee0etSiaDnn30K2hBNV9UI3hIValW3A-pRE7e0,338
-lite_agent/agent.py,sha256=
-lite_agent/chat_display.py,sha256=
-lite_agent/client.py,sha256=
+lite_agent/agent.py,sha256=9stxur0iqdG9NUDXdk1ElxenjYsRsurt36hGhZcz_-c,23323
+lite_agent/chat_display.py,sha256=Pfg6ZgTeIuzRZMVxOUzlwZU18rfOLD9-8I1lqUd_fXc,30516
+lite_agent/client.py,sha256=QhtZZGX54ha9-HKHcbx0qUsaAUi4-TAO-YckCH_itQY,8633
 lite_agent/loggers.py,sha256=XkNkdqwD_nQGfhQJ-bBWT7koci_mMkNw3aBpyMhOICw,57
 lite_agent/message_transfers.py,sha256=9qucjc-uSIXvVfhcmVRC_0lp0Q8sWp99dV4ReCh6ZlI,4428
 lite_agent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lite_agent/runner.py,sha256=
+lite_agent/runner.py,sha256=B0KYE0Wfta4X85kPm_hMMGrLz8o1-TmGUnOG1cUZeBM,40985
 lite_agent/processors/__init__.py,sha256=ybpAzpMBIE9v5I24wIBZRXeaOaPNTmoKH13aofgNI6Q,234
 lite_agent/processors/completion_event_processor.py,sha256=8fQYRofgBd8t0V3oUakTOmZdv5Q9tCuzADGCGvVgy0k,13442
 lite_agent/processors/response_event_processor.py,sha256=CElJMUzLs8mklVqJtoLiVu-NTq0Dz2NNd9YdAKpjgE0,8088
+lite_agent/response_handlers/__init__.py,sha256=2xe8YngMpjdp1B5tE8C3IiimYI30TnqQHj9KTtg6wCI,385
+lite_agent/response_handlers/base.py,sha256=amQSnhUdoMyaacL7BlGfIUJDYDgqH6seYlfOl6loy-w,1566
+lite_agent/response_handlers/completion.py,sha256=X-sBM-ZBxodppcCXAwoN8Lslda5QYSoK7DdKEdgaYnM,2026
+lite_agent/response_handlers/responses.py,sha256=KEKnnsel8HLiF2Ob8TzVSXuRjudCpvyZ_GMrg3ME2g0,1915
 lite_agent/stream_handlers/__init__.py,sha256=a5s1GZr42uvndtcQqEhK2cnjGkK8ZFTAZCj3J61Bb5E,209
 lite_agent/stream_handlers/litellm.py,sha256=3D0u7R2ADA8kDwpFImZlw20o-CsmFXVLvq4nvwwD0Rk,2922
 lite_agent/templates/handoffs_source_instructions.xml.j2,sha256=2XsXQlBzk38qbxGrfyt8y2b0KlZmsV_1xavLufcdkHc,428
@@ -18,6 +22,6 @@ lite_agent/types/__init__.py,sha256=QKuhjFWRcpAlsBK9JYgoCABpoQExwhuyGudJoiiqQfs,
 lite_agent/types/events.py,sha256=mFMqV55WWJbPDyb_P61nd3qMLpEnwZgVY6NTKFkINkg,2389
 lite_agent/types/messages.py,sha256=c7nTIWqXNo562het_vaWcZvsoy-adkARwAYn4JNqm0c,9897
 lite_agent/types/tool_calls.py,sha256=Xnut8-2-Ld9vgA2GKJY6BbFlBaAv_n4W7vo7Jx21A-E,260
-lite_agent-0.
-lite_agent-0.
-lite_agent-0.
+lite_agent-0.6.0.dist-info/METADATA,sha256=_gfjiwA85XKoQdB9TCJx3BI2D21gNkw-C5pL3CaiZz8,3456
+lite_agent-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+lite_agent-0.6.0.dist-info/RECORD,,
{lite_agent-0.4.1.dist-info → lite_agent-0.6.0.dist-info}/WHEEL
File without changes