lite-agent 0.4.1-py3-none-any.whl → 0.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lite-agent might be problematic.

lite_agent/agent.py CHANGED
@@ -5,11 +5,10 @@ from typing import Any, Optional
 
 from funcall import Funcall
 from jinja2 import Environment, FileSystemLoader
-from litellm import CustomStreamWrapper
 
-from lite_agent.client import BaseLLMClient, LiteLLMClient
+from lite_agent.client import BaseLLMClient, LiteLLMClient, ReasoningConfig
 from lite_agent.loggers import logger
-from lite_agent.stream_handlers import litellm_completion_stream_handler, litellm_response_stream_handler
+from lite_agent.response_handlers import CompletionResponseHandler, ResponsesAPIHandler
 from lite_agent.types import AgentChunk, FunctionCallEvent, FunctionCallOutputEvent, RunnerMessages, ToolCall, message_to_llm_dict, system_message_to_llm_dict
 from lite_agent.types.messages import NewAssistantMessage, NewSystemMessage, NewUserMessage
 
@@ -22,7 +21,7 @@ WAIT_FOR_USER_INSTRUCTIONS_TEMPLATE = jinja_env.get_template("wait_for_user_inst
 
 
 class Agent:
-    def __init__(  # noqa: PLR0913
+    def __init__(
         self,
         *,
         model: str | BaseLLMClient,
@@ -32,15 +31,21 @@ class Agent:
         handoffs: list["Agent"] | None = None,
         message_transfer: Callable[[RunnerMessages], RunnerMessages] | None = None,
         completion_condition: str = "stop",
+        reasoning: ReasoningConfig = None,
     ) -> None:
         self.name = name
         self.instructions = instructions
+        self.reasoning = reasoning
+
         if isinstance(model, BaseLLMClient):
             # If model is a BaseLLMClient instance, use it directly
             self.client = model
         else:
             # Otherwise, create a LitellmClient instance
-            self.client = LiteLLMClient(model=model)
+            self.client = LiteLLMClient(
+                model=model,
+                reasoning=reasoning,
+            )
         self.completion_condition = completion_condition
         self.handoffs = handoffs if handoffs else []
         self._parent: Agent | None = None
@@ -269,7 +274,13 @@ class Agent:
            res.append(message)
        return res
 
-    async def completion(self, messages: RunnerMessages, record_to_file: Path | None = None) -> AsyncGenerator[AgentChunk, None]:
+    async def completion(
+        self,
+        messages: RunnerMessages,
+        record_to_file: Path | None = None,
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+    ) -> AsyncGenerator[AgentChunk, None]:
        # Apply message transfer callback if provided - always use legacy format for LLM compatibility
        processed_messages = messages
        if self.message_transfer:
@@ -284,15 +295,21 @@ class Agent:
            messages=self.message_histories,
            tools=tools,
            tool_choice="auto",  # TODO: make this configurable
+            reasoning=reasoning,
+            streaming=streaming,
        )
 
-        # Ensure resp is a CustomStreamWrapper
-        if isinstance(resp, CustomStreamWrapper):
-            return litellm_completion_stream_handler(resp, record_to=record_to_file)
-        msg = "Response is not a CustomStreamWrapper, cannot stream chunks."
-        raise TypeError(msg)
+        # Use response handler for unified processing
+        handler = CompletionResponseHandler()
+        return handler.handle(resp, streaming, record_to_file)
 
-    async def responses(self, messages: RunnerMessages, record_to_file: Path | None = None) -> AsyncGenerator[AgentChunk, None]:
+    async def responses(
+        self,
+        messages: RunnerMessages,
+        record_to_file: Path | None = None,
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+    ) -> AsyncGenerator[AgentChunk, None]:
        # Apply message transfer callback if provided - always use legacy format for LLM compatibility
        processed_messages = messages
        if self.message_transfer:
@@ -306,8 +323,12 @@
            messages=self.message_histories,
            tools=tools,
            tool_choice="auto",  # TODO: make this configurable
+            reasoning=reasoning,
+            streaming=streaming,
        )
-        return litellm_response_stream_handler(resp, record_to=record_to_file)
+        # Use response handler for unified processing
+        handler = ResponsesAPIHandler()
+        return handler.handle(resp, streaming, record_to_file)
 
    async def list_require_confirm_tools(self, tool_calls: Sequence[ToolCall] | None) -> Sequence[ToolCall]:
        if not tool_calls:
@@ -521,3 +542,4 @@ class Agent:
            required=[],
            handler=wait_for_user_handler,
        )
+
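
For reference, a minimal sketch (not part of the diff) of how the new unified reasoning configuration is passed when constructing an Agent. The model name and instructions are placeholders, and the name/instructions keyword parameters are assumed from the constructor body shown above:

from lite_agent.agent import Agent

agent = Agent(
    model="gpt-4.1-mini",  # placeholder model name
    name="assistant",  # assumed keyword parameter (stored as self.name above)
    instructions="You are a helpful assistant.",  # assumed keyword parameter
    reasoning="low",  # str | dict | bool | None, forwarded to LiteLLMClient
)
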
lite_agent/chat_display.py CHANGED
@@ -437,7 +437,7 @@ def display_messages(
    )
 
 
-def _display_single_message_compact(  # noqa: PLR0913
+def _display_single_message_compact(
    message: FlexibleRunnerMessage,
    *,
    index: int | None = None,
lite_agent/client.py CHANGED
@@ -1,25 +1,108 @@
 import abc
 import os
-from collections.abc import AsyncGenerator
 from typing import Any, Literal
 
 import litellm
-from litellm.types.llms.openai import ResponsesAPIStreamingResponse
 from openai.types.chat import ChatCompletionToolParam
 from openai.types.responses import FunctionToolParam
+from pydantic import BaseModel
+
+ReasoningEffort = Literal["minimal", "low", "medium", "high"]
+ThinkingConfig = dict[str, Any] | None
+
+# Unified reasoning configuration type
+ReasoningConfig = (
+    str
+    | dict[str, Any]  # {"type": "enabled", "budget_tokens": 2048} or other configs
+    | bool  # simple True/False switch
+    | None  # reasoning disabled
+)
+
+
+class LLMConfig(BaseModel):
+    """LLM generation parameters configuration."""
+
+    temperature: float | None = None
+    max_tokens: int | None = None
+    top_p: float | None = None
+    frequency_penalty: float | None = None
+    presence_penalty: float | None = None
+    stop: list[str] | str | None = None
+
+
+def parse_reasoning_config(reasoning: ReasoningConfig) -> tuple[ReasoningEffort | None, ThinkingConfig]:
+    """
+    Parse the unified reasoning config into reasoning_effort and thinking_config.
+
+    Args:
+        reasoning: Unified reasoning configuration
+            - str: "minimal", "low", "medium", "high" -> reasoning_effort
+            - dict: {"type": "enabled", "budget_tokens": N} -> thinking_config
+            - bool: True -> "medium", False -> None
+            - None: reasoning disabled
+
+    Returns:
+        tuple: (reasoning_effort, thinking_config)
+    """
+    if reasoning is None:
+        return None, None
+    if isinstance(reasoning, str):
+        # String values map to reasoning_effort
+        # Ensure the string is a valid ReasoningEffort value
+        if reasoning in ("minimal", "low", "medium", "high"):
+            return reasoning, None  # type: ignore[return-value]
+        return None, None
+    if isinstance(reasoning, dict):
+        # Dict values map to thinking_config
+        return None, reasoning
+    if isinstance(reasoning, bool):
+        # Booleans: True uses the default "medium", False disables reasoning
+        return "medium" if reasoning else None, None
+    # Any other type: reasoning disabled
+    return None, None
 
 
 class BaseLLMClient(abc.ABC):
     """Base class for LLM clients."""
 
-    def __init__(self, *, model: str, api_key: str | None = None, api_base: str | None = None, api_version: str | None = None):
+    def __init__(
+        self,
+        *,
+        model: str,
+        api_key: str | None = None,
+        api_base: str | None = None,
+        api_version: str | None = None,
+        reasoning: ReasoningConfig = None,
+        llm_config: LLMConfig | None = None,
+        **llm_params: Any,  # noqa: ANN401
+    ):
         self.model = model
         self.api_key = api_key
         self.api_base = api_base
         self.api_version = api_version
 
+        # Handle LLM generation parameters
+        if llm_config is not None:
+            self.llm_config = llm_config
+        else:
+            # Build the config from **llm_params
+            self.llm_config = LLMConfig(**llm_params)
+
+        # Handle reasoning configuration
+        self.reasoning_effort: ReasoningEffort | None
+        self.thinking_config: ThinkingConfig
+        self.reasoning_effort, self.thinking_config = parse_reasoning_config(reasoning)
+
     @abc.abstractmethod
-    async def completion(self, messages: list[Any], tools: list[ChatCompletionToolParam] | None = None, tool_choice: str = "auto") -> Any:  # noqa: ANN401
+    async def completion(
+        self,
+        messages: list[Any],
+        tools: list[ChatCompletionToolParam] | None = None,
+        tool_choice: str = "auto",
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # noqa: ANN401
         """Perform a completion request to the LLM."""
 
     @abc.abstractmethod
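
The mapping implemented by parse_reasoning_config above can be summarized with a few checks (a sketch assuming lite-agent 0.6.0 is installed; the expected values are read directly from the function body):

from lite_agent.client import parse_reasoning_config

assert parse_reasoning_config(None) == (None, None)
assert parse_reasoning_config("high") == ("high", None)
assert parse_reasoning_config("invalid") == (None, None)  # unrecognized strings disable reasoning
assert parse_reasoning_config({"type": "enabled", "budget_tokens": 2048}) == (None, {"type": "enabled", "budget_tokens": 2048})
assert parse_reasoning_config(True) == ("medium", None)
assert parse_reasoning_config(False) == (None, None)
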
@@ -28,42 +111,126 @@ class BaseLLMClient(abc.ABC):
         messages: list[dict[str, Any]],  # Changed from ResponseInputParam
         tools: list[FunctionToolParam] | None = None,
         tool_choice: Literal["none", "auto", "required"] = "auto",
-    ) -> AsyncGenerator[ResponsesAPIStreamingResponse, None]:
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # noqa: ANN401
         """Perform a response request to the LLM."""
 
 
 class LiteLLMClient(BaseLLMClient):
-    async def completion(self, messages: list[Any], tools: list[ChatCompletionToolParam] | None = None, tool_choice: str = "auto") -> Any:  # noqa: ANN401
+    def _resolve_reasoning_params(
+        self,
+        reasoning: ReasoningConfig,
+    ) -> tuple[ReasoningEffort | None, ThinkingConfig]:
+        """Resolve the reasoning configuration parameters."""
+        if reasoning is not None:
+            return parse_reasoning_config(reasoning)
+
+        # Fall back to instance defaults
+        return self.reasoning_effort, self.thinking_config
+
+    async def completion(
+        self,
+        messages: list[Any],
+        tools: list[ChatCompletionToolParam] | None = None,
+        tool_choice: str = "auto",
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # noqa: ANN401
         """Perform a completion request to the Litellm API."""
-        return await litellm.acompletion(
-            model=self.model,
-            messages=messages,
-            tools=tools,
-            tool_choice=tool_choice,
-            api_version=self.api_version,
-            api_key=self.api_key,
-            api_base=self.api_base,
-            stream=True,
+
+        # Resolve reasoning configuration parameters
+        final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
+            reasoning,
         )
 
+        # Prepare completion parameters
+        completion_params = {
+            "model": self.model,
+            "messages": messages,
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "api_version": self.api_version,
+            "api_key": self.api_key,
+            "api_base": self.api_base,
+            "stream": streaming,
+            **kwargs,
+        }
+
+        # Add LLM generation parameters if specified
+        if self.llm_config.temperature is not None:
+            completion_params["temperature"] = self.llm_config.temperature
+        if self.llm_config.max_tokens is not None:
+            completion_params["max_tokens"] = self.llm_config.max_tokens
+        if self.llm_config.top_p is not None:
+            completion_params["top_p"] = self.llm_config.top_p
+        if self.llm_config.frequency_penalty is not None:
+            completion_params["frequency_penalty"] = self.llm_config.frequency_penalty
+        if self.llm_config.presence_penalty is not None:
+            completion_params["presence_penalty"] = self.llm_config.presence_penalty
+        if self.llm_config.stop is not None:
+            completion_params["stop"] = self.llm_config.stop
+
+        # Add reasoning parameters if specified
+        if final_reasoning_effort is not None:
+            completion_params["reasoning_effort"] = final_reasoning_effort
+        if final_thinking_config is not None:
+            completion_params["thinking"] = final_thinking_config
+
+        return await litellm.acompletion(**completion_params)
+
     async def responses(
         self,
         messages: list[dict[str, Any]],  # Changed from ResponseInputParam
         tools: list[FunctionToolParam] | None = None,
         tool_choice: Literal["none", "auto", "required"] = "auto",
-    ) -> AsyncGenerator[ResponsesAPIStreamingResponse, None]:
+        reasoning: ReasoningConfig = None,
+        streaming: bool = True,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> Any:  # type: ignore[return]  # noqa: ANN401
         """Perform a response request to the Litellm API."""
 
         os.environ["DISABLE_AIOHTTP_TRANSPORT"] = "True"
 
-        return await litellm.aresponses(
-            model=self.model,
-            input=messages,  # type: ignore[arg-type]
-            tools=tools,
-            tool_choice=tool_choice,
-            api_version=self.api_version,
-            api_key=self.api_key,
-            api_base=self.api_base,
-            stream=True,
-            store=False,
+        # Resolve reasoning configuration parameters
+        final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
+            reasoning,
         )
+
+        # Prepare response parameters
+        response_params = {
+            "model": self.model,
+            "input": messages,  # type: ignore[arg-type]
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "api_version": self.api_version,
+            "api_key": self.api_key,
+            "api_base": self.api_base,
+            "stream": streaming,
+            "store": False,
+            **kwargs,
+        }
+
+        # Add LLM generation parameters if specified
+        if self.llm_config.temperature is not None:
+            response_params["temperature"] = self.llm_config.temperature
+        if self.llm_config.max_tokens is not None:
+            response_params["max_tokens"] = self.llm_config.max_tokens
+        if self.llm_config.top_p is not None:
+            response_params["top_p"] = self.llm_config.top_p
+        if self.llm_config.frequency_penalty is not None:
+            response_params["frequency_penalty"] = self.llm_config.frequency_penalty
+        if self.llm_config.presence_penalty is not None:
+            response_params["presence_penalty"] = self.llm_config.presence_penalty
+        if self.llm_config.stop is not None:
+            response_params["stop"] = self.llm_config.stop
+
+        # Add reasoning parameters if specified
+        if final_reasoning_effort is not None:
+            response_params["reasoning_effort"] = final_reasoning_effort
+        if final_thinking_config is not None:
+            response_params["thinking"] = final_thinking_config
+
+        return await litellm.aresponses(**response_params)  # type: ignore[return-value]
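
A minimal sketch (not part of the diff) of the two ways generation parameters can now be supplied to LiteLLMClient, and of the per-call reasoning override resolved by _resolve_reasoning_params; model names and values are placeholders:

from lite_agent.client import LLMConfig, LiteLLMClient

client = LiteLLMClient(
    model="gpt-4.1-mini",  # placeholder model name
    reasoning={"type": "enabled", "budget_tokens": 2048},  # stored as the instance thinking config
    llm_config=LLMConfig(temperature=0.2, max_tokens=1024),
)
# Equivalent generation parameters via **llm_params instead of an explicit LLMConfig:
client_alt = LiteLLMClient(model="gpt-4.1-mini", temperature=0.2, max_tokens=1024)

# Per call, `reasoning` overrides the instance default and `streaming=False`
# asks litellm for a non-streamed response (inside an async context):
# resp = await client.completion(messages=[{"role": "user", "content": "hi"}], reasoning="low", streaming=False)
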
lite_agent/response_handlers/__init__.py ADDED
@@ -0,0 +1,10 @@
+"""Response handlers for unified streaming and non-streaming processing."""
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.response_handlers.completion import CompletionResponseHandler
+from lite_agent.response_handlers.responses import ResponsesAPIHandler
+
+__all__ = [
+    "CompletionResponseHandler",
+    "ResponseHandler",
+    "ResponsesAPIHandler",
+]
lite_agent/response_handlers/base.py ADDED
@@ -0,0 +1,46 @@
+"""Base response handler for unified streaming and non-streaming response processing."""
+from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator
+from pathlib import Path
+from typing import Any
+
+from lite_agent.types import AgentChunk
+
+
+class ResponseHandler(ABC):
+    """Base class for handling both streaming and non-streaming responses."""
+
+    async def handle(
+        self,
+        response: Any,
+        streaming: bool,
+        record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle a response in either streaming or non-streaming mode.
+
+        Args:
+            response: The LLM response object
+            streaming: Whether to process as streaming or non-streaming
+            record_to: Optional file path to record the conversation
+
+        Yields:
+            AgentChunk: Processed chunks from the response
+        """
+        if streaming:
+            async for chunk in self._handle_streaming(response, record_to):
+                yield chunk
+        else:
+            async for chunk in self._handle_non_streaming(response, record_to):
+                yield chunk
+
+    @abstractmethod
+    async def _handle_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming response."""
+
+    @abstractmethod
+    async def _handle_non_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming response."""
lite_agent/response_handlers/completion.py ADDED
@@ -0,0 +1,50 @@
+"""Completion API response handler."""
+from collections.abc import AsyncGenerator
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from litellm import CustomStreamWrapper
+
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.stream_handlers import litellm_completion_stream_handler
+from lite_agent.types import AgentChunk
+from lite_agent.types.events import AssistantMessageEvent
+from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, NewAssistantMessage
+
+
+class CompletionResponseHandler(ResponseHandler):
+    """Handler for Completion API responses."""
+
+    async def _handle_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming completion response."""
+        if isinstance(response, CustomStreamWrapper):
+            async for chunk in litellm_completion_stream_handler(response, record_to):
+                yield chunk
+        else:
+            msg = "Response is not a CustomStreamWrapper, cannot stream chunks."
+            raise TypeError(msg)
+
+    async def _handle_non_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming completion response."""
+        # Convert completion response to chunks
+        if hasattr(response, "choices") and response.choices:
+            choice = response.choices[0]
+            content_items = []
+
+            # Add text content
+            if choice.message and choice.message.content:
+                content_items.append(AssistantTextContent(text=choice.message.content))
+
+            # TODO: Handle tool calls in the future
+
+            if content_items:
+                message = NewAssistantMessage(
+                    content=content_items,
+                    meta=AssistantMessageMeta(sent_at=datetime.now(timezone.utc)),
+                )
+                yield AssistantMessageEvent(message=message)
lite_agent/response_handlers/responses.py ADDED
@@ -0,0 +1,42 @@
+"""Responses API response handler."""
+from collections.abc import AsyncGenerator
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from lite_agent.response_handlers.base import ResponseHandler
+from lite_agent.stream_handlers import litellm_response_stream_handler
+from lite_agent.types import AgentChunk
+from lite_agent.types.events import AssistantMessageEvent
+from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, NewAssistantMessage
+
+
+class ResponsesAPIHandler(ResponseHandler):
+    """Handler for Responses API responses."""
+
+    async def _handle_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle streaming responses API response."""
+        async for chunk in litellm_response_stream_handler(response, record_to):
+            yield chunk
+
+    async def _handle_non_streaming(
+        self, response: Any, record_to: Path | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
+        """Handle non-streaming responses API response."""
+        # Convert ResponsesAPIResponse to chunks
+        if hasattr(response, "output") and response.output:
+            for output_message in response.output:
+                if hasattr(output_message, "content") and output_message.content:
+                    content_text = ""
+                    for content_item in output_message.content:
+                        if hasattr(content_item, "text"):
+                            content_text += content_item.text
+
+                    if content_text:
+                        message = NewAssistantMessage(
+                            content=[AssistantTextContent(text=content_text)],
+                            meta=AssistantMessageMeta(sent_at=datetime.now(timezone.utc)),
+                        )
+                        yield AssistantMessageEvent(message=message)
lite_agent/runner.py CHANGED
@@ -44,10 +44,11 @@ DEFAULT_INCLUDES: tuple[AgentChunkType, ...] = (
 
 
 class Runner:
-    def __init__(self, agent: Agent, api: Literal["completion", "responses"] = "responses") -> None:
+    def __init__(self, agent: Agent, api: Literal["completion", "responses"] = "responses", streaming: bool = True) -> None:
        self.agent = agent
        self.messages: list[NewMessage] = []
        self.api = api
+        self.streaming = streaming
        self._current_assistant_message: NewAssistantMessage | None = None
 
    @property
@@ -175,8 +176,10 @@
        includes: Sequence[AgentChunkType] | None = None,
        context: "Any | None" = None,  # noqa: ANN401
        record_to: PathLike | str | None = None,
+        agent_kwargs: dict[str, Any] | None = None,
    ) -> AsyncGenerator[AgentChunk, None]:
        """Run the agent and return a RunResponse object that can be asynchronously iterated for each chunk."""
+        logger.debug(f"Runner.run called with streaming={self.streaming}, api={self.api}")
        includes = self._normalize_includes(includes)
        match user_input:
            case str():
@@ -188,9 +191,17 @@
            case _:
                # Handle single message (BaseModel, TypedDict, or dict)
                self.append_message(user_input)  # type: ignore[arg-type]
-        return self._run(max_steps, includes, self._normalize_record_path(record_to), context=context)
+        logger.debug("Messages prepared, calling _run")
+        return self._run(max_steps, includes, self._normalize_record_path(record_to), context=context, agent_kwargs=agent_kwargs)
 
-    async def _run(self, max_steps: int, includes: Sequence[AgentChunkType], record_to: Path | None = None, context: Any | None = None) -> AsyncGenerator[AgentChunk, None]:  # noqa: ANN401
+    async def _run(
+        self,
+        max_steps: int,
+        includes: Sequence[AgentChunkType],
+        record_to: Path | None = None,
+        context: Any | None = None,  # noqa: ANN401
+        agent_kwargs: dict[str, Any] | None = None,
+    ) -> AsyncGenerator[AgentChunk, None]:
        """Run the agent and return a RunResponse object that can be asynchronously iterated for each chunk."""
        logger.debug(f"Running agent with messages: {self.messages}")
        steps = 0
@@ -213,14 +224,33 @@
            logger.debug(f"Step {steps}: finish_reason={finish_reason}, is_finish()={is_finish()}")
            # Convert to legacy format only when needed for LLM communication
            # This allows us to keep the new format internally but ensures compatibility
+            # Extract agent kwargs for reasoning configuration
+            reasoning = None
+            if agent_kwargs:
+                reasoning = agent_kwargs.get("reasoning")
+
+            logger.debug(f"Using API: {self.api}, streaming: {self.streaming}")
            match self.api:
                case "completion":
-                    resp = await self.agent.completion(self.messages, record_to_file=record_to)
+                    logger.debug("Calling agent.completion")
+                    resp = await self.agent.completion(
+                        self.messages,
+                        record_to_file=record_to,
+                        reasoning=reasoning,
+                        streaming=self.streaming,
+                    )
                case "responses":
-                    resp = await self.agent.responses(self.messages, record_to_file=record_to)
+                    logger.debug("Calling agent.responses")
+                    resp = await self.agent.responses(
+                        self.messages,
+                        record_to_file=record_to,
+                        reasoning=reasoning,
+                        streaming=self.streaming,
+                    )
                case _:
                    msg = f"Unknown API type: {self.api}"
                    raise ValueError(msg)
+            logger.debug(f"Received response from agent: {type(resp)}")
            async for chunk in resp:
                match chunk.type:
                    case "assistant_message":
@@ -367,11 +397,6 @@
            msg = "Cannot continue running without a valid last message from the assistant."
            raise ValueError(msg)
 
-        last_message = self.messages[-1]
-        if not (isinstance(last_message, NewAssistantMessage) or (hasattr(last_message, "role") and getattr(last_message, "role", None) == "assistant")):
-            msg = "Cannot continue running without a valid last message from the assistant."
-            raise ValueError(msg)
-
        resp = self._run(max_steps=max_steps, includes=includes, record_to=self._normalize_record_path(record_to), context=context)
        async for chunk in resp:
            yield chunk
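
A minimal sketch (not part of the diff) of driving the new non-streaming path end to end. The model name, agent setup, and positional first argument to run() are assumptions, and the isawaitable check is only a hedge because the diff does not show whether run() is a coroutine:

import asyncio
import inspect

from lite_agent.agent import Agent
from lite_agent.runner import Runner


async def main() -> None:
    agent = Agent(model="gpt-4.1-mini", name="assistant", instructions="Be brief.")  # placeholders
    runner = Runner(agent, api="completion", streaming=False)
    chunks = runner.run("Summarize this diff.", agent_kwargs={"reasoning": "low"})
    if inspect.isawaitable(chunks):  # hedge: run() may or may not need awaiting
        chunks = await chunks
    async for chunk in chunks:
        print(chunk.type)


asyncio.run(main())
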
lite_agent-0.4.1.dist-info/METADATA → lite_agent-0.6.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lite-agent
-Version: 0.4.1
+Version: 0.6.0
 Summary: A lightweight, extensible framework for building AI agent.
 Author-email: Jianqi Pan <jannchie@gmail.com>
 License: MIT
lite_agent-0.4.1.dist-info/RECORD → lite_agent-0.6.0.dist-info/RECORD CHANGED
@@ -1,14 +1,18 @@
 lite_agent/__init__.py,sha256=Swuefee0etSiaDnn30K2hBNV9UI3hIValW3A-pRE7e0,338
-lite_agent/agent.py,sha256=t4AYlw3aF2DCPXf2W3s7aow0ql1ON5O2Q8VVuyoN6UI,22936
-lite_agent/chat_display.py,sha256=b0sUH3fkutc4e_KAKH7AtPu2msyLloNIAiWqCNavdds,30533
-lite_agent/client.py,sha256=m2jfBPIsleMZ1QCczjyHND-PIF17kQh4RTuf5FaipGM,2571
+lite_agent/agent.py,sha256=9stxur0iqdG9NUDXdk1ElxenjYsRsurt36hGhZcz_-c,23323
+lite_agent/chat_display.py,sha256=Pfg6ZgTeIuzRZMVxOUzlwZU18rfOLD9-8I1lqUd_fXc,30516
+lite_agent/client.py,sha256=QhtZZGX54ha9-HKHcbx0qUsaAUi4-TAO-YckCH_itQY,8633
 lite_agent/loggers.py,sha256=XkNkdqwD_nQGfhQJ-bBWT7koci_mMkNw3aBpyMhOICw,57
 lite_agent/message_transfers.py,sha256=9qucjc-uSIXvVfhcmVRC_0lp0Q8sWp99dV4ReCh6ZlI,4428
 lite_agent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lite_agent/runner.py,sha256=ACZuFJ2dNpdg4Tzeg-bl4Th1X14uhHJdELcBWe5E_Us,40155
+lite_agent/runner.py,sha256=B0KYE0Wfta4X85kPm_hMMGrLz8o1-TmGUnOG1cUZeBM,40985
 lite_agent/processors/__init__.py,sha256=ybpAzpMBIE9v5I24wIBZRXeaOaPNTmoKH13aofgNI6Q,234
 lite_agent/processors/completion_event_processor.py,sha256=8fQYRofgBd8t0V3oUakTOmZdv5Q9tCuzADGCGvVgy0k,13442
 lite_agent/processors/response_event_processor.py,sha256=CElJMUzLs8mklVqJtoLiVu-NTq0Dz2NNd9YdAKpjgE0,8088
+lite_agent/response_handlers/__init__.py,sha256=2xe8YngMpjdp1B5tE8C3IiimYI30TnqQHj9KTtg6wCI,385
+lite_agent/response_handlers/base.py,sha256=amQSnhUdoMyaacL7BlGfIUJDYDgqH6seYlfOl6loy-w,1566
+lite_agent/response_handlers/completion.py,sha256=X-sBM-ZBxodppcCXAwoN8Lslda5QYSoK7DdKEdgaYnM,2026
+lite_agent/response_handlers/responses.py,sha256=KEKnnsel8HLiF2Ob8TzVSXuRjudCpvyZ_GMrg3ME2g0,1915
 lite_agent/stream_handlers/__init__.py,sha256=a5s1GZr42uvndtcQqEhK2cnjGkK8ZFTAZCj3J61Bb5E,209
 lite_agent/stream_handlers/litellm.py,sha256=3D0u7R2ADA8kDwpFImZlw20o-CsmFXVLvq4nvwwD0Rk,2922
 lite_agent/templates/handoffs_source_instructions.xml.j2,sha256=2XsXQlBzk38qbxGrfyt8y2b0KlZmsV_1xavLufcdkHc,428
@@ -18,6 +22,6 @@ lite_agent/types/__init__.py,sha256=QKuhjFWRcpAlsBK9JYgoCABpoQExwhuyGudJoiiqQfs,
 lite_agent/types/events.py,sha256=mFMqV55WWJbPDyb_P61nd3qMLpEnwZgVY6NTKFkINkg,2389
 lite_agent/types/messages.py,sha256=c7nTIWqXNo562het_vaWcZvsoy-adkARwAYn4JNqm0c,9897
 lite_agent/types/tool_calls.py,sha256=Xnut8-2-Ld9vgA2GKJY6BbFlBaAv_n4W7vo7Jx21A-E,260
-lite_agent-0.4.1.dist-info/METADATA,sha256=iQIr1OAdiVK5Ad6Uho65OpqS1u4YC9sOaoxKZ1FssOs,3456
-lite_agent-0.4.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-lite_agent-0.4.1.dist-info/RECORD,,
+lite_agent-0.6.0.dist-info/METADATA,sha256=_gfjiwA85XKoQdB9TCJx3BI2D21gNkw-C5pL3CaiZz8,3456
+lite_agent-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+lite_agent-0.6.0.dist-info/RECORD,,