lite-agent 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lite-agent might be problematic.

lite_agent/agent.py CHANGED
@@ -7,7 +7,7 @@ from funcall import Funcall
  from jinja2 import Environment, FileSystemLoader
  from litellm import CustomStreamWrapper

- from lite_agent.client import BaseLLMClient, LiteLLMClient
+ from lite_agent.client import BaseLLMClient, LiteLLMClient, ReasoningConfig
  from lite_agent.loggers import logger
  from lite_agent.stream_handlers import litellm_completion_stream_handler, litellm_response_stream_handler
  from lite_agent.types import AgentChunk, FunctionCallEvent, FunctionCallOutputEvent, RunnerMessages, ToolCall, message_to_llm_dict, system_message_to_llm_dict
@@ -32,15 +32,21 @@ class Agent:
  handoffs: list["Agent"] | None = None,
  message_transfer: Callable[[RunnerMessages], RunnerMessages] | None = None,
  completion_condition: str = "stop",
+ reasoning: ReasoningConfig = None,
  ) -> None:
  self.name = name
  self.instructions = instructions
+ self.reasoning = reasoning
+
  if isinstance(model, BaseLLMClient):
  # If model is a BaseLLMClient instance, use it directly
  self.client = model
  else:
  # Otherwise, create a LitellmClient instance
- self.client = LiteLLMClient(model=model)
+ self.client = LiteLLMClient(
+ model=model,
+ reasoning=reasoning,
+ )
  self.completion_condition = completion_condition
  self.handoffs = handoffs if handoffs else []
  self._parent: Agent | None = None
@@ -174,9 +180,11 @@ class Agent:
  if self.completion_condition == "call":
  instructions = WAIT_FOR_USER_INSTRUCTIONS_TEMPLATE.render(extra_instructions=None) + "\n\n" + instructions
  return [
- system_message_to_llm_dict(NewSystemMessage(
- content=f"You are {self.name}. {instructions}",
- )),
+ system_message_to_llm_dict(
+ NewSystemMessage(
+ content=f"You are {self.name}. {instructions}",
+ ),
+ ),
  *converted_messages,
  ]

@@ -267,7 +275,12 @@ class Agent:
  res.append(message)
  return res

- async def completion(self, messages: RunnerMessages, record_to_file: Path | None = None) -> AsyncGenerator[AgentChunk, None]:
+ async def completion(
+ self,
+ messages: RunnerMessages,
+ record_to_file: Path | None = None,
+ reasoning: ReasoningConfig = None,
+ ) -> AsyncGenerator[AgentChunk, None]:
  # Apply message transfer callback if provided - always use legacy format for LLM compatibility
  processed_messages = messages
  if self.message_transfer:
@@ -282,6 +295,7 @@ class Agent:
  messages=self.message_histories,
  tools=tools,
  tool_choice="auto", # TODO: make this configurable
+ reasoning=reasoning,
  )

  # Ensure resp is a CustomStreamWrapper
@@ -290,7 +304,12 @@ class Agent:
  msg = "Response is not a CustomStreamWrapper, cannot stream chunks."
  raise TypeError(msg)

- async def responses(self, messages: RunnerMessages, record_to_file: Path | None = None) -> AsyncGenerator[AgentChunk, None]:
+ async def responses(
+ self,
+ messages: RunnerMessages,
+ record_to_file: Path | None = None,
+ reasoning: ReasoningConfig = None,
+ ) -> AsyncGenerator[AgentChunk, None]:
  # Apply message transfer callback if provided - always use legacy format for LLM compatibility
  processed_messages = messages
  if self.message_transfer:
@@ -304,6 +323,7 @@ class Agent:
  messages=self.message_histories,
  tools=tools,
  tool_choice="auto", # TODO: make this configurable
+ reasoning=reasoning,
  )
  return litellm_response_stream_handler(resp, record_to=record_to_file)

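The practical effect of these agent.py changes is that an Agent can now carry a default reasoning configuration, which it forwards to the LiteLLMClient it constructs. A minimal usage sketch (the model string, name, and instructions are illustrative assumptions; "high" is one of the ReasoningEffort literals defined in client.py below):

# Sketch only: the keyword parameters follow the constructor shown in this diff; the model string is an assumption.
from lite_agent.agent import Agent

agent = Agent(
    name="Researcher",
    instructions="Answer carefully and cite sources.",
    model="openai/o3-mini",   # any litellm model string, or a BaseLLMClient instance
    reasoning="high",          # str -> reasoning_effort; dict -> thinking; bool -> default on/off
)
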
lite_agent/client.py CHANGED
@@ -1,25 +1,81 @@
  import abc
  import os
- from collections.abc import AsyncGenerator
  from typing import Any, Literal

  import litellm
- from litellm.types.llms.openai import ResponsesAPIStreamingResponse
  from openai.types.chat import ChatCompletionToolParam
  from openai.types.responses import FunctionToolParam

+ ReasoningEffort = Literal["minimal", "low", "medium", "high"]
+ ThinkingConfig = dict[str, Any] | None
+
+ # Unified reasoning configuration type
+ ReasoningConfig = (
+ str
+ | dict[str, Any] # {"type": "enabled", "budget_tokens": 2048} or other configuration
+ | bool # True/False simple on/off switch
+ | None # reasoning disabled
+ )
+
+
+ def parse_reasoning_config(reasoning: ReasoningConfig) -> tuple[ReasoningEffort | None, ThinkingConfig]:
+ """
+ Parse the unified reasoning configuration and return reasoning_effort and thinking_config.
+
+ Args:
+ reasoning: The unified reasoning configuration
+ - str: "minimal", "low", "medium", "high" -> reasoning_effort
+ - dict: {"type": "enabled", "budget_tokens": N} -> thinking_config
+ - bool: True -> "medium", False -> None
+ - None: reasoning disabled
+
+ Returns:
+ tuple: (reasoning_effort, thinking_config)
+ """
+ if reasoning is None:
+ return None, None
+ if isinstance(reasoning, str):
+ # String: use reasoning_effort
+ return reasoning, None
+ if isinstance(reasoning, dict):
+ # Dict: use thinking_config
+ return None, reasoning
+ if isinstance(reasoning, bool):
+ # Bool: True uses the default "medium", False disables reasoning
+ return "medium" if reasoning else None, None
+ # Any other type: reasoning disabled by default
+ return None, None
+

  class BaseLLMClient(abc.ABC):
  """Base class for LLM clients."""

- def __init__(self, *, model: str, api_key: str | None = None, api_base: str | None = None, api_version: str | None = None):
+ def __init__(
+ self,
+ *,
+ model: str,
+ api_key: str | None = None,
+ api_base: str | None = None,
+ api_version: str | None = None,
+ reasoning: ReasoningConfig = None,
+ ):
  self.model = model
  self.api_key = api_key
  self.api_base = api_base
  self.api_version = api_version

+ # Process the reasoning configuration
+ self.reasoning_effort, self.thinking_config = parse_reasoning_config(reasoning)
+
  @abc.abstractmethod
- async def completion(self, messages: list[Any], tools: list[ChatCompletionToolParam] | None = None, tool_choice: str = "auto") -> Any: # noqa: ANN401
+ async def completion(
+ self,
+ messages: list[Any],
+ tools: list[ChatCompletionToolParam] | None = None,
+ tool_choice: str = "auto",
+ reasoning: ReasoningConfig = None,
+ **kwargs: Any, # noqa: ANN401
+ ) -> Any: # noqa: ANN401
  """Perform a completion request to the LLM."""

  @abc.abstractmethod
@@ -28,42 +84,95 @@ class BaseLLMClient(abc.ABC):
  messages: list[dict[str, Any]], # Changed from ResponseInputParam
  tools: list[FunctionToolParam] | None = None,
  tool_choice: Literal["none", "auto", "required"] = "auto",
- ) -> AsyncGenerator[ResponsesAPIStreamingResponse, None]:
+ reasoning: ReasoningConfig = None,
+ **kwargs: Any, # noqa: ANN401
+ ) -> Any: # noqa: ANN401
  """Perform a response request to the LLM."""


  class LiteLLMClient(BaseLLMClient):
- async def completion(self, messages: list[Any], tools: list[ChatCompletionToolParam] | None = None, tool_choice: str = "auto") -> Any: # noqa: ANN401
+ def _resolve_reasoning_params(
+ self,
+ reasoning: ReasoningConfig,
+ ) -> tuple[ReasoningEffort | None, ThinkingConfig]:
+ """Resolve the reasoning configuration parameters."""
+ if reasoning is not None:
+ return parse_reasoning_config(reasoning)
+
+ # Fall back to the instance defaults
+ return self.reasoning_effort, self.thinking_config
+
+ async def completion(
+ self,
+ messages: list[Any],
+ tools: list[ChatCompletionToolParam] | None = None,
+ tool_choice: str = "auto",
+ reasoning: ReasoningConfig = None,
+ **kwargs: Any, # noqa: ANN401
+ ) -> Any: # noqa: ANN401
  """Perform a completion request to the Litellm API."""
- return await litellm.acompletion(
- model=self.model,
- messages=messages,
- tools=tools,
- tool_choice=tool_choice,
- api_version=self.api_version,
- api_key=self.api_key,
- api_base=self.api_base,
- stream=True,
+
+ # Resolve the reasoning configuration parameters
+ final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
+ reasoning,
  )

+ # Prepare completion parameters
+ completion_params = {
+ "model": self.model,
+ "messages": messages,
+ "tools": tools,
+ "tool_choice": tool_choice,
+ "api_version": self.api_version,
+ "api_key": self.api_key,
+ "api_base": self.api_base,
+ "stream": True,
+ **kwargs,
+ }
+
+ # Add reasoning parameters if specified
+ if final_reasoning_effort is not None:
+ completion_params["reasoning_effort"] = final_reasoning_effort
+ if final_thinking_config is not None:
+ completion_params["thinking"] = final_thinking_config
+
+ return await litellm.acompletion(**completion_params)
+
  async def responses(
  self,
  messages: list[dict[str, Any]], # Changed from ResponseInputParam
  tools: list[FunctionToolParam] | None = None,
  tool_choice: Literal["none", "auto", "required"] = "auto",
- ) -> AsyncGenerator[ResponsesAPIStreamingResponse, None]:
+ reasoning: ReasoningConfig = None,
+ **kwargs: Any, # noqa: ANN401
+ ) -> Any: # type: ignore[return] # noqa: ANN401
  """Perform a response request to the Litellm API."""

  os.environ["DISABLE_AIOHTTP_TRANSPORT"] = "True"

- return await litellm.aresponses(
- model=self.model,
- input=messages, # type: ignore[arg-type]
- tools=tools,
- tool_choice=tool_choice,
- api_version=self.api_version,
- api_key=self.api_key,
- api_base=self.api_base,
- stream=True,
- store=False,
+ # Resolve the reasoning configuration parameters
+ final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
+ reasoning,
  )
+
+ # Prepare response parameters
+ response_params = {
+ "model": self.model,
+ "input": messages, # type: ignore[arg-type]
+ "tools": tools,
+ "tool_choice": tool_choice,
+ "api_version": self.api_version,
+ "api_key": self.api_key,
+ "api_base": self.api_base,
+ "stream": True,
+ "store": False,
+ **kwargs,
+ }
+
+ # Add reasoning parameters if specified
+ if final_reasoning_effort is not None:
+ response_params["reasoning_effort"] = final_reasoning_effort
+ if final_thinking_config is not None:
+ response_params["thinking"] = final_thinking_config
+
+ return await litellm.aresponses(**response_params) # type: ignore[return-value]
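For reference, parse_reasoning_config maps the unified configuration onto the two litellm parameters as follows; a short sketch using the example values documented above (the budget_tokens figure is the one shown in the type comment):

# Sketch only: return values follow the branches of parse_reasoning_config above.
from lite_agent.client import parse_reasoning_config

parse_reasoning_config("high")                                       # ("high", None)   -> sent as reasoning_effort
parse_reasoning_config({"type": "enabled", "budget_tokens": 2048})   # (None, {...})    -> sent as thinking
parse_reasoning_config(True)                                         # ("medium", None) -> default effort
parse_reasoning_config(None)                                         # (None, None)     -> reasoning disabled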
lite_agent/runner.py CHANGED
@@ -30,6 +30,7 @@ from lite_agent.types import (
  UserMessageContent,
  UserTextContent,
  )
+ from lite_agent.types.events import AssistantMessageEvent

  DEFAULT_INCLUDES: tuple[AgentChunkType, ...] = (
  "completion_raw",
@@ -56,38 +57,31 @@ class Runner:

  def _start_assistant_message(self, content: str = "", meta: AssistantMessageMeta | None = None) -> None:
  """Start a new assistant message."""
- if meta is None:
- meta = AssistantMessageMeta()
-
- # Always add text content, even if empty (we can update it later)
- assistant_content_items: list[AssistantMessageContent] = [AssistantTextContent(text=content)]
  self._current_assistant_message = NewAssistantMessage(
- content=assistant_content_items,
- meta=meta,
+ content=[AssistantTextContent(text=content)],
+ meta=meta or AssistantMessageMeta(),
  )

- def _add_to_current_assistant_message(self, content_item: AssistantTextContent | AssistantToolCall | AssistantToolCallResult) -> None:
- """Add content to the current assistant message."""
+ def _ensure_current_assistant_message(self) -> NewAssistantMessage:
+ """Ensure current assistant message exists and return it."""
  if self._current_assistant_message is None:
  self._start_assistant_message()
+ return self._current_assistant_message # type: ignore[return-value]

- if self._current_assistant_message is not None:
- self._current_assistant_message.content.append(content_item)
+ def _add_to_current_assistant_message(self, content_item: AssistantTextContent | AssistantToolCall | AssistantToolCallResult) -> None:
+ """Add content to the current assistant message."""
+ self._ensure_current_assistant_message().content.append(content_item)

  def _add_text_content_to_current_assistant_message(self, delta: str) -> None:
  """Add text delta to the current assistant message's text content."""
- if self._current_assistant_message is None:
- self._start_assistant_message()
-
- if self._current_assistant_message is not None:
- # Find the first text content item and append the delta
- for content_item in self._current_assistant_message.content:
- if content_item.type == "text":
- content_item.text += delta
- return
- # If no text content found, add new text content
- new_content = AssistantTextContent(text=delta)
- self._current_assistant_message.content.append(new_content)
+ message = self._ensure_current_assistant_message()
+ # Find the first text content item and append the delta
+ for content_item in message.content:
+ if content_item.type == "text":
+ content_item.text += delta
+ return
+ # If no text content found, add new text content
+ message.content.append(AssistantTextContent(text=delta))

  def _finalize_assistant_message(self) -> None:
  """Finalize the current assistant message and add it to messages."""
@@ -131,7 +125,7 @@ class Runner:
  for i, tool_call in enumerate(transfer_calls):
  if i == 0:
  # Execute the first transfer
- await self._handle_agent_transfer(tool_call, includes)
+ await self._handle_agent_transfer(tool_call)
  else:
  # Add response for additional transfer calls without executing them
  self._add_tool_call_result(
@@ -146,7 +140,7 @@ class Runner:
  for i, tool_call in enumerate(return_parent_calls):
  if i == 0:
  # Execute the first transfer
- await self._handle_parent_transfer(tool_call, includes)
+ await self._handle_parent_transfer(tool_call)
  else:
  # Add response for additional transfer calls without executing them
  self._add_tool_call_result(
@@ -174,30 +168,37 @@ class Runner:
  """Collect all chunks from an async generator into a list."""
  return [chunk async for chunk in stream]

- def run(
+ def run( # noqa: PLR0913
  self,
  user_input: UserInput,
  max_steps: int = 20,
  includes: Sequence[AgentChunkType] | None = None,
  context: "Any | None" = None, # noqa: ANN401
  record_to: PathLike | str | None = None,
+ agent_kwargs: dict[str, Any] | None = None,
  ) -> AsyncGenerator[AgentChunk, None]:
  """Run the agent and return a RunResponse object that can be asynchronously iterated for each chunk."""
  includes = self._normalize_includes(includes)
- if isinstance(user_input, str):
- user_message = NewUserMessage(content=[UserTextContent(text=user_input)])
- self.messages.append(user_message)
- elif isinstance(user_input, (list, tuple)):
- # Handle sequence of messages
- for message in user_input:
- self.append_message(message)
- else:
- # Handle single message (BaseModel, TypedDict, or dict)
- # Type assertion needed due to the complex union type
- self.append_message(user_input) # type: ignore[arg-type]
- return self._run(max_steps, includes, self._normalize_record_path(record_to), context=context)
-
- async def _run(self, max_steps: int, includes: Sequence[AgentChunkType], record_to: Path | None = None, context: Any | None = None) -> AsyncGenerator[AgentChunk, None]: # noqa: ANN401
+ match user_input:
+ case str():
+ self.messages.append(NewUserMessage(content=[UserTextContent(text=user_input)]))
+ case list() | tuple():
+ # Handle sequence of messages
+ for message in user_input:
+ self.append_message(message)
+ case _:
+ # Handle single message (BaseModel, TypedDict, or dict)
+ self.append_message(user_input) # type: ignore[arg-type]
+ return self._run(max_steps, includes, self._normalize_record_path(record_to), context=context, agent_kwargs=agent_kwargs)
+
+ async def _run(
+ self,
+ max_steps: int,
+ includes: Sequence[AgentChunkType],
+ record_to: Path | None = None,
+ context: Any | None = None, # noqa: ANN401
+ agent_kwargs: dict[str, Any] | None = None,
+ ) -> AsyncGenerator[AgentChunk, None]:
  """Run the agent and return a RunResponse object that can be asynchronously iterated for each chunk."""
  logger.debug(f"Running agent with messages: {self.messages}")
  steps = 0
@@ -220,71 +221,101 @@ class Runner:
  logger.debug(f"Step {steps}: finish_reason={finish_reason}, is_finish()={is_finish()}")
  # Convert to legacy format only when needed for LLM communication
  # This allows us to keep the new format internally but ensures compatibility
+ # Extract agent kwargs for reasoning configuration
+ reasoning = None
+ if agent_kwargs:
+ reasoning = agent_kwargs.get("reasoning")
+
  match self.api:
  case "completion":
- resp = await self.agent.completion(self.messages, record_to_file=record_to)
+ resp = await self.agent.completion(
+ self.messages,
+ record_to_file=record_to,
+ reasoning=reasoning,
+ )
  case "responses":
- resp = await self.agent.responses(self.messages, record_to_file=record_to)
+ resp = await self.agent.responses(
+ self.messages,
+ record_to_file=record_to,
+ reasoning=reasoning,
+ )
  case _:
  msg = f"Unknown API type: {self.api}"
  raise ValueError(msg)
  async for chunk in resp:
- if chunk.type in includes:
- yield chunk
- if chunk.type == "assistant_message":
- # Start or update assistant message in new format
- meta = AssistantMessageMeta(
- sent_at=chunk.message.meta.sent_at,
- latency_ms=getattr(chunk.message.meta, "latency_ms", None),
- total_time_ms=getattr(chunk.message.meta, "output_time_ms", None),
- )
- # If we already have a current assistant message, just update its metadata
- if self._current_assistant_message is not None:
- self._current_assistant_message.meta = meta
- else:
- # Extract text content from the new message format
- text_content = ""
- if chunk.message.content:
- for item in chunk.message.content:
- if hasattr(item, "type") and item.type == "text":
- text_content = item.text
- break
- self._start_assistant_message(text_content, meta)
- if chunk.type == "content_delta":
- # Accumulate text content to current assistant message
- self._add_text_content_to_current_assistant_message(chunk.delta)
- if chunk.type == "function_call":
- # Add tool call to current assistant message
- # Keep arguments as string for compatibility with funcall library
- tool_call = AssistantToolCall(
- call_id=chunk.call_id,
- name=chunk.name,
- arguments=chunk.arguments or "{}",
- )
- self._add_to_current_assistant_message(tool_call)
- if chunk.type == "usage":
- # Update the last assistant message with usage data and output_time_ms
- usage_time = datetime.now(timezone.utc)
- for i in range(len(self.messages) - 1, -1, -1):
- current_message = self.messages[i]
- if isinstance(current_message, NewAssistantMessage):
- # Update usage information
- if current_message.meta.usage is None:
- current_message.meta.usage = MessageUsage()
- current_message.meta.usage.input_tokens = chunk.usage.input_tokens
- current_message.meta.usage.output_tokens = chunk.usage.output_tokens
- current_message.meta.usage.total_tokens = (chunk.usage.input_tokens or 0) + (chunk.usage.output_tokens or 0)
-
- # Calculate output_time_ms if latency_ms is available
- if current_message.meta.latency_ms is not None:
- # We need to calculate from first output to usage time
- # We'll calculate: usage_time - (sent_at - latency_ms)
- # This gives us the time from first output to usage completion
- # sent_at is when the message was completed, so sent_at - latency_ms approximates first output time
- first_output_time_approx = current_message.meta.sent_at - timedelta(milliseconds=current_message.meta.latency_ms)
- output_time_ms = int((usage_time - first_output_time_approx).total_seconds() * 1000)
- current_message.meta.total_time_ms = max(0, output_time_ms)
- break
+ match chunk.type:
+ case "assistant_message":
+ # Start or update assistant message in new format
+ meta = AssistantMessageMeta(
+ sent_at=chunk.message.meta.sent_at,
+ latency_ms=getattr(chunk.message.meta, "latency_ms", None),
+ total_time_ms=getattr(chunk.message.meta, "output_time_ms", None),
+ )
+ # If we already have a current assistant message, just update its metadata
+ if self._current_assistant_message is not None:
+ self._current_assistant_message.meta = meta
+ else:
+ # Extract text content from the new message format
+ text_content = ""
+ if chunk.message.content:
+ for item in chunk.message.content:
+ if hasattr(item, "type") and item.type == "text":
+ text_content = item.text
+ break
+ self._start_assistant_message(text_content, meta)
+ # Only yield assistant_message chunk if it's in includes and has content
+ if chunk.type in includes and self._current_assistant_message is not None:
+ # Create a new chunk with the current assistant message content
+ updated_chunk = AssistantMessageEvent(
+ message=self._current_assistant_message,
+ )
+ yield updated_chunk
+ case "content_delta":
+ # Accumulate text content to current assistant message
+ self._add_text_content_to_current_assistant_message(chunk.delta)
+ # Always yield content_delta chunk if it's in includes
+ if chunk.type in includes:
+ yield chunk
+ case "function_call":
+ # Add tool call to current assistant message
+ # Keep arguments as string for compatibility with funcall library
+ tool_call = AssistantToolCall(
+ call_id=chunk.call_id,
+ name=chunk.name,
+ arguments=chunk.arguments or "{}",
+ )
+ self._add_to_current_assistant_message(tool_call)
+ # Always yield function_call chunk if it's in includes
+ if chunk.type in includes:
+ yield chunk
+ case "usage":
+ # Update the last assistant message with usage data and output_time_ms
+ usage_time = datetime.now(timezone.utc)
+ for i in range(len(self.messages) - 1, -1, -1):
+ current_message = self.messages[i]
+ if isinstance(current_message, NewAssistantMessage):
+ # Update usage information
+ if current_message.meta.usage is None:
+ current_message.meta.usage = MessageUsage()
+ current_message.meta.usage.input_tokens = chunk.usage.input_tokens
+ current_message.meta.usage.output_tokens = chunk.usage.output_tokens
+ current_message.meta.usage.total_tokens = (chunk.usage.input_tokens or 0) + (chunk.usage.output_tokens or 0)
+
+ # Calculate output_time_ms if latency_ms is available
+ if current_message.meta.latency_ms is not None:
+ # We need to calculate from first output to usage time
+ # We'll calculate: usage_time - (sent_at - latency_ms)
+ # This gives us the time from first output to usage completion
+ # sent_at is when the message was completed, so sent_at - latency_ms approximates first output time
+ first_output_time_approx = current_message.meta.sent_at - timedelta(milliseconds=current_message.meta.latency_ms)
+ output_time_ms = int((usage_time - first_output_time_approx).total_seconds() * 1000)
+ current_message.meta.total_time_ms = max(0, output_time_ms)
+ break
+ # Always yield usage chunk if it's in includes
+ if chunk.type in includes:
+ yield chunk
+ case _ if chunk.type in includes:
+ yield chunk

  # Finalize assistant message so it can be found in pending function calls
  self._finalize_assistant_message()
@@ -357,11 +388,6 @@ class Runner:
  msg = "Cannot continue running without a valid last message from the assistant."
  raise ValueError(msg)

- last_message = self.messages[-1]
- if not (isinstance(last_message, NewAssistantMessage) or (hasattr(last_message, "role") and getattr(last_message, "role", None) == "assistant")):
- msg = "Cannot continue running without a valid last message from the assistant."
- raise ValueError(msg)
-
  resp = self._run(max_steps=max_steps, includes=includes, record_to=self._normalize_record_path(record_to), context=context)
  async for chunk in resp:
  yield chunk
@@ -377,58 +403,50 @@ class Runner:
  resp = self.run(user_input, max_steps, includes, record_to=record_to)
  return await self._collect_all_chunks(resp)

- def _find_pending_tool_calls(self) -> list[AssistantToolCall]:
- """Find tool calls that don't have corresponding results yet."""
- # Find pending calls directly in new format messages
- pending_calls: list[AssistantToolCall] = []
-
- # Look at the last assistant message for pending tool calls
- if not self.messages:
- return pending_calls
-
- last_message = self.messages[-1]
- if not isinstance(last_message, NewAssistantMessage):
- return pending_calls
+ def _analyze_last_assistant_message(self) -> tuple[list[AssistantToolCall], dict[str, str]]:
+ """Analyze the last assistant message and return pending tool calls and tool call map."""
+ if not self.messages or not isinstance(self.messages[-1], NewAssistantMessage):
+ return [], {}

- # Collect tool calls and results from the last assistant message
  tool_calls = {}
  tool_results = set()
+ tool_call_names = {}

- for content_item in last_message.content:
+ for content_item in self.messages[-1].content:
  if content_item.type == "tool_call":
  tool_calls[content_item.call_id] = content_item
+ tool_call_names[content_item.call_id] = content_item.name
  elif content_item.type == "tool_call_result":
  tool_results.add(content_item.call_id)

- # Return tool calls that don't have corresponding results
- return [call for call_id, call in tool_calls.items() if call_id not in tool_results]
+ # Return pending tool calls and tool call names map
+ pending_calls = [call for call_id, call in tool_calls.items() if call_id not in tool_results]
+ return pending_calls, tool_call_names
+
+ def _find_pending_tool_calls(self) -> list[AssistantToolCall]:
+ """Find tool calls that don't have corresponding results yet."""
+ pending_calls, _ = self._analyze_last_assistant_message()
+ return pending_calls

  def _get_tool_call_name_by_id(self, call_id: str) -> str | None:
  """Get the tool name for a given call_id from the last assistant message."""
- if not self.messages or not isinstance(self.messages[-1], NewAssistantMessage):
- return None
-
- for content_item in self.messages[-1].content:
- if content_item.type == "tool_call" and content_item.call_id == call_id:
- return content_item.name
- return None
+ _, tool_call_names = self._analyze_last_assistant_message()
+ return tool_call_names.get(call_id)

  def _convert_tool_calls_to_tool_calls(self, tool_calls: list[AssistantToolCall]) -> list[ToolCall]:
  """Convert AssistantToolCall objects to ToolCall objects for compatibility."""
-
- result_tool_calls = []
- for tc in tool_calls:
- tool_call = ToolCall(
+ return [
+ ToolCall(
  id=tc.call_id,
  type="function",
  function=ToolCallFunction(
  name=tc.name,
  arguments=tc.arguments if isinstance(tc.arguments, str) else str(tc.arguments),
  ),
- index=len(result_tool_calls),
+ index=i,
  )
- result_tool_calls.append(tool_call)
- return result_tool_calls
+ for i, tc in enumerate(tool_calls)
+ ]

  def set_chat_history(self, messages: Sequence[FlexibleRunnerMessage], root_agent: Agent | None = None) -> None:
  """Set the entire chat history and track the current agent based on function calls.
@@ -691,12 +709,11 @@ class Runner:
  msg = f"Unsupported message type: {type(message)}"
  raise TypeError(msg)

- async def _handle_agent_transfer(self, tool_call: ToolCall, _includes: Sequence[AgentChunkType]) -> None:
+ async def _handle_agent_transfer(self, tool_call: ToolCall) -> None:
  """Handle agent transfer when transfer_to_agent tool is called.

  Args:
  tool_call: The transfer_to_agent tool call
- _includes: The types of chunks to include in output (unused)
  """

  # Parse the arguments to get the target agent name
@@ -771,12 +788,11 @@ class Runner:
  output=f"Transfer failed: {e!s}",
  )

- async def _handle_parent_transfer(self, tool_call: ToolCall, _includes: Sequence[AgentChunkType]) -> None:
+ async def _handle_parent_transfer(self, tool_call: ToolCall) -> None:
  """Handle parent transfer when transfer_to_parent tool is called.

  Args:
  tool_call: The transfer_to_parent tool call
- _includes: The types of chunks to include in output (unused)
  """

  # Check if current agent has a parent
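Combined with the agent.py changes, a per-run reasoning override can be supplied through Runner.run's new agent_kwargs parameter, from which _run extracts the "reasoning" key. A rough sketch (the Runner(agent) construction and the prompt are assumptions; agent_kwargs, "reasoning", and the chunk interface come from this diff):

# Sketch only: Runner construction signature is assumed; `agent` is the instance from the earlier sketch.
import asyncio
from lite_agent.runner import Runner

async def main() -> None:
    runner = Runner(agent)
    async for chunk in runner.run(
        "Summarize the latest findings.",       # illustrative prompt
        agent_kwargs={"reasoning": "low"},      # per-run override of the agent-level default
    ):
        print(chunk.type)

asyncio.run(main())
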
lite_agent/stream_handlers/litellm.py CHANGED
@@ -16,18 +16,27 @@ if TYPE_CHECKING:
  from aiofiles.threadpool.text import AsyncTextIOWrapper


- def ensure_record_file(record_to: Path | None) -> Path | None:
+ def ensure_record_file(record_to: Path | str | None) -> Path | None:
  if not record_to:
  return None
- if not record_to.parent.exists():
- logger.warning('Record directory "%s" does not exist, creating it.', record_to.parent)
- record_to.parent.mkdir(parents=True, exist_ok=True)
- return record_to
+
+ path = Path(record_to) if isinstance(record_to, str) else record_to
+
+ # If the path is a directory, generate a filename
+ if path.is_dir():
+ path = path / "conversation.jsonl"
+
+ # Ensure parent directory exists
+ if not path.parent.exists():
+ logger.warning('Record directory "%s" does not exist, creating it.', path.parent)
+ path.parent.mkdir(parents=True, exist_ok=True)
+
+ return path


  async def litellm_completion_stream_handler(
  resp: litellm.CustomStreamWrapper,
- record_to: Path | None = None,
+ record_to: Path | str | None = None,
  ) -> AsyncGenerator[AgentChunk, None]:
  """
  Optimized chunk handler
@@ -52,7 +61,7 @@ async def litellm_completion_stream_handler(

  async def litellm_response_stream_handler(
  resp: AsyncGenerator[ResponsesAPIStreamingResponse, None],
- record_to: Path | None = None,
+ record_to: Path | str | None = None,
  ) -> AsyncGenerator[AgentChunk, None]:
  """
  Response API stream handler for processing ResponsesAPIStreamingResponse chunks
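A side effect of the ensure_record_file change is that the stream handlers now accept record_to as a Path or str, and a directory path is resolved to a conversation.jsonl file inside it. A small sketch (the "logs" path is illustrative):

# Sketch only: behavior follows ensure_record_file above; "logs" is an illustrative path.
from lite_agent.stream_handlers.litellm import ensure_record_file

path = ensure_record_file("logs")  # str accepted; if "logs" is an existing directory,
print(path)                        # this returns Path("logs/conversation.jsonl")
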
lite_agent-0.4.0.dist-info/METADATA → lite_agent-0.5.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lite-agent
- Version: 0.4.0
+ Version: 0.5.0
  Summary: A lightweight, extensible framework for building AI agent.
  Author-email: Jianqi Pan <jannchie@gmail.com>
  License: MIT
lite_agent-0.4.0.dist-info/RECORD → lite_agent-0.5.0.dist-info/RECORD RENAMED
@@ -1,16 +1,16 @@
  lite_agent/__init__.py,sha256=Swuefee0etSiaDnn30K2hBNV9UI3hIValW3A-pRE7e0,338
- lite_agent/agent.py,sha256=7wM8nVXIaPvEirRiY2HV0rY0sQuYCj-_hQ6V369bB58,22897
+ lite_agent/agent.py,sha256=M0U59KpMy6OGFje6yZuQCYVGr4oBboRwbtImPF59o2w,23314
  lite_agent/chat_display.py,sha256=b0sUH3fkutc4e_KAKH7AtPu2msyLloNIAiWqCNavdds,30533
- lite_agent/client.py,sha256=m2jfBPIsleMZ1QCczjyHND-PIF17kQh4RTuf5FaipGM,2571
+ lite_agent/client.py,sha256=HG-NbTIUSFAUAPjRow3TFYJxvTc6Y4bdT2oJWIJNEEk,5963
  lite_agent/loggers.py,sha256=XkNkdqwD_nQGfhQJ-bBWT7koci_mMkNw3aBpyMhOICw,57
  lite_agent/message_transfers.py,sha256=9qucjc-uSIXvVfhcmVRC_0lp0Q8sWp99dV4ReCh6ZlI,4428
  lite_agent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- lite_agent/runner.py,sha256=wi56i5cMYAJi3U9GBsBlzhGC6Do9ovMSOsA_Qqy2HtA,39642
+ lite_agent/runner.py,sha256=U7eVNAJ_VLwgbPPpn-vggSgvBmFl8wMMFWn3mWCsDow,40423
  lite_agent/processors/__init__.py,sha256=ybpAzpMBIE9v5I24wIBZRXeaOaPNTmoKH13aofgNI6Q,234
  lite_agent/processors/completion_event_processor.py,sha256=8fQYRofgBd8t0V3oUakTOmZdv5Q9tCuzADGCGvVgy0k,13442
  lite_agent/processors/response_event_processor.py,sha256=CElJMUzLs8mklVqJtoLiVu-NTq0Dz2NNd9YdAKpjgE0,8088
  lite_agent/stream_handlers/__init__.py,sha256=a5s1GZr42uvndtcQqEhK2cnjGkK8ZFTAZCj3J61Bb5E,209
- lite_agent/stream_handlers/litellm.py,sha256=lE2whfFG-txhjeIp58yZ4nqApXjVeSACUIk-3KYcnVg,2692
+ lite_agent/stream_handlers/litellm.py,sha256=3D0u7R2ADA8kDwpFImZlw20o-CsmFXVLvq4nvwwD0Rk,2922
  lite_agent/templates/handoffs_source_instructions.xml.j2,sha256=2XsXQlBzk38qbxGrfyt8y2b0KlZmsV_1xavLufcdkHc,428
  lite_agent/templates/handoffs_target_instructions.xml.j2,sha256=gSbWVYYcovPKbGpFc0kqGSJ5Y5UC3fOHyUmZfcrDgSE,356
  lite_agent/templates/wait_for_user_instructions.xml.j2,sha256=wXbcYD5Q1FaCGVBm3Hz_Cp7nnoK7KzloP0ao-jYMwPk,231
@@ -18,6 +18,6 @@ lite_agent/types/__init__.py,sha256=QKuhjFWRcpAlsBK9JYgoCABpoQExwhuyGudJoiiqQfs,
  lite_agent/types/events.py,sha256=mFMqV55WWJbPDyb_P61nd3qMLpEnwZgVY6NTKFkINkg,2389
  lite_agent/types/messages.py,sha256=c7nTIWqXNo562het_vaWcZvsoy-adkARwAYn4JNqm0c,9897
  lite_agent/types/tool_calls.py,sha256=Xnut8-2-Ld9vgA2GKJY6BbFlBaAv_n4W7vo7Jx21A-E,260
- lite_agent-0.4.0.dist-info/METADATA,sha256=biQoUoss9DcSuUX494mPj2yoXtE0H81Far_K3IpFB30,3456
- lite_agent-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- lite_agent-0.4.0.dist-info/RECORD,,
+ lite_agent-0.5.0.dist-info/METADATA,sha256=20K2Xirnyawl1uN_I8TLcuGlgRjNhs04hz2BtDDRnbM,3456
+ lite_agent-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ lite_agent-0.5.0.dist-info/RECORD,,