lite-agent 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lite-agent might be problematic.

lite_agent/agent.py CHANGED
@@ -7,7 +7,7 @@ from funcall import Funcall
  from jinja2 import Environment, FileSystemLoader
  from litellm import CustomStreamWrapper

- from lite_agent.client import BaseLLMClient, LiteLLMClient
+ from lite_agent.client import BaseLLMClient, LiteLLMClient, ReasoningConfig
  from lite_agent.loggers import logger
  from lite_agent.stream_handlers import litellm_completion_stream_handler, litellm_response_stream_handler
  from lite_agent.types import AgentChunk, FunctionCallEvent, FunctionCallOutputEvent, RunnerMessages, ToolCall, message_to_llm_dict, system_message_to_llm_dict
@@ -32,15 +32,21 @@ class Agent:
  handoffs: list["Agent"] | None = None,
  message_transfer: Callable[[RunnerMessages], RunnerMessages] | None = None,
  completion_condition: str = "stop",
+ reasoning: ReasoningConfig = None,
  ) -> None:
  self.name = name
  self.instructions = instructions
+ self.reasoning = reasoning
+
  if isinstance(model, BaseLLMClient):
  # If model is a BaseLLMClient instance, use it directly
  self.client = model
  else:
  # Otherwise, create a LitellmClient instance
- self.client = LiteLLMClient(model=model)
+ self.client = LiteLLMClient(
+ model=model,
+ reasoning=reasoning,
+ )
  self.completion_condition = completion_condition
  self.handoffs = handoffs if handoffs else []
  self._parent: Agent | None = None
@@ -174,9 +180,11 @@ class Agent:
  if self.completion_condition == "call":
  instructions = WAIT_FOR_USER_INSTRUCTIONS_TEMPLATE.render(extra_instructions=None) + "\n\n" + instructions
  return [
- system_message_to_llm_dict(NewSystemMessage(
- content=f"You are {self.name}. {instructions}",
- )),
+ system_message_to_llm_dict(
+ NewSystemMessage(
+ content=f"You are {self.name}. {instructions}",
+ ),
+ ),
  *converted_messages,
  ]

@@ -267,7 +275,12 @@ class Agent:
  res.append(message)
  return res

- async def completion(self, messages: RunnerMessages, record_to_file: Path | None = None) -> AsyncGenerator[AgentChunk, None]:
+ async def completion(
+ self,
+ messages: RunnerMessages,
+ record_to_file: Path | None = None,
+ reasoning: ReasoningConfig = None,
+ ) -> AsyncGenerator[AgentChunk, None]:
  # Apply message transfer callback if provided - always use legacy format for LLM compatibility
  processed_messages = messages
  if self.message_transfer:
@@ -282,6 +295,7 @@ class Agent:
  messages=self.message_histories,
  tools=tools,
  tool_choice="auto", # TODO: make this configurable
+ reasoning=reasoning,
  )

  # Ensure resp is a CustomStreamWrapper
@@ -290,7 +304,12 @@ class Agent:
  msg = "Response is not a CustomStreamWrapper, cannot stream chunks."
  raise TypeError(msg)

- async def responses(self, messages: RunnerMessages, record_to_file: Path | None = None) -> AsyncGenerator[AgentChunk, None]:
+ async def responses(
+ self,
+ messages: RunnerMessages,
+ record_to_file: Path | None = None,
+ reasoning: ReasoningConfig = None,
+ ) -> AsyncGenerator[AgentChunk, None]:
  # Apply message transfer callback if provided - always use legacy format for LLM compatibility
  processed_messages = messages
  if self.message_transfer:
@@ -304,6 +323,7 @@ class Agent:
  messages=self.message_histories,
  tools=tools,
  tool_choice="auto", # TODO: make this configurable
+ reasoning=reasoning,
  )
  return litellm_response_stream_handler(resp, record_to=record_to_file)

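The practical effect of these agent.py changes is that an Agent can now carry a default reasoning configuration, which it forwards to the LiteLLMClient it constructs. A minimal usage sketch (the model string, name, and instructions are illustrative assumptions; "high" is one of the ReasoningEffort literals defined in client.py below):

# Sketch only: the keyword parameters follow the constructor shown in this diff; the model string is an assumption.
from lite_agent.agent import Agent

agent = Agent(
    name="Researcher",
    instructions="Answer carefully and cite sources.",
    model="openai/o3-mini",   # any litellm model string, or a BaseLLMClient instance
    reasoning="high",          # str -> reasoning_effort; dict -> thinking; bool -> default on/off
)
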
lite_agent/client.py CHANGED
@@ -1,25 +1,81 @@
  import abc
  import os
- from collections.abc import AsyncGenerator
  from typing import Any, Literal

  import litellm
- from litellm.types.llms.openai import ResponsesAPIStreamingResponse
  from openai.types.chat import ChatCompletionToolParam
  from openai.types.responses import FunctionToolParam

+ ReasoningEffort = Literal["minimal", "low", "medium", "high"]
+ ThinkingConfig = dict[str, Any] | None
+
+ # Unified reasoning configuration type
+ ReasoningConfig = (
+ str
+ | dict[str, Any] # {"type": "enabled", "budget_tokens": 2048} or other configuration
+ | bool # True/False simple on/off switch
+ | None # reasoning disabled
+ )
+
+
+ def parse_reasoning_config(reasoning: ReasoningConfig) -> tuple[ReasoningEffort | None, ThinkingConfig]:
+ """
+ Parse the unified reasoning configuration and return reasoning_effort and thinking_config.
+
+ Args:
+ reasoning: The unified reasoning configuration
+ - str: "minimal", "low", "medium", "high" -> reasoning_effort
+ - dict: {"type": "enabled", "budget_tokens": N} -> thinking_config
+ - bool: True -> "medium", False -> None
+ - None: reasoning disabled
+
+ Returns:
+ tuple: (reasoning_effort, thinking_config)
+ """
+ if reasoning is None:
+ return None, None
+ if isinstance(reasoning, str):
+ # String: use reasoning_effort
+ return reasoning, None
+ if isinstance(reasoning, dict):
+ # Dict: use thinking_config
+ return None, reasoning
+ if isinstance(reasoning, bool):
+ # Bool: True uses the default "medium", False disables reasoning
+ return "medium" if reasoning else None, None
+ # Any other type: reasoning disabled by default
+ return None, None
+

  class BaseLLMClient(abc.ABC):
  """Base class for LLM clients."""

- def __init__(self, *, model: str, api_key: str | None = None, api_base: str | None = None, api_version: str | None = None):
+ def __init__(
+ self,
+ *,
+ model: str,
+ api_key: str | None = None,
+ api_base: str | None = None,
+ api_version: str | None = None,
+ reasoning: ReasoningConfig = None,
+ ):
  self.model = model
  self.api_key = api_key
  self.api_base = api_base
  self.api_version = api_version

+ # Process the reasoning configuration
+ self.reasoning_effort, self.thinking_config = parse_reasoning_config(reasoning)
+
  @abc.abstractmethod
- async def completion(self, messages: list[Any], tools: list[ChatCompletionToolParam] | None = None, tool_choice: str = "auto") -> Any: # noqa: ANN401
+ async def completion(
+ self,
+ messages: list[Any],
+ tools: list[ChatCompletionToolParam] | None = None,
+ tool_choice: str = "auto",
+ reasoning: ReasoningConfig = None,
+ **kwargs: Any, # noqa: ANN401
+ ) -> Any: # noqa: ANN401
  """Perform a completion request to the LLM."""

  @abc.abstractmethod
@@ -28,42 +84,95 @@ class BaseLLMClient(abc.ABC):
  messages: list[dict[str, Any]], # Changed from ResponseInputParam
  tools: list[FunctionToolParam] | None = None,
  tool_choice: Literal["none", "auto", "required"] = "auto",
- ) -> AsyncGenerator[ResponsesAPIStreamingResponse, None]:
+ reasoning: ReasoningConfig = None,
+ **kwargs: Any, # noqa: ANN401
+ ) -> Any: # noqa: ANN401
  """Perform a response request to the LLM."""


  class LiteLLMClient(BaseLLMClient):
- async def completion(self, messages: list[Any], tools: list[ChatCompletionToolParam] | None = None, tool_choice: str = "auto") -> Any: # noqa: ANN401
+ def _resolve_reasoning_params(
+ self,
+ reasoning: ReasoningConfig,
+ ) -> tuple[ReasoningEffort | None, ThinkingConfig]:
+ """Resolve the reasoning configuration parameters."""
+ if reasoning is not None:
+ return parse_reasoning_config(reasoning)
+
+ # Fall back to the instance defaults
+ return self.reasoning_effort, self.thinking_config
+
+ async def completion(
+ self,
+ messages: list[Any],
+ tools: list[ChatCompletionToolParam] | None = None,
+ tool_choice: str = "auto",
+ reasoning: ReasoningConfig = None,
+ **kwargs: Any, # noqa: ANN401
+ ) -> Any: # noqa: ANN401
  """Perform a completion request to the Litellm API."""
- return await litellm.acompletion(
- model=self.model,
- messages=messages,
- tools=tools,
- tool_choice=tool_choice,
- api_version=self.api_version,
- api_key=self.api_key,
- api_base=self.api_base,
- stream=True,
+
+ # Resolve the reasoning configuration parameters
+ final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
+ reasoning,
  )

+ # Prepare completion parameters
+ completion_params = {
+ "model": self.model,
+ "messages": messages,
+ "tools": tools,
+ "tool_choice": tool_choice,
+ "api_version": self.api_version,
+ "api_key": self.api_key,
+ "api_base": self.api_base,
+ "stream": True,
+ **kwargs,
+ }
+
+ # Add reasoning parameters if specified
+ if final_reasoning_effort is not None:
+ completion_params["reasoning_effort"] = final_reasoning_effort
+ if final_thinking_config is not None:
+ completion_params["thinking"] = final_thinking_config
+
+ return await litellm.acompletion(**completion_params)
+
  async def responses(
  self,
  messages: list[dict[str, Any]], # Changed from ResponseInputParam
  tools: list[FunctionToolParam] | None = None,
  tool_choice: Literal["none", "auto", "required"] = "auto",
- ) -> AsyncGenerator[ResponsesAPIStreamingResponse, None]:
+ reasoning: ReasoningConfig = None,
+ **kwargs: Any, # noqa: ANN401
+ ) -> Any: # type: ignore[return] # noqa: ANN401
  """Perform a response request to the Litellm API."""

  os.environ["DISABLE_AIOHTTP_TRANSPORT"] = "True"

- return await litellm.aresponses(
- model=self.model,
- input=messages, # type: ignore[arg-type]
- tools=tools,
- tool_choice=tool_choice,
- api_version=self.api_version,
- api_key=self.api_key,
- api_base=self.api_base,
- stream=True,
- store=False,
+ # Resolve the reasoning configuration parameters
+ final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
+ reasoning,
  )
+
+ # Prepare response parameters
+ response_params = {
+ "model": self.model,
+ "input": messages, # type: ignore[arg-type]
+ "tools": tools,
+ "tool_choice": tool_choice,
+ "api_version": self.api_version,
+ "api_key": self.api_key,
+ "api_base": self.api_base,
+ "stream": True,
+ "store": False,
+ **kwargs,
+ }
+
+ # Add reasoning parameters if specified
+ if final_reasoning_effort is not None:
+ response_params["reasoning_effort"] = final_reasoning_effort
+ if final_thinking_config is not None:
+ response_params["thinking"] = final_thinking_config
+
+ return await litellm.aresponses(**response_params) # type: ignore[return-value]
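For reference, parse_reasoning_config maps the unified configuration onto the two litellm parameters as follows; a short sketch using the example values documented above (the budget_tokens figure is the one shown in the type comment):

# Sketch only: return values follow the branches of parse_reasoning_config above.
from lite_agent.client import parse_reasoning_config

parse_reasoning_config("high")                                       # ("high", None)   -> sent as reasoning_effort
parse_reasoning_config({"type": "enabled", "budget_tokens": 2048})   # (None, {...})    -> sent as thinking
parse_reasoning_config(True)                                         # ("medium", None) -> default effort
parse_reasoning_config(None)                                         # (None, None)     -> reasoning disabled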
lite_agent/runner.py CHANGED
@@ -30,6 +30,7 @@ from lite_agent.types import (
  UserMessageContent,
  UserTextContent,
  )
+ from lite_agent.types.events import AssistantMessageEvent

  DEFAULT_INCLUDES: tuple[AgentChunkType, ...] = (
  "completion_raw",
@@ -56,38 +57,31 @@ class Runner:

  def _start_assistant_message(self, content: str = "", meta: AssistantMessageMeta | None = None) -> None:
  """Start a new assistant message."""
- if meta is None:
- meta = AssistantMessageMeta()
-
- # Always add text content, even if empty (we can update it later)
- assistant_content_items: list[AssistantMessageContent] = [AssistantTextContent(text=content)]
  self._current_assistant_message = NewAssistantMessage(
- content=assistant_content_items,
- meta=meta,
+ content=[AssistantTextContent(text=content)],
+ meta=meta or AssistantMessageMeta(),
  )

- def _add_to_current_assistant_message(self, content_item: AssistantTextContent | AssistantToolCall | AssistantToolCallResult) -> None:
- """Add content to the current assistant message."""
+ def _ensure_current_assistant_message(self) -> NewAssistantMessage:
+ """Ensure current assistant message exists and return it."""
  if self._current_assistant_message is None:
  self._start_assistant_message()
+ return self._current_assistant_message # type: ignore[return-value]

- if self._current_assistant_message is not None:
- self._current_assistant_message.content.append(content_item)
+ def _add_to_current_assistant_message(self, content_item: AssistantTextContent | AssistantToolCall | AssistantToolCallResult) -> None:
+ """Add content to the current assistant message."""
+ self._ensure_current_assistant_message().content.append(content_item)

  def _add_text_content_to_current_assistant_message(self, delta: str) -> None:
  """Add text delta to the current assistant message's text content."""
- if self._current_assistant_message is None:
- self._start_assistant_message()
-
- if self._current_assistant_message is not None:
- # Find the first text content item and append the delta
- for content_item in self._current_assistant_message.content:
- if content_item.type == "text":
- content_item.text += delta
- return
- # If no text content found, add new text content
- new_content = AssistantTextContent(text=delta)
- self._current_assistant_message.content.append(new_content)
+ message = self._ensure_current_assistant_message()
+ # Find the first text content item and append the delta
+ for content_item in message.content:
+ if content_item.type == "text":
+ content_item.text += delta
+ return
+ # If no text content found, add new text content
+ message.content.append(AssistantTextContent(text=delta))

  def _finalize_assistant_message(self) -> None:
  """Finalize the current assistant message and add it to messages."""
@@ -131,7 +125,7 @@ class Runner:
  for i, tool_call in enumerate(transfer_calls):
  if i == 0:
  # Execute the first transfer
- await self._handle_agent_transfer(tool_call, includes)
+ await self._handle_agent_transfer(tool_call)
  else:
  # Add response for additional transfer calls without executing them
  self._add_tool_call_result(
@@ -146,7 +140,7 @@ class Runner:
  for i, tool_call in enumerate(return_parent_calls):
  if i == 0:
  # Execute the first transfer
- await self._handle_parent_transfer(tool_call, includes)
+ await self._handle_parent_transfer(tool_call)
  else:
  # Add response for additional transfer calls without executing them
  self._add_tool_call_result(
@@ -174,30 +168,37 @@ class Runner:
  """Collect all chunks from an async generator into a list."""
  return [chunk async for chunk in stream]

- def run(
+ def run( # noqa: PLR0913
  self,
  user_input: UserInput,
  max_steps: int = 20,
  includes: Sequence[AgentChunkType] | None = None,
  context: "Any | None" = None, # noqa: ANN401
  record_to: PathLike | str | None = None,
+ agent_kwargs: dict[str, Any] | None = None,
  ) -> AsyncGenerator[AgentChunk, None]:
  """Run the agent and return a RunResponse object that can be asynchronously iterated for each chunk."""
  includes = self._normalize_includes(includes)
- if isinstance(user_input, str):
- user_message = NewUserMessage(content=[UserTextContent(text=user_input)])
- self.messages.append(user_message)
- elif isinstance(user_input, (list, tuple)):
- # Handle sequence of messages
- for message in user_input:
- self.append_message(message)
- else:
- # Handle single message (BaseModel, TypedDict, or dict)
- # Type assertion needed due to the complex union type
- self.append_message(user_input) # type: ignore[arg-type]
- return self._run(max_steps, includes, self._normalize_record_path(record_to), context=context)
-
- async def _run(self, max_steps: int, includes: Sequence[AgentChunkType], record_to: Path | None = None, context: Any | None = None) -> AsyncGenerator[AgentChunk, None]: # noqa: ANN401
+ match user_input:
+ case str():
+ self.messages.append(NewUserMessage(content=[UserTextContent(text=user_input)]))
+ case list() | tuple():
+ # Handle sequence of messages
+ for message in user_input:
+ self.append_message(message)
+ case _:
+ # Handle single message (BaseModel, TypedDict, or dict)
+ self.append_message(user_input) # type: ignore[arg-type]
+ return self._run(max_steps, includes, self._normalize_record_path(record_to), context=context, agent_kwargs=agent_kwargs)
+
+ async def _run(
+ self,
+ max_steps: int,
+ includes: Sequence[AgentChunkType],
+ record_to: Path | None = None,
+ context: Any | None = None, # noqa: ANN401
+ agent_kwargs: dict[str, Any] | None = None,
+ ) -> AsyncGenerator[AgentChunk, None]:
  """Run the agent and return a RunResponse object that can be asynchronously iterated for each chunk."""
  logger.debug(f"Running agent with messages: {self.messages}")
  steps = 0
@@ -220,71 +221,101 @@ class Runner:
  logger.debug(f"Step {steps}: finish_reason={finish_reason}, is_finish()={is_finish()}")
  # Convert to legacy format only when needed for LLM communication
  # This allows us to keep the new format internally but ensures compatibility
+ # Extract agent kwargs for reasoning configuration
+ reasoning = None
+ if agent_kwargs:
+ reasoning = agent_kwargs.get("reasoning")
+
  match self.api:
  case "completion":
- resp = await self.agent.completion(self.messages, record_to_file=record_to)
+ resp = await self.agent.completion(
+ self.messages,
+ record_to_file=record_to,
+ reasoning=reasoning,
+ )
  case "responses":
- resp = await self.agent.responses(self.messages, record_to_file=record_to)
+ resp = await self.agent.responses(
+ self.messages,
+ record_to_file=record_to,
+ reasoning=reasoning,
+ )
  case _:
  msg = f"Unknown API type: {self.api}"
  raise ValueError(msg)
  async for chunk in resp:
- if chunk.type in includes:
- yield chunk
- if chunk.type == "assistant_message":
- # Start or update assistant message in new format
- meta = AssistantMessageMeta(
- sent_at=chunk.message.meta.sent_at,
- latency_ms=getattr(chunk.message.meta, "latency_ms", None),
- total_time_ms=getattr(chunk.message.meta, "output_time_ms", None),
- )
- # If we already have a current assistant message, just update its metadata
- if self._current_assistant_message is not None:
- self._current_assistant_message.meta = meta
- else:
- # Extract text content from the new message format
- text_content = ""
- if chunk.message.content:
- for item in chunk.message.content:
- if hasattr(item, "type") and item.type == "text":
- text_content = item.text
- break
- self._start_assistant_message(text_content, meta)
- if chunk.type == "content_delta":
- # Accumulate text content to current assistant message
- self._add_text_content_to_current_assistant_message(chunk.delta)
- if chunk.type == "function_call":
- # Add tool call to current assistant message
- # Keep arguments as string for compatibility with funcall library
- tool_call = AssistantToolCall(
- call_id=chunk.call_id,
- name=chunk.name,
- arguments=chunk.arguments or "{}",
- )
- self._add_to_current_assistant_message(tool_call)
- if chunk.type == "usage":
- # Update the last assistant message with usage data and output_time_ms
- usage_time = datetime.now(timezone.utc)
- for i in range(len(self.messages) - 1, -1, -1):
- current_message = self.messages[i]
- if isinstance(current_message, NewAssistantMessage):
- # Update usage information
- if current_message.meta.usage is None:
- current_message.meta.usage = MessageUsage()
- current_message.meta.usage.input_tokens = chunk.usage.input_tokens
- current_message.meta.usage.output_tokens = chunk.usage.output_tokens
- current_message.meta.usage.total_tokens = (chunk.usage.input_tokens or 0) + (chunk.usage.output_tokens or 0)
-
- # Calculate output_time_ms if latency_ms is available
- if current_message.meta.latency_ms is not None:
- # We need to calculate from first output to usage time
- # We'll calculate: usage_time - (sent_at - latency_ms)
- # This gives us the time from first output to usage completion
- # sent_at is when the message was completed, so sent_at - latency_ms approximates first output time
- first_output_time_approx = current_message.meta.sent_at - timedelta(milliseconds=current_message.meta.latency_ms)
- output_time_ms = int((usage_time - first_output_time_approx).total_seconds() * 1000)
- current_message.meta.total_time_ms = max(0, output_time_ms)
- break
+ match chunk.type:
+ case "assistant_message":
+ # Start or update assistant message in new format
+ meta = AssistantMessageMeta(
+ sent_at=chunk.message.meta.sent_at,
+ latency_ms=getattr(chunk.message.meta, "latency_ms", None),
+ total_time_ms=getattr(chunk.message.meta, "output_time_ms", None),
+ )
+ # If we already have a current assistant message, just update its metadata
+ if self._current_assistant_message is not None:
+ self._current_assistant_message.meta = meta
+ else:
+ # Extract text content from the new message format
+ text_content = ""
+ if chunk.message.content:
+ for item in chunk.message.content:
+ if hasattr(item, "type") and item.type == "text":
+ text_content = item.text
+ break
+ self._start_assistant_message(text_content, meta)
+ # Only yield assistant_message chunk if it's in includes and has content
+ if chunk.type in includes and self._current_assistant_message is not None:
+ # Create a new chunk with the current assistant message content
+ updated_chunk = AssistantMessageEvent(
+ message=self._current_assistant_message,
+ )
+ yield updated_chunk
+ case "content_delta":
+ # Accumulate text content to current assistant message
+ self._add_text_content_to_current_assistant_message(chunk.delta)
+ # Always yield content_delta chunk if it's in includes
+ if chunk.type in includes:
+ yield chunk
+ case "function_call":
+ # Add tool call to current assistant message
+ # Keep arguments as string for compatibility with funcall library
+ tool_call = AssistantToolCall(
+ call_id=chunk.call_id,
+ name=chunk.name,
+ arguments=chunk.arguments or "{}",
+ )
+ self._add_to_current_assistant_message(tool_call)
+ # Always yield function_call chunk if it's in includes
+ if chunk.type in includes:
+ yield chunk
+ case "usage":
+ # Update the last assistant message with usage data and output_time_ms
+ usage_time = datetime.now(timezone.utc)
+ for i in range(len(self.messages) - 1, -1, -1):
+ current_message = self.messages[i]
+ if isinstance(current_message, NewAssistantMessage):
+ # Update usage information
+ if current_message.meta.usage is None:
+ current_message.meta.usage = MessageUsage()
+ current_message.meta.usage.input_tokens = chunk.usage.input_tokens
+ current_message.meta.usage.output_tokens = chunk.usage.output_tokens
+ current_message.meta.usage.total_tokens = (chunk.usage.input_tokens or 0) + (chunk.usage.output_tokens or 0)
+
+ # Calculate output_time_ms if latency_ms is available
+ if current_message.meta.latency_ms is not None:
+ # We need to calculate from first output to usage time
+ # We'll calculate: usage_time - (sent_at - latency_ms)
+ # This gives us the time from first output to usage completion
+ # sent_at is when the message was completed, so sent_at - latency_ms approximates first output time
+ first_output_time_approx = current_message.meta.sent_at - timedelta(milliseconds=current_message.meta.latency_ms)
+ output_time_ms = int((usage_time - first_output_time_approx).total_seconds() * 1000)
+ current_message.meta.total_time_ms = max(0, output_time_ms)
+ break
+ # Always yield usage chunk if it's in includes
+ if chunk.type in includes:
+ yield chunk
+ case _ if chunk.type in includes:
+ yield chunk

  # Finalize assistant message so it can be found in pending function calls
  self._finalize_assistant_message()
@@ -357,11 +388,6 @@ class Runner:
  msg = "Cannot continue running without a valid last message from the assistant."
  raise ValueError(msg)

- last_message = self.messages[-1]
- if not (isinstance(last_message, NewAssistantMessage) or (hasattr(last_message, "role") and getattr(last_message, "role", None) == "assistant")):
- msg = "Cannot continue running without a valid last message from the assistant."
- raise ValueError(msg)
-
  resp = self._run(max_steps=max_steps, includes=includes, record_to=self._normalize_record_path(record_to), context=context)
  async for chunk in resp:
  yield chunk
@@ -377,58 +403,50 @@ class Runner:
  resp = self.run(user_input, max_steps, includes, record_to=record_to)
  return await self._collect_all_chunks(resp)

- def _find_pending_tool_calls(self) -> list[AssistantToolCall]:
- """Find tool calls that don't have corresponding results yet."""
- # Find pending calls directly in new format messages
- pending_calls: list[AssistantToolCall] = []
-
- # Look at the last assistant message for pending tool calls
- if not self.messages:
- return pending_calls
-
- last_message = self.messages[-1]
- if not isinstance(last_message, NewAssistantMessage):
- return pending_calls
+ def _analyze_last_assistant_message(self) -> tuple[list[AssistantToolCall], dict[str, str]]:
+ """Analyze the last assistant message and return pending tool calls and tool call map."""
+ if not self.messages or not isinstance(self.messages[-1], NewAssistantMessage):
+ return [], {}

- # Collect tool calls and results from the last assistant message
  tool_calls = {}
  tool_results = set()
+ tool_call_names = {}

- for content_item in last_message.content:
+ for content_item in self.messages[-1].content:
  if content_item.type == "tool_call":
  tool_calls[content_item.call_id] = content_item
+ tool_call_names[content_item.call_id] = content_item.name
  elif content_item.type == "tool_call_result":
  tool_results.add(content_item.call_id)

- # Return tool calls that don't have corresponding results
- return [call for call_id, call in tool_calls.items() if call_id not in tool_results]
+ # Return pending tool calls and tool call names map
+ pending_calls = [call for call_id, call in tool_calls.items() if call_id not in tool_results]
+ return pending_calls, tool_call_names
+
+ def _find_pending_tool_calls(self) -> list[AssistantToolCall]:
+ """Find tool calls that don't have corresponding results yet."""
+ pending_calls, _ = self._analyze_last_assistant_message()
+ return pending_calls

  def _get_tool_call_name_by_id(self, call_id: str) -> str | None:
  """Get the tool name for a given call_id from the last assistant message."""
- if not self.messages or not isinstance(self.messages[-1], NewAssistantMessage):
- return None
-
- for content_item in self.messages[-1].content:
- if content_item.type == "tool_call" and content_item.call_id == call_id:
- return content_item.name
- return None
+ _, tool_call_names = self._analyze_last_assistant_message()
+ return tool_call_names.get(call_id)

  def _convert_tool_calls_to_tool_calls(self, tool_calls: list[AssistantToolCall]) -> list[ToolCall]:
  """Convert AssistantToolCall objects to ToolCall objects for compatibility."""
-
- result_tool_calls = []
- for tc in tool_calls:
- tool_call = ToolCall(
+ return [
+ ToolCall(
  id=tc.call_id,
  type="function",
  function=ToolCallFunction(
  name=tc.name,
  arguments=tc.arguments if isinstance(tc.arguments, str) else str(tc.arguments),
  ),
- index=len(result_tool_calls),
+ index=i,
  )
- result_tool_calls.append(tool_call)
- return result_tool_calls
+ for i, tc in enumerate(tool_calls)
+ ]

  def set_chat_history(self, messages: Sequence[FlexibleRunnerMessage], root_agent: Agent | None = None) -> None:
  """Set the entire chat history and track the current agent based on function calls.
@@ -691,12 +709,11 @@ class Runner:
  msg = f"Unsupported message type: {type(message)}"
  raise TypeError(msg)

- async def _handle_agent_transfer(self, tool_call: ToolCall, _includes: Sequence[AgentChunkType]) -> None:
+ async def _handle_agent_transfer(self, tool_call: ToolCall) -> None:
  """Handle agent transfer when transfer_to_agent tool is called.

  Args:
  tool_call: The transfer_to_agent tool call
- _includes: The types of chunks to include in output (unused)
  """

  # Parse the arguments to get the target agent name
@@ -771,12 +788,11 @@ class Runner:
  output=f"Transfer failed: {e!s}",
  )

- async def _handle_parent_transfer(self, tool_call: ToolCall, _includes: Sequence[AgentChunkType]) -> None:
+ async def _handle_parent_transfer(self, tool_call: ToolCall) -> None:
  """Handle parent transfer when transfer_to_parent tool is called.

  Args:
  tool_call: The transfer_to_parent tool call
- _includes: The types of chunks to include in output (unused)
  """

  # Check if current agent has a parent
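Combined with the agent.py changes, a per-run reasoning override can be supplied through Runner.run's new agent_kwargs parameter, from which _run extracts the "reasoning" key. A rough sketch (the Runner(agent) construction and the prompt are assumptions; agent_kwargs, "reasoning", and the chunk interface come from this diff):

# Sketch only: Runner construction signature is assumed; `agent` is the instance from the earlier sketch.
import asyncio
from lite_agent.runner import Runner

async def main() -> None:
    runner = Runner(agent)
    async for chunk in runner.run(
        "Summarize the latest findings.",       # illustrative prompt
        agent_kwargs={"reasoning": "low"},      # per-run override of the agent-level default
    ):
        print(chunk.type)

asyncio.run(main())
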
lite_agent/stream_handlers/litellm.py CHANGED
@@ -16,18 +16,27 @@ if TYPE_CHECKING:
  from aiofiles.threadpool.text import AsyncTextIOWrapper


- def ensure_record_file(record_to: Path | None) -> Path | None:
+ def ensure_record_file(record_to: Path | str | None) -> Path | None:
  if not record_to:
  return None
- if not record_to.parent.exists():
- logger.warning('Record directory "%s" does not exist, creating it.', record_to.parent)
- record_to.parent.mkdir(parents=True, exist_ok=True)
- return record_to
+
+ path = Path(record_to) if isinstance(record_to, str) else record_to
+
+ # If the path is a directory, generate a filename
+ if path.is_dir():
+ path = path / "conversation.jsonl"
+
+ # Ensure parent directory exists
+ if not path.parent.exists():
+ logger.warning('Record directory "%s" does not exist, creating it.', path.parent)
+ path.parent.mkdir(parents=True, exist_ok=True)
+
+ return path


  async def litellm_completion_stream_handler(
  resp: litellm.CustomStreamWrapper,
- record_to: Path | None = None,
+ record_to: Path | str | None = None,
  ) -> AsyncGenerator[AgentChunk, None]:
  """
  Optimized chunk handler
@@ -52,7 +61,7 @@ async def litellm_completion_stream_handler(

  async def litellm_response_stream_handler(
  resp: AsyncGenerator[ResponsesAPIStreamingResponse, None],
- record_to: Path | None = None,
+ record_to: Path | str | None = None,
  ) -> AsyncGenerator[AgentChunk, None]:
  """
  Response API stream handler for processing ResponsesAPIStreamingResponse chunks
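A side effect of the ensure_record_file change is that the stream handlers now accept record_to as a Path or str, and a directory path is resolved to a conversation.jsonl file inside it. A small sketch (the "logs" path is illustrative):

# Sketch only: behavior follows ensure_record_file above; "logs" is an illustrative path.
from lite_agent.stream_handlers.litellm import ensure_record_file

path = ensure_record_file("logs")  # str accepted; if "logs" is an existing directory,
print(path)                        # this returns Path("logs/conversation.jsonl")
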
lite_agent-0.4.0.dist-info/METADATA → lite_agent-0.5.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lite-agent
- Version: 0.4.0
+ Version: 0.5.0
  Summary: A lightweight, extensible framework for building AI agent.
  Author-email: Jianqi Pan <jannchie@gmail.com>
  License: MIT
lite_agent-0.4.0.dist-info/RECORD → lite_agent-0.5.0.dist-info/RECORD RENAMED
@@ -1,16 +1,16 @@
  lite_agent/__init__.py,sha256=Swuefee0etSiaDnn30K2hBNV9UI3hIValW3A-pRE7e0,338
- lite_agent/agent.py,sha256=7wM8nVXIaPvEirRiY2HV0rY0sQuYCj-_hQ6V369bB58,22897
+ lite_agent/agent.py,sha256=M0U59KpMy6OGFje6yZuQCYVGr4oBboRwbtImPF59o2w,23314
  lite_agent/chat_display.py,sha256=b0sUH3fkutc4e_KAKH7AtPu2msyLloNIAiWqCNavdds,30533
- lite_agent/client.py,sha256=m2jfBPIsleMZ1QCczjyHND-PIF17kQh4RTuf5FaipGM,2571
+ lite_agent/client.py,sha256=HG-NbTIUSFAUAPjRow3TFYJxvTc6Y4bdT2oJWIJNEEk,5963
  lite_agent/loggers.py,sha256=XkNkdqwD_nQGfhQJ-bBWT7koci_mMkNw3aBpyMhOICw,57
  lite_agent/message_transfers.py,sha256=9qucjc-uSIXvVfhcmVRC_0lp0Q8sWp99dV4ReCh6ZlI,4428
  lite_agent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- lite_agent/runner.py,sha256=wi56i5cMYAJi3U9GBsBlzhGC6Do9ovMSOsA_Qqy2HtA,39642
+ lite_agent/runner.py,sha256=U7eVNAJ_VLwgbPPpn-vggSgvBmFl8wMMFWn3mWCsDow,40423
  lite_agent/processors/__init__.py,sha256=ybpAzpMBIE9v5I24wIBZRXeaOaPNTmoKH13aofgNI6Q,234
  lite_agent/processors/completion_event_processor.py,sha256=8fQYRofgBd8t0V3oUakTOmZdv5Q9tCuzADGCGvVgy0k,13442
  lite_agent/processors/response_event_processor.py,sha256=CElJMUzLs8mklVqJtoLiVu-NTq0Dz2NNd9YdAKpjgE0,8088
  lite_agent/stream_handlers/__init__.py,sha256=a5s1GZr42uvndtcQqEhK2cnjGkK8ZFTAZCj3J61Bb5E,209
- lite_agent/stream_handlers/litellm.py,sha256=lE2whfFG-txhjeIp58yZ4nqApXjVeSACUIk-3KYcnVg,2692
+ lite_agent/stream_handlers/litellm.py,sha256=3D0u7R2ADA8kDwpFImZlw20o-CsmFXVLvq4nvwwD0Rk,2922
  lite_agent/templates/handoffs_source_instructions.xml.j2,sha256=2XsXQlBzk38qbxGrfyt8y2b0KlZmsV_1xavLufcdkHc,428
  lite_agent/templates/handoffs_target_instructions.xml.j2,sha256=gSbWVYYcovPKbGpFc0kqGSJ5Y5UC3fOHyUmZfcrDgSE,356
  lite_agent/templates/wait_for_user_instructions.xml.j2,sha256=wXbcYD5Q1FaCGVBm3Hz_Cp7nnoK7KzloP0ao-jYMwPk,231
@@ -18,6 +18,6 @@ lite_agent/types/__init__.py,sha256=QKuhjFWRcpAlsBK9JYgoCABpoQExwhuyGudJoiiqQfs,
  lite_agent/types/events.py,sha256=mFMqV55WWJbPDyb_P61nd3qMLpEnwZgVY6NTKFkINkg,2389
  lite_agent/types/messages.py,sha256=c7nTIWqXNo562het_vaWcZvsoy-adkARwAYn4JNqm0c,9897
  lite_agent/types/tool_calls.py,sha256=Xnut8-2-Ld9vgA2GKJY6BbFlBaAv_n4W7vo7Jx21A-E,260
- lite_agent-0.4.0.dist-info/METADATA,sha256=biQoUoss9DcSuUX494mPj2yoXtE0H81Far_K3IpFB30,3456
- lite_agent-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- lite_agent-0.4.0.dist-info/RECORD,,
+ lite_agent-0.5.0.dist-info/METADATA,sha256=20K2Xirnyawl1uN_I8TLcuGlgRjNhs04hz2BtDDRnbM,3456
+ lite_agent-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ lite_agent-0.5.0.dist-info/RECORD,,