PyPI - lite-agent - Versions diffs - 0.6.0__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

lite-agent 0.6.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lite-agent might be problematic. Click here for more details.

Files changed (21) hide show

lite_agent/agent.py +233 -47
lite_agent/chat_display.py +319 -54
lite_agent/client.py +4 -0
lite_agent/constants.py +30 -0
lite_agent/message_transfers.py +24 -5
lite_agent/processors/completion_event_processor.py +14 -20
lite_agent/processors/response_event_processor.py +23 -15
lite_agent/response_handlers/__init__.py +1 -0
lite_agent/response_handlers/base.py +17 -9
lite_agent/response_handlers/completion.py +35 -7
lite_agent/response_handlers/responses.py +46 -12
lite_agent/runner.py +336 -249
lite_agent/types/__init__.py +2 -0
lite_agent/types/messages.py +6 -5
lite_agent/utils/__init__.py +0 -0
lite_agent/utils/message_builder.py +213 -0
lite_agent/utils/metrics.py +50 -0
{lite_agent-0.6.0.dist-info → lite_agent-0.9.0.dist-info}/METADATA +3 -2
lite_agent-0.9.0.dist-info/RECORD +31 -0
lite_agent-0.6.0.dist-info/RECORD +0 -27
{lite_agent-0.6.0.dist-info → lite_agent-0.9.0.dist-info}/WHEEL +0 -0

lite_agent/processors/response_event_processor.py CHANGED Viewed

@@ -22,12 +22,14 @@ from lite_agent.types import (
     ContentDeltaEvent,
     EventUsage,
     FunctionCallEvent,
+    MessageUsage,
     NewAssistantMessage,
     ResponseRawEvent,
     Timing,
     TimingEvent,
     UsageEvent,
 )
+from lite_agent.utils.metrics import TimingMetrics
 class ResponseEventProcessor:
@@ -111,21 +113,28 @@ class ResponseEventProcessor:
                 content = item.get("content", [])
                 if content and isinstance(content, list) and len(content) > 0:
                     end_time = datetime.now(timezone.utc)
-                    latency_ms = None
-                    output_time_ms = None
-                    # latency_ms: 从开始准备输出到 LLM 输出第一个字符的时间差
-                    if self._start_time and self._first_output_time:
-                        latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-                    # output_time_ms: 从输出第一个字符到输出完成的时间差
-                    if self._first_output_time and self._output_complete_time:
-                        output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
+                    latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+                    output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+                    # Extract model information from event
+                    model_name = getattr(event, "model", None)
+                    # Debug: check if event has model info in different location
+                    if hasattr(event, "response"):
+                        response = getattr(event, "response", None)
+                        if response and hasattr(response, "model"):
+                            model_name = getattr(response, "model", None)
+                    # Create usage information
+                    usage = MessageUsage(
+                        input_tokens=self._usage_data.get("input_tokens"),
+                        output_tokens=self._usage_data.get("output_tokens"),
+                        total_tokens=(self._usage_data.get("input_tokens") or 0) + (self._usage_data.get("output_tokens") or 0),
+                    )
                     meta = AssistantMessageMeta(
                         sent_at=end_time,
+                        model=model_name,
                         latency_ms=latency_ms,
                         output_time_ms=output_time_ms,
-                        input_tokens=self._usage_data.get("input_tokens"),
-                        output_tokens=self._usage_data.get("output_tokens"),
+                        usage=usage,
                     )
                     return [
                         AssistantMessageEvent(
@@ -173,10 +182,9 @@ class ResponseEventProcessor:
                 )
                 # Then yield timing event if we have timing data
-                if self._start_time and self._first_output_time and self._output_complete_time:
-                    latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-                    output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
+                latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+                output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+                if latency_ms is not None and output_time_ms is not None:
                     results.append(
                         TimingEvent(
                             timing=Timing(

lite_agent/response_handlers/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Response handlers for unified streaming and non-streaming processing."""
 from lite_agent.response_handlers.base import ResponseHandler
 from lite_agent.response_handlers.completion import CompletionResponseHandler
 from lite_agent.response_handlers.responses import ResponsesAPIHandler

lite_agent/response_handlers/base.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Base response handler for unified streaming and non-streaming response processing."""
 from abc import ABC, abstractmethod
 from collections.abc import AsyncGenerator
 from pathlib import Path
@@ -12,35 +13,42 @@ class ResponseHandler(ABC):
     async def handle(
         self,
-        response: Any,
+        response: Any,  # noqa: ANN401
+        *,
         streaming: bool,
         record_to: Path | None = None,
     ) -> AsyncGenerator[AgentChunk, None]:
         """Handle a response in either streaming or non-streaming mode.
         Args:
             response: The LLM response object
             streaming: Whether to process as streaming or non-streaming
             record_to: Optional file path to record the conversation
         Yields:
             AgentChunk: Processed chunks from the response
         """
         if streaming:
-            async for chunk in self._handle_streaming(response, record_to):
+            stream = self._handle_streaming(response, record_to)
+            async for chunk in stream:
                 yield chunk
         else:
-            async for chunk in self._handle_non_streaming(response, record_to):
+            stream = self._handle_non_streaming(response, record_to)
+            async for chunk in stream:
                 yield chunk
     @abstractmethod
-    async def _handle_streaming(
-        self, response: Any, record_to: Path | None = None,
+    def _handle_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
     ) -> AsyncGenerator[AgentChunk, None]:
         """Handle streaming response."""
     @abstractmethod
-    async def _handle_non_streaming(
-        self, response: Any, record_to: Path | None = None,
+    def _handle_non_streaming(
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
     ) -> AsyncGenerator[AgentChunk, None]:
         """Handle non-streaming response."""

lite_agent/response_handlers/completion.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Completion API response handler."""
 from collections.abc import AsyncGenerator
 from datetime import datetime, timezone
 from pathlib import Path
@@ -9,15 +10,17 @@ from litellm import CustomStreamWrapper
 from lite_agent.response_handlers.base import ResponseHandler
 from lite_agent.stream_handlers import litellm_completion_stream_handler
 from lite_agent.types import AgentChunk
-from lite_agent.types.events import AssistantMessageEvent
-from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, NewAssistantMessage
+from lite_agent.types.events import AssistantMessageEvent, Usage, UsageEvent
+from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, AssistantToolCall, NewAssistantMessage
 class CompletionResponseHandler(ResponseHandler):
     """Handler for Completion API responses."""
     async def _handle_streaming(
-        self, response: Any, record_to: Path | None = None,
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
     ) -> AsyncGenerator[AgentChunk, None]:
         """Handle streaming completion response."""
         if isinstance(response, CustomStreamWrapper):
@@ -28,7 +31,9 @@ class CompletionResponseHandler(ResponseHandler):
             raise TypeError(msg)
     async def _handle_non_streaming(
-        self, response: Any, record_to: Path | None = None,
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,  # noqa: ARG002
     ) -> AsyncGenerator[AgentChunk, None]:
         """Handle non-streaming completion response."""
         # Convert completion response to chunks
@@ -40,11 +45,34 @@ class CompletionResponseHandler(ResponseHandler):
             if choice.message and choice.message.content:
                 content_items.append(AssistantTextContent(text=choice.message.content))
-            # TODO: Handle tool calls in the future
+            # Handle tool calls
+            if choice.message and choice.message.tool_calls:
+                for tool_call in choice.message.tool_calls:
+                    content_items.append(  # noqa: PERF401
+                        AssistantToolCall(
+                            call_id=tool_call.id,
+                            name=tool_call.function.name,
+                            arguments=tool_call.function.arguments,
+                        ),
+                    )
-            if content_items:
+            # Always yield assistant message, even if content is empty for tool calls
+            if choice.message and (content_items or choice.message.tool_calls):
+                # Extract model information from response
+                model_name = getattr(response, "model", None)
                 message = NewAssistantMessage(
                     content=content_items,
-                    meta=AssistantMessageMeta(sent_at=datetime.now(timezone.utc)),
+                    meta=AssistantMessageMeta(
+                        sent_at=datetime.now(timezone.utc),
+                        model=model_name,
+                    ),
                 )
                 yield AssistantMessageEvent(message=message)
+        # Yield usage information if available
+        if hasattr(response, "usage") and response.usage:
+            usage = Usage(
+                input_tokens=response.usage.prompt_tokens,
+                output_tokens=response.usage.completion_tokens,
+            )
+            yield UsageEvent(usage=usage)

lite_agent/response_handlers/responses.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Responses API response handler."""
 from collections.abc import AsyncGenerator
 from datetime import datetime, timezone
 from pathlib import Path
@@ -7,36 +8,69 @@ from typing import Any
 from lite_agent.response_handlers.base import ResponseHandler
 from lite_agent.stream_handlers import litellm_response_stream_handler
 from lite_agent.types import AgentChunk
-from lite_agent.types.events import AssistantMessageEvent
-from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, NewAssistantMessage
+from lite_agent.types.events import AssistantMessageEvent, Usage, UsageEvent
+from lite_agent.types.messages import AssistantMessageMeta, AssistantTextContent, AssistantToolCall, NewAssistantMessage
 class ResponsesAPIHandler(ResponseHandler):
     """Handler for Responses API responses."""
     async def _handle_streaming(
-        self, response: Any, record_to: Path | None = None,
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,
     ) -> AsyncGenerator[AgentChunk, None]:
         """Handle streaming responses API response."""
         async for chunk in litellm_response_stream_handler(response, record_to):
             yield chunk
     async def _handle_non_streaming(
-        self, response: Any, record_to: Path | None = None,
+        self,
+        response: Any,  # noqa: ANN401
+        record_to: Path | None = None,  # noqa: ARG002
     ) -> AsyncGenerator[AgentChunk, None]:
         """Handle non-streaming responses API response."""
         # Convert ResponsesAPIResponse to chunks
         if hasattr(response, "output") and response.output:
-            for output_message in response.output:
-                if hasattr(output_message, "content") and output_message.content:
+            content_items = []
+            for output_item in response.output:
+                # Handle function tool calls
+                if hasattr(output_item, "type") and output_item.type == "function_call":
+                    content_items.append(
+                        AssistantToolCall(
+                            call_id=output_item.call_id,
+                            name=output_item.name,
+                            arguments=output_item.arguments,
+                        ),
+                    )
+                # Handle text content (if exists)
+                elif hasattr(output_item, "content") and output_item.content:
                     content_text = ""
-                    for content_item in output_message.content:
+                    for content_item in output_item.content:
                         if hasattr(content_item, "text"):
                             content_text += content_item.text
                     if content_text:
-                        message = NewAssistantMessage(
-                            content=[AssistantTextContent(text=content_text)],
-                            meta=AssistantMessageMeta(sent_at=datetime.now(timezone.utc)),
-                        )
-                        yield AssistantMessageEvent(message=message)
+                        content_items.append(AssistantTextContent(text=content_text))
+            # Create assistant message if we have any content
+            if content_items:
+                # Extract model information from response
+                model_name = getattr(response, "model", None)
+                message = NewAssistantMessage(
+                    content=content_items,
+                    meta=AssistantMessageMeta(
+                        sent_at=datetime.now(timezone.utc),
+                        model=model_name,
+                    ),
+                )
+                yield AssistantMessageEvent(message=message)
+        # Yield usage information if available
+        if hasattr(response, "usage") and response.usage:
+            usage = Usage(
+                input_tokens=response.usage.input_tokens,
+                output_tokens=response.usage.output_tokens,
+            )
+            yield UsageEvent(usage=usage)

lite-agent 0.6.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

Potentially problematic release.

lite-agent 0.6.0__py3-none-any.whl → 0.9.0__py3-none-any.whl