PyPI - lite-agent - Versions diffs - 0.5.0__tar.gz → 0.6.0__tar.gz - Mend

lite-agent 0.5.0 (tar.gz) → 0.6.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lite-agent might be problematic. Click here for more details.

Files changed (97)

{lite_agent-0.5.0 → lite_agent-0.6.0}/.claude/settings.local.json RENAMED Viewed

@@ -14,7 +14,8 @@
       "Bash(uv run pytest:*)",
       "Bash(timeout:*)",
       "Bash(pyright:*)",
-      "Bash(rg:*)"
+      "Bash(rg:*)",
+      "Bash(uv run:*)"
     ],
     "deny": []
   }

{lite_agent-0.5.0 → lite_agent-0.6.0}/CHANGELOG.md RENAMED Viewed

@@ -1,3 +1,12 @@
+## v0.6.0
+[v0.5.0...v0.6.0](https://github.com/Jannchie/lite-agent/compare/v0.5.0...v0.6.0)
+### :sparkles: Features
+- **examples**: add basic and llm config usage examples - By [Jannchie](mailto:jannchie@gmail.com) in [ea4112f](https://github.com/Jannchie/lite-agent/commit/ea4112f)
+- **streaming**: add unified streaming and non-streaming response handling - By [Jannchie](mailto:jannchie@gmail.com) in [53daf42](https://github.com/Jannchie/lite-agent/commit/53daf42)
 ## v0.5.0
 [v0.4.1...v0.5.0](https://github.com/Jannchie/lite-agent/compare/v0.4.1...v0.5.0)

{lite_agent-0.5.0 → lite_agent-0.6.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lite-agent
-Version: 0.5.0
+Version: 0.6.0
 Summary: A lightweight, extensible framework for building AI agent.
 Author-email: Jianqi Pan <jannchie@gmail.com>
 License: MIT

lite_agent-0.6.0/examples/basic_model.py ADDED Viewed

@@ -0,0 +1,53 @@
+import asyncio
+import logging
+from rich.logging import RichHandler
+from lite_agent.agent import Agent
+from lite_agent.chat_display import display_messages
+from lite_agent.client import LiteLLMClient
+from lite_agent.runner import Runner
+logging.basicConfig(
+    level=logging.WARNING,
+    format="%(message)s",
+    datefmt="[%X]",
+    handlers=[RichHandler(rich_tracebacks=True)],
+)
+logger = logging.getLogger("lite_agent")
+logger.setLevel(logging.DEBUG)
+async def get_temperature(city: str) -> str:
+    """Get the temperature for a city."""
+    return f"The temperature in {city} is 25°C."
+agent = Agent(
+    model=LiteLLMClient(
+        model="gpt-4o-mini",
+        temperature=0.7,
+        max_tokens=150,
+        top_p=0.9,
+    ),
+    name="Weather Assistant",
+    instructions="You are a helpful weather assistant. Before using tools, briefly explain what you are going to do. Provide friendly and informative responses.",
+    tools=[get_temperature],
+)
+async def main():
+    runner = Runner(agent)
+    resp = runner.run(
+        "What is the temperature in New York?",
+        includes=["usage", "assistant_message", "function_call", "function_call_output", "timing"],
+    )
+    async for chunk in resp:
+        logger.info(chunk)
+    display_messages(runner.messages)
+    print(runner.messages)
+if __name__ == "__main__":
+    asyncio.run(main())

lite_agent-0.6.0/examples/debug_non_streaming.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""
+Debug example to investigate non-streaming mode issues.
+"""
+import asyncio
+import logging
+from lite_agent import Agent, Runner
+# Enable debug logging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger("lite_agent")
+logger.setLevel(logging.DEBUG)
+async def main():
+    # Create an agent
+    agent = Agent(
+        name="DebugAgent",
+        model="gpt-4o-mini",
+        instructions="You are a helpful assistant.",
+    )
+    print("=== Debug Non-Streaming Mode ===")
+    # Test with streaming=False
+    runner = Runner(agent, streaming=False)
+    print("Running in non-streaming mode...")
+    chunks = []
+    async for chunk in runner.run("Hello, please say hi back."):
+        print(f"Received chunk: {chunk}")
+        print(f"Chunk type: {chunk.type}")
+        if hasattr(chunk, "message"):
+            print(f"Chunk message: {chunk.message}")
+        if hasattr(chunk, "content"):
+            print(f"Chunk content: {chunk.content}")
+        chunks.append(chunk)
+    print(f"\nTotal chunks received: {len(chunks)}")
+    # Compare with streaming mode
+    print("\n=== Compare with Streaming Mode ===")
+    runner_streaming = Runner(agent, streaming=True)
+    streaming_chunks = []
+    async for chunk in runner_streaming.run("Hello, please say hi back too."):
+        streaming_chunks.append(chunk)
+        if chunk.type == "content_delta":
+            print(chunk.delta, end="", flush=True)
+    print(f"\nStreaming chunks received: {len(streaming_chunks)}")
+if __name__ == "__main__":
+    asyncio.run(main())

lite_agent-0.6.0/examples/debug_with_logging.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""
+Debug with full logging enabled.
+"""
+import asyncio
+import logging
+from lite_agent import Agent, Runner
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(name)s - %(levelname)s - %(message)s",
+)
+# Enable specific loggers
+logging.getLogger("lite_agent").setLevel(logging.DEBUG)
+async def main():
+    agent = Agent(
+        name="TestAgent",
+        model="gpt-4o-mini",
+        instructions="You are helpful.",
+    )
+    print("=== Testing Non-Streaming ===")
+    runner = Runner(agent, streaming=False)
+    chunks = []
+    async for chunk in runner.run("Hello"):
+        chunks.append(chunk)
+        print(f"Got chunk: {chunk.type}")
+    print(f"Total chunks: {len(chunks)}")
+if __name__ == "__main__":
+    asyncio.run(main())

lite_agent-0.6.0/examples/llm_config_demo.py ADDED Viewed

@@ -0,0 +1,90 @@
+import asyncio
+import logging
+from rich.logging import RichHandler
+from lite_agent.agent import Agent
+from lite_agent.chat_display import display_messages
+from lite_agent.client import LiteLLMClient, LLMConfig
+from lite_agent.runner import Runner
+logging.basicConfig(
+    level=logging.WARNING,
+    format="%(message)s",
+    datefmt="[%X]",
+    handlers=[RichHandler(rich_tracebacks=True)],
+)
+logger = logging.getLogger("lite_agent")
+logger.setLevel(logging.DEBUG)
+async def get_weather(city: str) -> str:
+    """Get the current weather for a city."""
+    return f"The weather in {city} is sunny, 25°C."
+# Method 1: Using individual parameters
+agent1 = Agent(
+    model=LiteLLMClient(
+        model="gpt-4o-mini",
+        temperature=0.3,
+        max_tokens=100,
+        top_p=0.8,
+        frequency_penalty=0.1,
+        presence_penalty=0.1,
+        stop=["END"],
+    ),
+    name="Weather Bot (Individual Params)",
+    instructions="You are a weather assistant. Keep responses brief and factual.",
+    tools=[get_weather],
+)
+# Method 2: Using LLMConfig object
+llm_config = LLMConfig(
+    temperature=0.8,
+    max_tokens=200,
+    top_p=0.9,
+    frequency_penalty=0.0,
+    presence_penalty=0.0,
+)
+agent2 = Agent(
+    model=LiteLLMClient(
+        model="gpt-4o-mini",
+        llm_config=llm_config,
+    ),
+    name="Weather Bot (LLMConfig)",
+    instructions="You are a creative weather assistant. Add some personality to your responses.",
+    tools=[get_weather],
+)
+async def main():
+    # Test agent with conservative settings (low temperature, short responses)
+    print("=== Testing Agent 1 (Conservative Settings) ===")
+    runner1 = Runner(agent1)
+    resp1 = runner1.run(
+        "What's the weather like in Tokyo?",
+        includes=["assistant_message"],
+    )
+    async for chunk in resp1:
+        logger.info(chunk)
+    display_messages(runner1.messages)
+    print("\n" + "="*50 + "\n")
+    # Test agent with creative settings (high temperature, longer responses)
+    print("=== Testing Agent 2 (Creative Settings) ===")
+    runner2 = Runner(agent2)
+    resp2 = runner2.run(
+        "What's the weather like in Tokyo?",
+        includes=["assistant_message"],
+    )
+    async for chunk in resp2:
+        logger.info(chunk)
+    display_messages(runner2.messages)
+if __name__ == "__main__":
+    asyncio.run(main())

lite_agent-0.6.0/examples/non_streaming.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""
+Simple example demonstrating non-streaming mode in LiteAgent.
+"""
+import asyncio
+from lite_agent import Agent, Runner
+async def main():
+    # Create an agent
+    agent = Agent(
+        name="NonStreamingDemo",
+        model="gpt-4o-mini",
+        instructions="You are a helpful assistant.",
+    )
+    # Create runner with non-streaming mode
+    runner = Runner(agent, streaming=False)
+    print("=== Non-Streaming Mode Example ===")
+    print("Question: Explain what Python is in one sentence.")
+    print("Response: ", end="", flush=True)
+    # In non-streaming mode, you get the complete response at once
+    async for chunk in runner.run("Explain what Python is in one sentence."):
+        if chunk.type == "assistant_message":
+            # Non-streaming mode typically yields one complete message
+            print(chunk.message.content[0].text)
+    print("\n=== Tool Usage with Non-Streaming ===")
+    # Example with a simple tool
+    def get_time() -> str:
+        """Get the current time."""
+        from datetime import datetime
+        return f"Current time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+    agent_with_tools = Agent(
+        name="TimeAgent",
+        model="gpt-4o-mini",
+        instructions="You help users with time-related queries. Use the get_time tool when asked about time.",
+        tools=[get_time],
+    )
+    runner_with_tools = Runner(agent_with_tools, streaming=False)
+    print("Question: What time is it now?")
+    print("Response:")
+    async for chunk in runner_with_tools.run("What time is it now?"):
+        if chunk.type == "assistant_message":
+            print(chunk.message.content[0].text)
+        elif chunk.type == "function_call_output":
+            print(f"Tool output: {chunk.content}")
+    print("\n=== Benefits of Non-Streaming Mode ===")
+    print("1. Simpler processing - get complete responses")
+    print("2. Easier for batch processing")
+    print("3. Better for APIs that need complete responses")
+    print("4. Lower overhead for short interactions")
+if __name__ == "__main__":
+    asyncio.run(main())

{lite_agent-0.5.0 → lite_agent-0.6.0}/examples/reasoning_example.py RENAMED Viewed

@@ -4,7 +4,6 @@ import logging
 from rich.logging import RichHandler
 from lite_agent.agent import Agent
-from lite_agent.runner import Runner
 logging.basicConfig(
     level=logging.WARNING,
@@ -26,46 +25,47 @@ async def demo_reasoning_configurations():
     """演示不同的推理配置方法。"""
     print("=== 推理配置演示 ===\n")
-    # 1. 在Agent初始化时设置推理参数
-    print("1. Agent初始化时设置reasoning_effort:")
+    # 1. 使用reasoning参数设置推理强度（字符串形式）
+    print("1. 使用reasoning参数设置推理强度:")
     agent_with_reasoning = Agent(
         model="gpt-4o-mini",
         name="推理助手",
         instructions="你是一个深度分析助手，使用仔细的推理来提供全面的分析。",
-        reasoning_effort="high",  # 高强度推理
+        reasoning="high",  # 高强度推理
     )
-    print(f"   Agent推理努力程度: {agent_with_reasoning.reasoning_effort}")
+    print(f"   Agent推理配置: {agent_with_reasoning.reasoning}")
     print(f"   客户端推理努力程度: {agent_with_reasoning.client.reasoning_effort}")
+    print(f"   客户端思考配置: {agent_with_reasoning.client.thinking_config}")
-    # 2. 使用thinking_config进行更精细的控制
-    print("\n2. 使用thinking_config进行精细控制:")
+    # 2. 使用reasoning参数进行更精细的控制（字典形式）
+    print("\n2. 使用reasoning参数进行精细控制:")
     agent_with_thinking = Agent(
         model="claude-3-5-sonnet-20241022",  # Anthropic模型支持thinking
         name="思考助手",
         instructions="你是一个深思熟虑的助手。",
-        thinking_config={"type": "enabled", "budget_tokens": 2048},
+        reasoning={"type": "enabled", "budget_tokens": 2048},  # 使用字典形式
     )
-    print(f"   Agent思考配置: {agent_with_thinking.thinking_config}")
+    print(f"   Agent推理配置: {agent_with_thinking.reasoning}")
+    print(f"   客户端推理努力程度: {agent_with_thinking.client.reasoning_effort}")
     print(f"   客户端思考配置: {agent_with_thinking.client.thinking_config}")
-    # 3. 同时设置reasoning_effort和thinking_config
-    print("\n3. 同时设置多种推理参数:")
-    agent_full_config = Agent(
+    # 3. 使用布尔值设置推理（会默认使用medium级别）
+    print("\n3. 使用布尔值启用推理:")
+    agent_bool_reasoning = Agent(
         model="o1-mini",  # OpenAI推理模型
-        name="全配置推理助手",
+        name="布尔推理助手",
         instructions="你是一个高级推理助手。",
-        reasoning_effort="medium",
-        thinking_config={"type": "enabled", "budget_tokens": 1024},
+        reasoning=True,  # 布尔值，会使用默认的medium级别
     )
-    print(f"   推理努力程度: {agent_full_config.reasoning_effort}")
-    print(f"   思考配置: {agent_full_config.thinking_config}")
+    print(f"   Agent推理配置: {agent_bool_reasoning.reasoning}")
+    print(f"   客户端推理努力程度: {agent_bool_reasoning.client.reasoning_effort}")
+    print(f"   客户端思考配置: {agent_bool_reasoning.client.thinking_config}")
     # 4. 演示运行时覆盖推理参数
     print("\n4. 运行时覆盖推理参数:")
-    runner = Runner(agent_with_reasoning)
-    print("   - Agent默认使用 reasoning_effort='high'")
+    print("   - Agent默认使用 reasoning='high'")
     print("   - 运行时可通过 agent_kwargs 覆盖:")
-    print("     runner.run(query, agent_kwargs={'reasoning_effort': 'minimal'})")
+    print("     runner.run(query, agent_kwargs={'reasoning': 'minimal'})")
     # 注意：由于没有实际的API密钥，我们不运行真实的API调用
     print("\n✓ 所有推理配置功能已成功设置！")

lite_agent-0.6.0/examples/simple_debug.py ADDED Viewed

@@ -0,0 +1,36 @@
+"""
+Simple debug to check non-streaming response.
+"""
+import asyncio
+from lite_agent import Agent, Runner
+async def main():
+    agent = Agent(
+        name="TestAgent",
+        model="gpt-4o-mini",
+        instructions="You are helpful.",
+    )
+    # Test non-streaming
+    print("Testing non-streaming...")
+    runner = Runner(agent, streaming=False)
+    try:
+        chunks = []
+        async for chunk in runner.run("Say hello"):
+            chunks.append(chunk)
+            print(f"Received chunk type: {chunk.type}")
+        print(f"Total chunks: {len(chunks)}")
+        if chunks:
+            print(f"First chunk: {chunks[0]}")
+    except Exception as e:
+        print(f"Error: {e}")
+        import traceback
+        traceback.print_exc()
+if __name__ == "__main__":
+    asyncio.run(main())

lite_agent-0.6.0/examples/simple_debug2.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""
+Simple debug to check which API is being used.
+"""
+import asyncio
+from lite_agent import Agent, Runner
+async def main():
+    agent = Agent(
+        name="TestAgent",
+        model="gpt-4o-mini",
+        instructions="You are helpful.",
+    )
+    # Test non-streaming with explicit API
+    print("Testing non-streaming with responses API...")
+    runner = Runner(agent, api="responses", streaming=False)
+    print(f"Runner API: {runner.api}")
+    print(f"Runner streaming: {runner.streaming}")
+    # Test with completion API
+    print("\nTesting non-streaming with completion API...")
+    runner2 = Runner(agent, api="completion", streaming=False)
+    print(f"Runner API: {runner2.api}")
+    print(f"Runner streaming: {runner2.streaming}")
+if __name__ == "__main__":
+    asyncio.run(main())

lite_agent-0.6.0/examples/streaming_demo.py ADDED Viewed

@@ -0,0 +1,73 @@
+"""
+Demo script showing streaming vs non-streaming configuration in LiteAgent.
+"""
+import asyncio
+import time
+from lite_agent import Agent, Runner
+async def main():
+    # Create an agent
+    agent = Agent(
+        name="StreamingDemo",
+        model="gpt-4o-mini",
+        instructions="You are a helpful assistant. Always respond concisely.",
+    )
+    print("=== Streaming Mode (Default) ===")
+    # Default streaming=True
+    runner_streaming = Runner(agent, streaming=True)
+    chunks = []
+    print("Question: What is the capital of France?")
+    print("Response: ", end="", flush=True)
+    async for chunk in runner_streaming.run("What is the capital of France?"):
+        chunks.append(chunk)
+        if chunk.type == "content_delta":
+            print(chunk.delta, end="", flush=True)
+    print(f"\nReceived {len(chunks)} chunks in streaming mode\n")
+    print("=== Non-Streaming Mode ===")
+    # Set streaming=False
+    runner_non_streaming = Runner(agent, streaming=False)
+    chunks = []
+    print("Question: What is the capital of Germany?")
+    print("Response: ", end="", flush=True)
+    async for chunk in runner_non_streaming.run("What is the capital of Germany?"):
+        chunks.append(chunk)
+        if chunk.type == "assistant_message":
+            print(chunk.message.content[0].text)
+    print(f"Received {len(chunks)} chunks in non-streaming mode\n")
+    print("=== Comparing Performance ===")
+    # Time streaming
+    start = time.time()
+    runner_streaming = Runner(agent, streaming=True)
+    chunks = []
+    async for chunk in runner_streaming.run("What is 2+2?"):
+        chunks.append(chunk)
+    streaming_time = time.time() - start
+    # Time non-streaming
+    start = time.time()
+    runner_non_streaming = Runner(agent, streaming=False)
+    chunks = []
+    async for chunk in runner_non_streaming.run("What is 3+3?"):
+        chunks.append(chunk)
+    non_streaming_time = time.time() - start
+    print(f"Streaming mode: {streaming_time:.2f}s")
+    print(f"Non-streaming mode: {non_streaming_time:.2f}s")
+    print("\n=== Usage Guide ===")
+    print("To use non-streaming mode:")
+    print("  runner = Runner(agent, streaming=False)")
+    print("To use streaming mode (default):")
+    print("  runner = Runner(agent, streaming=True)  # or just Runner(agent)")
+if __name__ == "__main__":
+    asyncio.run(main())

{lite_agent-0.5.0 → lite_agent-0.6.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "lite-agent"
-version = "0.5.0"
+version = "0.6.0"
 description = "A lightweight, extensible framework for building AI agent."
 readme = "README.md"
 authors = [{ name = "Jianqi Pan", email = "jannchie@gmail.com" }]
@@ -69,6 +69,7 @@ ignore = [
     "ANN204",
     "C901",
     "PLR0912",
+    "PLR0913",
     "PLR0915",
     "RUF001",
 ]

{lite_agent-0.5.0 → lite_agent-0.6.0}/src/lite_agent/agent.py RENAMED Viewed

@@ -5,11 +5,10 @@ from typing import Any, Optional
 from funcall import Funcall
 from jinja2 import Environment, FileSystemLoader
-from litellm import CustomStreamWrapper
 from lite_agent.client import BaseLLMClient, LiteLLMClient, ReasoningConfig
 from lite_agent.loggers import logger
-from lite_agent.stream_handlers import litellm_completion_stream_handler, litellm_response_stream_handler
+from lite_agent.response_handlers import CompletionResponseHandler, ResponsesAPIHandler
 from lite_agent.types import AgentChunk, FunctionCallEvent, FunctionCallOutputEvent, RunnerMessages, ToolCall, message_to_llm_dict, system_message_to_llm_dict
 from lite_agent.types.messages import NewAssistantMessage, NewSystemMessage, NewUserMessage
@@ -22,7 +21,7 @@ WAIT_FOR_USER_INSTRUCTIONS_TEMPLATE = jinja_env.get_template("wait_for_user_inst
 class Agent:
-    def __init__(  # noqa: PLR0913
+    def __init__(
         self,
         *,
         model: str | BaseLLMClient,
@@ -280,6 +279,7 @@ class Agent:
         messages: RunnerMessages,
         record_to_file: Path | None = None,
         reasoning: ReasoningConfig = None,
+        streaming: bool = True,
     ) -> AsyncGenerator[AgentChunk, None]:
         # Apply message transfer callback if provided - always use legacy format for LLM compatibility
         processed_messages = messages
@@ -296,19 +296,19 @@ class Agent:
             tools=tools,
             tool_choice="auto",  # TODO: make this configurable
             reasoning=reasoning,
+            streaming=streaming,
         )
-        # Ensure resp is a CustomStreamWrapper
-        if isinstance(resp, CustomStreamWrapper):
-            return litellm_completion_stream_handler(resp, record_to=record_to_file)
-        msg = "Response is not a CustomStreamWrapper, cannot stream chunks."
-        raise TypeError(msg)
+        # Use response handler for unified processing
+        handler = CompletionResponseHandler()
+        return handler.handle(resp, streaming, record_to_file)
     async def responses(
         self,
         messages: RunnerMessages,
         record_to_file: Path | None = None,
         reasoning: ReasoningConfig = None,
+        streaming: bool = True,
     ) -> AsyncGenerator[AgentChunk, None]:
         # Apply message transfer callback if provided - always use legacy format for LLM compatibility
         processed_messages = messages
@@ -324,8 +324,11 @@ class Agent:
             tools=tools,
             tool_choice="auto",  # TODO: make this configurable
             reasoning=reasoning,
+            streaming=streaming,
         )
-        return litellm_response_stream_handler(resp, record_to=record_to_file)
+        # Use response handler for unified processing
+        handler = ResponsesAPIHandler()
+        return handler.handle(resp, streaming, record_to_file)
     async def list_require_confirm_tools(self, tool_calls: Sequence[ToolCall] | None) -> Sequence[ToolCall]:
         if not tool_calls:
@@ -539,3 +542,4 @@ class Agent:
             required=[],
             handler=wait_for_user_handler,
         )

{lite_agent-0.5.0 → lite_agent-0.6.0}/src/lite_agent/chat_display.py RENAMED Viewed

@@ -437,7 +437,7 @@ def display_messages(
         )
-def _display_single_message_compact(  # noqa: PLR0913
+def _display_single_message_compact(
     message: FlexibleRunnerMessage,
     *,
     index: int | None = None,

lite-agent 0.5.0__tar.gz → 0.6.0__tar.gz

Potentially problematic release.

lite-agent 0.5.0 (tar.gz) → 0.6.0 (tar.gz)