casual-llm 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
casual_llm/__init__.py CHANGED
@@ -2,25 +2,46 @@
 casual-llm - Lightweight LLM provider abstraction with standard message models.
 
 A simple, protocol-based library for working with different LLM providers
-(OpenAI, Ollama, etc.) using a unified interface and OpenAI-compatible message format.
+(OpenAI, Ollama, Anthropic) using a unified interface and OpenAI-compatible message format.
 
 Part of the casual-* ecosystem of lightweight AI tools.
+
+Example usage:
+    >>> from casual_llm import OpenAIClient, Model, UserMessage
+    >>>
+    >>> # Create client (configured once)
+    >>> client = OpenAIClient(api_key="...")
+    >>>
+    >>> # Create multiple models using the same client
+    >>> gpt4 = Model(client, name="gpt-4", temperature=0.7)
+    >>> gpt4o = Model(client, name="gpt-4o")
+    >>>
+    >>> # Use models
+    >>> response = await gpt4.chat([UserMessage(content="Hello")])
+    >>> print(response.content)
+    >>>
+    >>> # Each model tracks its own usage
+    >>> print(f"Used {gpt4.get_usage().total_tokens} tokens")
 """
 
-__version__ = "0.4.2"
+__version__ = "0.5.0"
 
-# Model configuration
-from casual_llm.config import ModelConfig, Provider
+# Configuration
+from casual_llm.config import ClientConfig, ModelConfig, Provider
 
-# Provider protocol and implementations
+# Client protocol and implementations
 from casual_llm.providers import (
-    LLMProvider,
-    OllamaProvider,
-    OpenAIProvider,
-    AnthropicProvider,
-    create_provider,
+    LLMClient,
+    OllamaClient,
+    OpenAIClient,
+    AnthropicClient,
+    create_client,
+    create_model,
 )
 
+# Model class
+from casual_llm.model import Model
+
 # OpenAI-compatible message models
 from casual_llm.messages import (
     ChatMessage,
@@ -66,14 +87,19 @@ from casual_llm.message_converters import (
 __all__ = [
     # Version
     "__version__",
-    # Providers
-    "LLMProvider",
+    # Configuration
+    "ClientConfig",
     "ModelConfig",
     "Provider",
-    "OllamaProvider",
-    "OpenAIProvider",
-    "AnthropicProvider",
-    "create_provider",
+    # Clients
+    "LLMClient",
+    "OllamaClient",
+    "OpenAIClient",
+    "AnthropicClient",
+    "create_client",
+    "create_model",
+    # Model
+    "Model",
     # Messages
     "ChatMessage",
     "UserMessage",
casual_llm/config.py CHANGED
@@ -1,12 +1,13 @@
 """
-Model configuration and provider enums.
+Configuration for LLM clients and models.
 
-This module defines configuration structures for LLM models,
-allowing unified configuration across different provider backends.
+This module defines configuration structures for LLM clients (API connections)
+and models, allowing unified configuration across different provider backends.
 """
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from enum import Enum
+from typing import Any
 
 
 class Provider(Enum):
@@ -18,47 +19,77 @@ class Provider(Enum):
 
 
 @dataclass
-class ModelConfig:
+class ClientConfig:
     """
-    Configuration for a specific LLM model.
+    Configuration for an LLM client (API connection).
 
-    Provides a unified way to configure models across different providers.
+    Provides a unified way to configure client connections across different providers.
 
     Attributes:
-        name: Model name (e.g., "gpt-4o-mini", "qwen2.5:7b-instruct")
-        provider: Provider type (OPENAI or OLLAMA)
+        provider: Provider type (OPENAI, OLLAMA, or ANTHROPIC)
         base_url: Optional custom API endpoint
-        api_key: Optional API key (for OpenAI/compatible providers)
-        temperature: Sampling temperature (0.0-1.0, optional - uses provider default if not set)
+        api_key: Optional API key (for OpenAI/Anthropic providers)
+        timeout: HTTP request timeout in seconds (default: 60.0)
+        extra_kwargs: Additional kwargs passed to the client
 
     Examples:
-        >>> from casual_llm import ModelConfig, Provider
+        >>> from casual_llm import ClientConfig, Provider
        >>>
        >>> # OpenAI configuration
-        >>> config = ModelConfig(
-        ...     name="gpt-4o-mini",
+        >>> config = ClientConfig(
        ...     provider=Provider.OPENAI,
        ...     api_key="sk-..."
        ... )
        >>>
        >>> # Ollama configuration
-        >>> config = ModelConfig(
-        ...     name="qwen2.5:7b-instruct",
+        >>> config = ClientConfig(
        ...     provider=Provider.OLLAMA,
        ...     base_url="http://localhost:11434"
        ... )
        >>>
        >>> # OpenRouter configuration (OpenAI-compatible)
-        >>> config = ModelConfig(
-        ...     name="anthropic/claude-3.5-sonnet",
+        >>> config = ClientConfig(
        ...     provider=Provider.OPENAI,
        ...     api_key="sk-or-...",
        ...     base_url="https://openrouter.ai/api/v1"
        ... )
    """
 
-    name: str
     provider: Provider
     base_url: str | None = None
     api_key: str | None = None
+    timeout: float = 60.0
+    extra_kwargs: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class ModelConfig:
+    """
+    Configuration for a specific LLM model.
+
+    Used with a client to create Model instances.
+
+    Attributes:
+        name: Model name (e.g., "gpt-4o-mini", "qwen2.5:7b-instruct", "claude-3-5-sonnet-latest")
+        temperature: Sampling temperature (0.0-1.0, optional - uses provider default if not set)
+        extra_kwargs: Additional kwargs passed to chat/stream methods
+
+    Examples:
+        >>> from casual_llm import ModelConfig
+        >>>
+        >>> # GPT-4 configuration
+        >>> config = ModelConfig(
+        ...     name="gpt-4",
+        ...     temperature=0.7
+        ... )
+        >>>
+        >>> # Claude configuration
+        >>> config = ModelConfig(
+        ...     name="claude-3-5-sonnet-latest",
+        ...     temperature=0.5
+        ... )
+    """
+
+    name: str
     temperature: float | None = None
+    extra_kwargs: dict[str, Any] = field(default_factory=dict)
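
The split of responsibilities is now: ClientConfig carries the connection (including the new timeout and extra_kwargs fields), while ModelConfig carries per-model settings. A small sketch of the two side by side; the max_retries and top_p kwargs are only illustrative, and how create_client/create_model consume these configs is not shown in this diff:

    from casual_llm import ClientConfig, ModelConfig, Provider

    # Connection-level settings, shared by every model created on this client.
    client_config = ClientConfig(
        provider=Provider.OPENAI,
        api_key="sk-...",
        timeout=30.0,                      # new in 0.5.0: HTTP request timeout in seconds
        extra_kwargs={"max_retries": 2},   # illustrative; forwarded to the underlying client
    )

    # Per-model settings.
    model_config = ModelConfig(
        name="gpt-4o-mini",
        temperature=0.2,
        extra_kwargs={"top_p": 0.9},       # illustrative; forwarded to chat/stream calls
    )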
@@ -162,7 +162,7 @@ def convert_messages_to_anthropic(messages: list[ChatMessage]) -> list[dict[str,
     if not messages:
         return []
 
-    logger.debug(f"Converting {len(messages)} messages to Anthropic format")
+    logger.debug("Converting %d messages to Anthropic format", len(messages))
 
     anthropic_messages: list[dict[str, Any]] = []
 
@@ -185,7 +185,8 @@ def convert_messages_to_anthropic(messages: list[ChatMessage]) -> list[dict[str,
                 except json.JSONDecodeError:
                     input_data = {}
                     logger.warning(
-                        f"Failed to parse tool call arguments: {tool_call.function.arguments}"
+                        "Failed to parse tool call arguments: %s",
+                        tool_call.function.arguments,
                     )
 
                 content_blocks.append(
@@ -236,7 +237,7 @@ def convert_messages_to_anthropic(messages: list[ChatMessage]) -> list[dict[str,
             )
 
             case _:
-                logger.warning(f"Unknown message role: {msg.role}")
+                logger.warning("Unknown message role: %s", msg.role)
 
     return anthropic_messages
 
@@ -265,7 +266,7 @@ def convert_tool_calls_from_anthropic(
     tool_calls = []
 
     for tool in response_tool_calls:
-        logger.debug(f"Converting tool call: {tool.name}")
+        logger.debug("Converting tool call: %s", tool.name)
 
         # Serialize input dict to JSON string for casual-llm format
         arguments = json.dumps(tool.input) if tool.input else "{}"
@@ -277,7 +278,7 @@ def convert_tool_calls_from_anthropic(
         )
         tool_calls.append(tool_call)
 
-    logger.debug(f"Converted {len(tool_calls)} tool calls")
+    logger.debug("Converted %d tool calls", len(tool_calls))
     return tool_calls
 
 
@@ -285,6 +286,4 @@ __all__ = [
     "convert_messages_to_anthropic",
     "extract_system_message",
     "convert_tool_calls_from_anthropic",
-    "_convert_image_to_anthropic",
-    "_convert_user_content_to_anthropic",
 ]
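
The change repeated throughout this converter module (and again in the Ollama and OpenAI converters below) replaces f-string logging with %-style lazy formatting, so the message is only interpolated when the record is actually emitted. A standalone sketch of the difference, not code from the package:

    import logging

    logging.basicConfig(level=logging.INFO)   # DEBUG records are filtered out
    logger = logging.getLogger("demo")

    messages = ["hi", "there"]

    # Eager: the f-string is built even though the DEBUG record is then discarded.
    logger.debug(f"Converting {len(messages)} messages")

    # Lazy: the arguments are stored on the LogRecord and only formatted if
    # a handler actually emits the record.
    logger.debug("Converting %d messages", len(messages))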
@@ -45,7 +45,7 @@ async def _convert_image_to_ollama(image: ImageContent) -> str:
             return strip_base64_prefix(image.source)
         else:
             # Regular URL - fetch and convert to base64
-            logger.debug(f"Fetching image from URL for Ollama: {image.source}")
+            logger.debug("Fetching image from URL for Ollama: %s", image.source)
             base64_data, _ = await fetch_image_as_base64(image.source)
             return base64_data
     else:
@@ -128,7 +128,7 @@ async def convert_messages_to_ollama(messages: list[ChatMessage]) -> list[dict[s
     if not messages:
         return []
 
-    logger.debug(f"Converting {len(messages)} messages to Ollama format")
+    logger.debug("Converting %d messages to Ollama format", len(messages))
 
     ollama_messages: list[dict[str, Any]] = []
 
@@ -188,7 +188,7 @@ async def convert_messages_to_ollama(messages: list[ChatMessage]) -> list[dict[s
                 ollama_messages.append(user_message)
 
             case _:
-                logger.warning(f"Unknown message role: {msg.role}")
+                logger.warning("Unknown message role: %s", msg.role)
 
     return ollama_messages
 
@@ -221,9 +221,9 @@ def convert_tool_calls_from_ollama(
         tool_call_id = getattr(tool, "id", None)
         if not tool_call_id:
             tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
-            logger.debug(f"Generated tool call ID: {tool_call_id}")
+            logger.debug("Generated tool call ID: %s", tool_call_id)
 
-        logger.debug(f"Converting tool call: {tool.function.name}")
+        logger.debug("Converting tool call: %s", tool.function.name)
 
         # Convert arguments from Mapping[str, Any] to JSON string
         # Ollama returns arguments as a dict, but we need a JSON string
@@ -237,7 +237,7 @@ def convert_tool_calls_from_ollama(
         )
         tool_calls.append(tool_call)
 
-    logger.debug(f"Converted {len(tool_calls)} tool calls")
+    logger.debug("Converted %d tool calls", len(tool_calls))
     return tool_calls
 
 
@@ -92,7 +92,7 @@ def convert_messages_to_openai(messages: list[ChatMessage]) -> list[dict[str, An
     if not messages:
         return []
 
-    logger.debug(f"Converting {len(messages)} messages to OpenAI format")
+    logger.debug("Converting %d messages to OpenAI format", len(messages))
 
     openai_messages: list[dict[str, Any]] = []
 
@@ -145,7 +145,7 @@ def convert_messages_to_openai(messages: list[ChatMessage]) -> list[dict[str, An
             )
 
             case _:
-                logger.warning(f"Unknown message role: {msg.role}")
+                logger.warning("Unknown message role: %s", msg.role)
 
     return openai_messages
 
@@ -171,7 +171,7 @@ def convert_tool_calls_from_openai(
     tool_calls = []
 
     for tool in response_tool_calls:
-        logger.debug(f"Converting tool call: {tool.function.name}")
+        logger.debug("Converting tool call: %s", tool.function.name)
 
         tool_call = AssistantToolCall(
             id=tool.id,
@@ -182,7 +182,7 @@ def convert_tool_calls_from_openai(
         )
         tool_calls.append(tool_call)
 
-    logger.debug(f"Converted {len(tool_calls)} tool calls")
+    logger.debug("Converted %d tool calls", len(tool_calls))
     return tool_calls
 
 
casual_llm/model.py ADDED
@@ -0,0 +1,193 @@
+"""
+Model class for LLM interactions.
+
+Provides a user-friendly interface for chat and streaming with per-model usage tracking.
+"""
+
+from __future__ import annotations
+
+from typing import Literal, AsyncIterator, Any, TYPE_CHECKING
+
+from pydantic import BaseModel
+
+from casual_llm.messages import ChatMessage, AssistantMessage, StreamChunk
+from casual_llm.tools import Tool
+from casual_llm.usage import Usage
+
+if TYPE_CHECKING:
+    from casual_llm.providers.base import LLMClient
+
+
+class Model:
+    """
+    User-facing class for LLM interactions.
+
+    A Model wraps an LLMClient with model-specific configuration. This allows
+    configuring providers once and creating multiple models that share the
+    same connection.
+
+    Examples:
+        >>> from casual_llm import OpenAIClient, Model, UserMessage
+        >>>
+        >>> # Create a client (configured once)
+        >>> client = OpenAIClient(api_key="...")
+        >>>
+        >>> # Create multiple models using the same client
+        >>> gpt4 = Model(client, name="gpt-4", temperature=0.7)
+        >>> gpt4o = Model(client, name="gpt-4o")
+        >>> gpt35 = Model(client, name="gpt-3.5-turbo", temperature=0.5)
+        >>>
+        >>> # Use models
+        >>> response = await gpt4.chat([UserMessage(content="Hello")])
+        >>> print(response.content)
+        >>>
+        >>> # Each model tracks its own usage
+        >>> print(f"GPT-4 used {gpt4.get_usage().total_tokens} tokens")
+    """
+
+    def __init__(
+        self,
+        client: LLMClient,
+        name: str,
+        temperature: float | None = None,
+        extra_kwargs: dict[str, Any] | None = None,
+    ):
+        """
+        Create a new Model.
+
+        Args:
+            client: The LLM client to use (OpenAIClient, OllamaClient, etc.)
+            name: The model identifier (e.g., "gpt-4", "llama3.1", "claude-3-opus")
+            temperature: Default temperature for this model (can be overridden per-call)
+            extra_kwargs: Extra keyword arguments passed to the client methods
+        """
+        self._client = client
+        self.name = name
+        self.temperature = temperature
+        self.extra_kwargs = extra_kwargs or {}
+        self._last_usage: Usage | None = None
+
+    async def chat(
+        self,
+        messages: list[ChatMessage],
+        response_format: Literal["json", "text"] | type[BaseModel] = "text",
+        max_tokens: int | None = None,
+        tools: list[Tool] | None = None,
+        temperature: float | None = None,
+    ) -> AssistantMessage:
+        """
+        Generate a chat response from the LLM.
+
+        Args:
+            messages: List of ChatMessage (UserMessage, AssistantMessage, SystemMessage, etc.)
+            response_format: Expected response format. Can be "json", "text", or a Pydantic
+                BaseModel class for JSON Schema-based structured output. When a Pydantic model
+                is provided, the LLM will be instructed to return JSON matching the schema.
+            max_tokens: Maximum tokens to generate (optional)
+            tools: List of tools available for the LLM to call (optional)
+            temperature: Temperature for this request (optional, overrides model default)
+
+        Returns:
+            AssistantMessage with content and optional tool_calls
+
+        Raises:
+            Provider-specific exceptions (httpx.HTTPError, openai.OpenAIError, etc.)
+
+        Examples:
+            >>> from pydantic import BaseModel
+            >>>
+            >>> class PersonInfo(BaseModel):
+            ...     name: str
+            ...     age: int
+            >>>
+            >>> # Pass Pydantic model for structured output
+            >>> response = await model.chat(
+            ...     messages=[UserMessage(content="Tell me about a person")],
+            ...     response_format=PersonInfo
+            ... )
+        """
+        temp = temperature if temperature is not None else self.temperature
+        result, usage = await self._client._chat(
+            model=self.name,
+            messages=messages,
+            response_format=response_format,
+            max_tokens=max_tokens,
+            tools=tools,
+            temperature=temp,
+        )
+        self._last_usage = usage
+        return result
+
+    async def stream(
+        self,
+        messages: list[ChatMessage],
+        response_format: Literal["json", "text"] | type[BaseModel] = "text",
+        max_tokens: int | None = None,
+        tools: list[Tool] | None = None,
+        temperature: float | None = None,
+    ) -> AsyncIterator[StreamChunk]:
+        """
+        Stream a chat response from the LLM.
+
+        This method yields response chunks in real-time as they are generated,
+        enabling progressive display in chat interfaces.
+
+        Args:
+            messages: List of ChatMessage (UserMessage, AssistantMessage, SystemMessage, etc.)
+            response_format: Expected response format. Can be "json", "text", or a Pydantic
+                BaseModel class for JSON Schema-based structured output.
+            max_tokens: Maximum tokens to generate (optional)
+            tools: List of tools available for the LLM to call (optional, may not work
+                with all providers during streaming)
+            temperature: Temperature for this request (optional, overrides model default)
+
+        Yields:
+            StreamChunk objects containing content fragments as tokens are generated.
+            Each chunk has a `content` attribute with the text fragment.
+
+        Raises:
+            Provider-specific exceptions (httpx.HTTPError, openai.OpenAIError, etc.)
+
+        Examples:
+            >>> from casual_llm import UserMessage
+            >>>
+            >>> # Stream response and print tokens as they arrive
+            >>> async for chunk in model.stream([UserMessage(content="Tell me a story")]):
+            ...     print(chunk.content, end="", flush=True)
+            >>>
+            >>> # Collect full response from stream
+            >>> chunks = []
+            >>> async for chunk in model.stream([UserMessage(content="Hello")]):
+            ...     chunks.append(chunk.content)
+            >>> full_response = "".join(chunks)
+        """
+        temp = temperature if temperature is not None else self.temperature
+        async for chunk in self._client._stream(
+            model=self.name,
+            messages=messages,
+            response_format=response_format,
+            max_tokens=max_tokens,
+            tools=tools,
+            temperature=temp,
+        ):
+            yield chunk
+
+    def get_usage(self) -> Usage | None:
+        """
+        Get token usage statistics from the last chat() call.
+
+        Returns:
+            Usage object with prompt_tokens, completion_tokens, and total_tokens,
+            or None if no calls have been made yet.
+
+        Examples:
+            >>> model = Model(client, name="gpt-4")
+            >>> await model.chat([UserMessage(content="Hello")])
+            >>> usage = model.get_usage()
+            >>> if usage:
+            ...     print(f"Used {usage.total_tokens} tokens")
+        """
+        return self._last_usage
+
+    def __repr__(self) -> str:
+        return f"Model(name={self.name!r}, temperature={self.temperature})"