casual-llm 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
casual_llm/__init__.py CHANGED
@@ -2,25 +2,46 @@
 casual-llm - Lightweight LLM provider abstraction with standard message models.
 
 A simple, protocol-based library for working with different LLM providers
-(OpenAI, Ollama, etc.) using a unified interface and OpenAI-compatible message format.
+(OpenAI, Ollama, Anthropic) using a unified interface and OpenAI-compatible message format.
 
 Part of the casual-* ecosystem of lightweight AI tools.
+
+Example usage:
+    >>> from casual_llm import OpenAIClient, Model, UserMessage
+    >>>
+    >>> # Create client (configured once)
+    >>> client = OpenAIClient(api_key="...")
+    >>>
+    >>> # Create multiple models using the same client
+    >>> gpt4 = Model(client, name="gpt-4", temperature=0.7)
+    >>> gpt4o = Model(client, name="gpt-4o")
+    >>>
+    >>> # Use models
+    >>> response = await gpt4.chat([UserMessage(content="Hello")])
+    >>> print(response.content)
+    >>>
+    >>> # Each model tracks its own usage
+    >>> print(f"Used {gpt4.get_usage().total_tokens} tokens")
 """
 
-__version__ = "0.4.2"
+__version__ = "0.5.0"
 
-# Model configuration
-from casual_llm.config import ModelConfig, Provider
+# Configuration
+from casual_llm.config import ClientConfig, ModelConfig, Provider
 
-# Provider protocol and implementations
+# Client protocol and implementations
 from casual_llm.providers import (
-    LLMProvider,
-    OllamaProvider,
-    OpenAIProvider,
-    AnthropicProvider,
-    create_provider,
+    LLMClient,
+    OllamaClient,
+    OpenAIClient,
+    AnthropicClient,
+    create_client,
+    create_model,
 )
 
+# Model class
+from casual_llm.model import Model
+
 # OpenAI-compatible message models
 from casual_llm.messages import (
     ChatMessage,
@@ -66,14 +87,19 @@ from casual_llm.message_converters import (
 __all__ = [
     # Version
     "__version__",
-    # Providers
-    "LLMProvider",
+    # Configuration
+    "ClientConfig",
     "ModelConfig",
     "Provider",
-    "OllamaProvider",
-    "OpenAIProvider",
-    "AnthropicProvider",
-    "create_provider",
+    # Clients
+    "LLMClient",
+    "OllamaClient",
+    "OpenAIClient",
+    "AnthropicClient",
+    "create_client",
+    "create_model",
+    # Model
+    "Model",
     # Messages
     "ChatMessage",
     "UserMessage",
casual_llm/config.py CHANGED
@@ -1,12 +1,13 @@
 """
-Model configuration and provider enums.
+Configuration for LLM clients and models.
 
-This module defines configuration structures for LLM models,
-allowing unified configuration across different provider backends.
+This module defines configuration structures for LLM clients (API connections)
+and models, allowing unified configuration across different provider backends.
 """
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from enum import Enum
+from typing import Any
 
 
 class Provider(Enum):
@@ -18,47 +19,77 @@ class Provider(Enum):
 
 
 @dataclass
-class ModelConfig:
+class ClientConfig:
     """
-    Configuration for a specific LLM model.
+    Configuration for an LLM client (API connection).
 
-    Provides a unified way to configure models across different providers.
+    Provides a unified way to configure client connections across different providers.
 
     Attributes:
-        name: Model name (e.g., "gpt-4o-mini", "qwen2.5:7b-instruct")
-        provider: Provider type (OPENAI or OLLAMA)
+        provider: Provider type (OPENAI, OLLAMA, or ANTHROPIC)
         base_url: Optional custom API endpoint
-        api_key: Optional API key (for OpenAI/compatible providers)
-        temperature: Sampling temperature (0.0-1.0, optional - uses provider default if not set)
+        api_key: Optional API key (for OpenAI/Anthropic providers)
+        timeout: HTTP request timeout in seconds (default: 60.0)
+        extra_kwargs: Additional kwargs passed to the client
 
     Examples:
-        >>> from casual_llm import ModelConfig, Provider
+        >>> from casual_llm import ClientConfig, Provider
        >>>
        >>> # OpenAI configuration
-        >>> config = ModelConfig(
-        ...     name="gpt-4o-mini",
+        >>> config = ClientConfig(
        ...     provider=Provider.OPENAI,
        ...     api_key="sk-..."
        ... )
        >>>
        >>> # Ollama configuration
-        >>> config = ModelConfig(
-        ...     name="qwen2.5:7b-instruct",
+        >>> config = ClientConfig(
        ...     provider=Provider.OLLAMA,
        ...     base_url="http://localhost:11434"
        ... )
        >>>
        >>> # OpenRouter configuration (OpenAI-compatible)
-        >>> config = ModelConfig(
-        ...     name="anthropic/claude-3.5-sonnet",
+        >>> config = ClientConfig(
        ...     provider=Provider.OPENAI,
        ...     api_key="sk-or-...",
        ...     base_url="https://openrouter.ai/api/v1"
        ... )
    """
 
-    name: str
     provider: Provider
     base_url: str | None = None
     api_key: str | None = None
+    timeout: float = 60.0
+    extra_kwargs: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class ModelConfig:
+    """
+    Configuration for a specific LLM model.
+
+    Used with a client to create Model instances.
+
+    Attributes:
+        name: Model name (e.g., "gpt-4o-mini", "qwen2.5:7b-instruct", "claude-3-5-sonnet-latest")
+        temperature: Sampling temperature (0.0-1.0, optional - uses provider default if not set)
+        extra_kwargs: Additional kwargs passed to chat/stream methods
+
+    Examples:
+        >>> from casual_llm import ModelConfig
+        >>>
+        >>> # GPT-4 configuration
+        >>> config = ModelConfig(
+        ...     name="gpt-4",
+        ...     temperature=0.7
+        ... )
+        >>>
+        >>> # Claude configuration
+        >>> config = ModelConfig(
+        ...     name="claude-3-5-sonnet-latest",
+        ...     temperature=0.5
+        ... )
+    """
+
+    name: str
     temperature: float | None = None
+    extra_kwargs: dict[str, Any] = field(default_factory=dict)
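
The split of responsibilities is now: ClientConfig carries the connection (including the new timeout and extra_kwargs fields), while ModelConfig carries per-model settings. A small sketch of the two side by side; the max_retries and top_p kwargs are only illustrative, and how create_client/create_model consume these configs is not shown in this diff:

    from casual_llm import ClientConfig, ModelConfig, Provider

    # Connection-level settings, shared by every model created on this client.
    client_config = ClientConfig(
        provider=Provider.OPENAI,
        api_key="sk-...",
        timeout=30.0,                      # new in 0.5.0: HTTP request timeout in seconds
        extra_kwargs={"max_retries": 2},   # illustrative; forwarded to the underlying client
    )

    # Per-model settings.
    model_config = ModelConfig(
        name="gpt-4o-mini",
        temperature=0.2,
        extra_kwargs={"top_p": 0.9},       # illustrative; forwarded to chat/stream calls
    )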
@@ -162,7 +162,7 @@ def convert_messages_to_anthropic(messages: list[ChatMessage]) -> list[dict[str,
     if not messages:
         return []
 
-    logger.debug(f"Converting {len(messages)} messages to Anthropic format")
+    logger.debug("Converting %d messages to Anthropic format", len(messages))
 
     anthropic_messages: list[dict[str, Any]] = []
 
@@ -185,7 +185,8 @@ def convert_messages_to_anthropic(messages: list[ChatMessage]) -> list[dict[str,
                 except json.JSONDecodeError:
                     input_data = {}
                     logger.warning(
-                        f"Failed to parse tool call arguments: {tool_call.function.arguments}"
+                        "Failed to parse tool call arguments: %s",
+                        tool_call.function.arguments,
                     )
 
                 content_blocks.append(
@@ -236,7 +237,7 @@ def convert_messages_to_anthropic(messages: list[ChatMessage]) -> list[dict[str,
             )
 
             case _:
-                logger.warning(f"Unknown message role: {msg.role}")
+                logger.warning("Unknown message role: %s", msg.role)
 
     return anthropic_messages
 
@@ -265,7 +266,7 @@ def convert_tool_calls_from_anthropic(
     tool_calls = []
 
     for tool in response_tool_calls:
-        logger.debug(f"Converting tool call: {tool.name}")
+        logger.debug("Converting tool call: %s", tool.name)
 
         # Serialize input dict to JSON string for casual-llm format
         arguments = json.dumps(tool.input) if tool.input else "{}"
@@ -277,7 +278,7 @@ def convert_tool_calls_from_anthropic(
         )
         tool_calls.append(tool_call)
 
-    logger.debug(f"Converted {len(tool_calls)} tool calls")
+    logger.debug("Converted %d tool calls", len(tool_calls))
     return tool_calls
 
 
@@ -285,6 +286,4 @@ __all__ = [
     "convert_messages_to_anthropic",
     "extract_system_message",
     "convert_tool_calls_from_anthropic",
-    "_convert_image_to_anthropic",
-    "_convert_user_content_to_anthropic",
 ]
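
The change repeated throughout this converter module (and again in the Ollama and OpenAI converters below) replaces f-string logging with %-style lazy formatting, so the message is only interpolated when the record is actually emitted. A standalone sketch of the difference, not code from the package:

    import logging

    logging.basicConfig(level=logging.INFO)   # DEBUG records are filtered out
    logger = logging.getLogger("demo")

    messages = ["hi", "there"]

    # Eager: the f-string is built even though the DEBUG record is then discarded.
    logger.debug(f"Converting {len(messages)} messages")

    # Lazy: the arguments are stored on the LogRecord and only formatted if
    # a handler actually emits the record.
    logger.debug("Converting %d messages", len(messages))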
@@ -45,7 +45,7 @@ async def _convert_image_to_ollama(image: ImageContent) -> str:
             return strip_base64_prefix(image.source)
         else:
             # Regular URL - fetch and convert to base64
-            logger.debug(f"Fetching image from URL for Ollama: {image.source}")
+            logger.debug("Fetching image from URL for Ollama: %s", image.source)
             base64_data, _ = await fetch_image_as_base64(image.source)
             return base64_data
     else:
@@ -128,7 +128,7 @@ async def convert_messages_to_ollama(messages: list[ChatMessage]) -> list[dict[s
     if not messages:
         return []
 
-    logger.debug(f"Converting {len(messages)} messages to Ollama format")
+    logger.debug("Converting %d messages to Ollama format", len(messages))
 
     ollama_messages: list[dict[str, Any]] = []
 
@@ -188,7 +188,7 @@ async def convert_messages_to_ollama(messages: list[ChatMessage]) -> list[dict[s
                 ollama_messages.append(user_message)
 
             case _:
-                logger.warning(f"Unknown message role: {msg.role}")
+                logger.warning("Unknown message role: %s", msg.role)
 
     return ollama_messages
 
@@ -221,9 +221,9 @@ def convert_tool_calls_from_ollama(
         tool_call_id = getattr(tool, "id", None)
         if not tool_call_id:
             tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
-            logger.debug(f"Generated tool call ID: {tool_call_id}")
+            logger.debug("Generated tool call ID: %s", tool_call_id)
 
-        logger.debug(f"Converting tool call: {tool.function.name}")
+        logger.debug("Converting tool call: %s", tool.function.name)
 
         # Convert arguments from Mapping[str, Any] to JSON string
         # Ollama returns arguments as a dict, but we need a JSON string
@@ -237,7 +237,7 @@ def convert_tool_calls_from_ollama(
         )
         tool_calls.append(tool_call)
 
-    logger.debug(f"Converted {len(tool_calls)} tool calls")
+    logger.debug("Converted %d tool calls", len(tool_calls))
     return tool_calls
 
 
@@ -92,7 +92,7 @@ def convert_messages_to_openai(messages: list[ChatMessage]) -> list[dict[str, An
     if not messages:
         return []
 
-    logger.debug(f"Converting {len(messages)} messages to OpenAI format")
+    logger.debug("Converting %d messages to OpenAI format", len(messages))
 
     openai_messages: list[dict[str, Any]] = []
 
@@ -145,7 +145,7 @@ def convert_messages_to_openai(messages: list[ChatMessage]) -> list[dict[str, An
             )
 
             case _:
-                logger.warning(f"Unknown message role: {msg.role}")
+                logger.warning("Unknown message role: %s", msg.role)
 
     return openai_messages
 
@@ -171,7 +171,7 @@ def convert_tool_calls_from_openai(
     tool_calls = []
 
     for tool in response_tool_calls:
-        logger.debug(f"Converting tool call: {tool.function.name}")
+        logger.debug("Converting tool call: %s", tool.function.name)
 
         tool_call = AssistantToolCall(
             id=tool.id,
@@ -182,7 +182,7 @@ def convert_tool_calls_from_openai(
         )
         tool_calls.append(tool_call)
 
-    logger.debug(f"Converted {len(tool_calls)} tool calls")
+    logger.debug("Converted %d tool calls", len(tool_calls))
     return tool_calls
 
 
casual_llm/model.py ADDED
@@ -0,0 +1,193 @@
+"""
+Model class for LLM interactions.
+
+Provides a user-friendly interface for chat and streaming with per-model usage tracking.
+"""
+
+from __future__ import annotations
+
+from typing import Literal, AsyncIterator, Any, TYPE_CHECKING
+
+from pydantic import BaseModel
+
+from casual_llm.messages import ChatMessage, AssistantMessage, StreamChunk
+from casual_llm.tools import Tool
+from casual_llm.usage import Usage
+
+if TYPE_CHECKING:
+    from casual_llm.providers.base import LLMClient
+
+
+class Model:
+    """
+    User-facing class for LLM interactions.
+
+    A Model wraps an LLMClient with model-specific configuration. This allows
+    configuring providers once and creating multiple models that share the
+    same connection.
+
+    Examples:
+        >>> from casual_llm import OpenAIClient, Model, UserMessage
+        >>>
+        >>> # Create a client (configured once)
+        >>> client = OpenAIClient(api_key="...")
+        >>>
+        >>> # Create multiple models using the same client
+        >>> gpt4 = Model(client, name="gpt-4", temperature=0.7)
+        >>> gpt4o = Model(client, name="gpt-4o")
+        >>> gpt35 = Model(client, name="gpt-3.5-turbo", temperature=0.5)
+        >>>
+        >>> # Use models
+        >>> response = await gpt4.chat([UserMessage(content="Hello")])
+        >>> print(response.content)
+        >>>
+        >>> # Each model tracks its own usage
+        >>> print(f"GPT-4 used {gpt4.get_usage().total_tokens} tokens")
+    """
+
+    def __init__(
+        self,
+        client: LLMClient,
+        name: str,
+        temperature: float | None = None,
+        extra_kwargs: dict[str, Any] | None = None,
+    ):
+        """
+        Create a new Model.
+
+        Args:
+            client: The LLM client to use (OpenAIClient, OllamaClient, etc.)
+            name: The model identifier (e.g., "gpt-4", "llama3.1", "claude-3-opus")
+            temperature: Default temperature for this model (can be overridden per-call)
+            extra_kwargs: Extra keyword arguments passed to the client methods
+        """
+        self._client = client
+        self.name = name
+        self.temperature = temperature
+        self.extra_kwargs = extra_kwargs or {}
+        self._last_usage: Usage | None = None
+
+    async def chat(
+        self,
+        messages: list[ChatMessage],
+        response_format: Literal["json", "text"] | type[BaseModel] = "text",
+        max_tokens: int | None = None,
+        tools: list[Tool] | None = None,
+        temperature: float | None = None,
+    ) -> AssistantMessage:
+        """
+        Generate a chat response from the LLM.
+
+        Args:
+            messages: List of ChatMessage (UserMessage, AssistantMessage, SystemMessage, etc.)
+            response_format: Expected response format. Can be "json", "text", or a Pydantic
+                BaseModel class for JSON Schema-based structured output. When a Pydantic model
+                is provided, the LLM will be instructed to return JSON matching the schema.
+            max_tokens: Maximum tokens to generate (optional)
+            tools: List of tools available for the LLM to call (optional)
+            temperature: Temperature for this request (optional, overrides model default)
+
+        Returns:
+            AssistantMessage with content and optional tool_calls
+
+        Raises:
+            Provider-specific exceptions (httpx.HTTPError, openai.OpenAIError, etc.)
+
+        Examples:
+            >>> from pydantic import BaseModel
+            >>>
+            >>> class PersonInfo(BaseModel):
+            ...     name: str
+            ...     age: int
+            >>>
+            >>> # Pass Pydantic model for structured output
+            >>> response = await model.chat(
+            ...     messages=[UserMessage(content="Tell me about a person")],
+            ...     response_format=PersonInfo
+            ... )
+        """
+        temp = temperature if temperature is not None else self.temperature
+        result, usage = await self._client._chat(
+            model=self.name,
+            messages=messages,
+            response_format=response_format,
+            max_tokens=max_tokens,
+            tools=tools,
+            temperature=temp,
+        )
+        self._last_usage = usage
+        return result
+
+    async def stream(
+        self,
+        messages: list[ChatMessage],
+        response_format: Literal["json", "text"] | type[BaseModel] = "text",
+        max_tokens: int | None = None,
+        tools: list[Tool] | None = None,
+        temperature: float | None = None,
+    ) -> AsyncIterator[StreamChunk]:
+        """
+        Stream a chat response from the LLM.
+
+        This method yields response chunks in real-time as they are generated,
+        enabling progressive display in chat interfaces.
+
+        Args:
+            messages: List of ChatMessage (UserMessage, AssistantMessage, SystemMessage, etc.)
+            response_format: Expected response format. Can be "json", "text", or a Pydantic
+                BaseModel class for JSON Schema-based structured output.
+            max_tokens: Maximum tokens to generate (optional)
+            tools: List of tools available for the LLM to call (optional, may not work
+                with all providers during streaming)
+            temperature: Temperature for this request (optional, overrides model default)
+
+        Yields:
+            StreamChunk objects containing content fragments as tokens are generated.
+            Each chunk has a `content` attribute with the text fragment.
+
+        Raises:
+            Provider-specific exceptions (httpx.HTTPError, openai.OpenAIError, etc.)
+
+        Examples:
+            >>> from casual_llm import UserMessage
+            >>>
+            >>> # Stream response and print tokens as they arrive
+            >>> async for chunk in model.stream([UserMessage(content="Tell me a story")]):
+            ...     print(chunk.content, end="", flush=True)
+            >>>
+            >>> # Collect full response from stream
+            >>> chunks = []
+            >>> async for chunk in model.stream([UserMessage(content="Hello")]):
+            ...     chunks.append(chunk.content)
+            >>> full_response = "".join(chunks)
+        """
+        temp = temperature if temperature is not None else self.temperature
+        async for chunk in self._client._stream(
+            model=self.name,
+            messages=messages,
+            response_format=response_format,
+            max_tokens=max_tokens,
+            tools=tools,
+            temperature=temp,
+        ):
+            yield chunk
+
+    def get_usage(self) -> Usage | None:
+        """
+        Get token usage statistics from the last chat() call.
+
+        Returns:
+            Usage object with prompt_tokens, completion_tokens, and total_tokens,
+            or None if no calls have been made yet.
+
+        Examples:
+            >>> model = Model(client, name="gpt-4")
+            >>> await model.chat([UserMessage(content="Hello")])
+            >>> usage = model.get_usage()
+            >>> if usage:
+            ...     print(f"Used {usage.total_tokens} tokens")
+        """
+        return self._last_usage
+
+    def __repr__(self) -> str:
+        return f"Model(name={self.name!r}, temperature={self.temperature})"