casual-llm 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {casual_llm-0.1.0/src/casual_llm.egg-info → casual_llm-0.3.0}/PKG-INFO +2 -1
- {casual_llm-0.1.0 → casual_llm-0.3.0}/pyproject.toml +3 -1
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/__init__.py +9 -1
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/message_converters/ollama.py +91 -3
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/message_converters/openai.py +57 -1
- casual_llm-0.3.0/src/casual_llm/messages.py +111 -0
- casual_llm-0.3.0/src/casual_llm/providers/base.py +141 -0
- casual_llm-0.3.0/src/casual_llm/providers/ollama.py +263 -0
- casual_llm-0.3.0/src/casual_llm/providers/openai.py +287 -0
- casual_llm-0.3.0/src/casual_llm/utils/__init__.py +9 -0
- casual_llm-0.3.0/src/casual_llm/utils/image.py +162 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0/src/casual_llm.egg-info}/PKG-INFO +2 -1
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm.egg-info/SOURCES.txt +8 -1
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm.egg-info/requires.txt +1 -0
- casual_llm-0.3.0/tests/test_backward_compatibility.py +429 -0
- casual_llm-0.3.0/tests/test_image_utils.py +394 -0
- casual_llm-0.3.0/tests/test_providers.py +930 -0
- casual_llm-0.3.0/tests/test_vision_integration.py +420 -0
- casual_llm-0.3.0/tests/test_vision_ollama.py +615 -0
- casual_llm-0.3.0/tests/test_vision_openai.py +470 -0
- casual_llm-0.1.0/src/casual_llm/messages.py +0 -60
- casual_llm-0.1.0/src/casual_llm/providers/base.py +0 -78
- casual_llm-0.1.0/src/casual_llm/providers/ollama.py +0 -152
- casual_llm-0.1.0/src/casual_llm/providers/openai.py +0 -163
- casual_llm-0.1.0/tests/test_providers.py +0 -457
- {casual_llm-0.1.0 → casual_llm-0.3.0}/LICENSE +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/README.md +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/setup.cfg +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/config.py +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/message_converters/__init__.py +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/providers/__init__.py +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/py.typed +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/tool_converters/__init__.py +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/tool_converters/ollama.py +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/tool_converters/openai.py +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/tools.py +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm/usage.py +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm.egg-info/dependency_links.txt +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/src/casual_llm.egg-info/top_level.txt +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/tests/test_messages.py +0 -0
- {casual_llm-0.1.0 → casual_llm-0.3.0}/tests/test_tools.py +0 -0

PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: casual-llm
-Version: 0.1.0
+Version: 0.3.0
 Summary: Lightweight LLM provider abstraction with standardized message models
 Author-email: Alex Stansfield <alex@casualgenius.com>
 License: MIT
@@ -23,6 +23,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: ollama>=0.6.1
+Requires-Dist: httpx[http2]>=0.28.1
 Provides-Extra: openai
 Requires-Dist: openai>=1.0.0; extra == "openai"
 Dynamic: license-file
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "casual-llm"
-version = "0.1.0"
+version = "0.3.0"
 description = "Lightweight LLM provider abstraction with standardized message models"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -24,6 +24,7 @@ classifiers = [
 dependencies = [
     "pydantic>=2.0.0",
     "ollama>=0.6.1",
+    "httpx[http2]>=0.28.1",
 ]

 [project.optional-dependencies]
@@ -63,6 +64,7 @@ strict = true
 warn_return_any = true
 warn_unused_configs = true

+
 [dependency-groups]
 dev = [
     "openai>=2.8.1",
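
The new httpx[http2] runtime dependency backs the image-fetching helpers in src/casual_llm/utils/image.py (listed in the file summary above but not shown in this section). Judging only from how the Ollama converter below awaits fetch_image_as_base64(url) and unpacks two values, the first being the base64 payload, a purely illustrative sketch of that helper might look like this (treating the second return value as the media type is an assumption):

import base64

import httpx


async def fetch_image_as_base64(url: str, timeout: float = 30.0) -> tuple[str, str]:
    """Hypothetical sketch only: download an image and return (base64_data, media_type).

    The real casual_llm.utils.image implementation is not part of this excerpt;
    this just illustrates why httpx[http2] is now a runtime dependency.
    """
    async with httpx.AsyncClient(http2=True, timeout=timeout) as client:
        response = await client.get(url)
        response.raise_for_status()
        media_type = response.headers.get("content-type", "image/jpeg")
        return base64.b64encode(response.content).decode("ascii"), media_type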
src/casual_llm/__init__.py

@@ -7,7 +7,7 @@ A simple, protocol-based library for working with different LLM providers
 Part of the casual-* ecosystem of lightweight AI tools.
 """

-__version__ = "0.1.0"
+__version__ = "0.3.0"

 # Model configuration
 from casual_llm.config import ModelConfig, Provider
@@ -29,6 +29,10 @@ from casual_llm.messages import (
     ToolResultMessage,
     AssistantToolCall,
     AssistantToolCallFunction,
+    StreamChunk,
+    # Multimodal content types
+    TextContent,
+    ImageContent,
 )

 # Tool models
@@ -71,6 +75,10 @@ __all__ = [
     "ToolResultMessage",
     "AssistantToolCall",
     "AssistantToolCallFunction",
+    "StreamChunk",
+    # Multimodal content types
+    "TextContent",
+    "ImageContent",
     # Tools
     "Tool",
     "ToolParameter",
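
With these re-exports, the streaming and multimodal types sit alongside the existing message classes at the package root. A small usage sketch (assuming UserMessage remains re-exported as in 0.1.0):

from casual_llm import ImageContent, StreamChunk, TextContent, UserMessage

# Build a multimodal user message from the newly exported content types
message = UserMessage(
    content=[
        TextContent(text="Describe this picture"),
        ImageContent(source="https://example.com/photo.jpg"),
    ]
)

# StreamChunk is the unit yielded by a provider's stream() method
chunk = StreamChunk(content="Hello", finish_reason=None)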
src/casual_llm/message_converters/ollama.py

@@ -13,6 +13,12 @@ from casual_llm.messages import (
     ChatMessage,
     AssistantToolCall,
     AssistantToolCallFunction,
+    TextContent,
+    ImageContent,
+)
+from casual_llm.utils.image import (
+    strip_base64_prefix,
+    fetch_image_as_base64,
 )

 if TYPE_CHECKING:
@@ -21,23 +27,101 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)


-def convert_messages_to_ollama(messages: list[ChatMessage]) -> list[dict[str, Any]]:
+async def _convert_image_to_ollama(image: ImageContent) -> str:
+    """
+    Convert ImageContent to Ollama base64 format.
+
+    Ollama expects images as raw base64 strings (no data URI prefix).
+
+    For URL sources, this function fetches the image and converts to base64.
+
+    Raises:
+        ImageFetchError: If a URL image cannot be fetched.
+    """
+    if isinstance(image.source, str):
+        # Check if it's a data URI or a URL
+        if image.source.startswith("data:"):
+            # Data URI - extract base64 data
+            return strip_base64_prefix(image.source)
+        else:
+            # Regular URL - fetch and convert to base64
+            logger.debug(f"Fetching image from URL for Ollama: {image.source}")
+            base64_data, _ = await fetch_image_as_base64(image.source)
+            return base64_data
+    else:
+        # Base64 dict source - use directly
+        base64_data = image.source.get("data", "")
+        # Strip any data URI prefix that might be present
+        return strip_base64_prefix(base64_data)
+
+
+async def _convert_user_content_to_ollama(
+    content: str | list[TextContent | ImageContent] | None,
+) -> tuple[str, list[str]]:
+    """
+    Convert UserMessage content to Ollama format.
+
+    Handles both simple string content (backward compatible) and
+    multimodal content arrays (text + images).
+
+    Ollama uses a format where text goes in "content" and images
+    go in a separate "images" array as raw base64 strings.
+
+    Returns:
+        A tuple of (text_content, images_list) where:
+        - text_content: Combined text from all TextContent items
+        - images_list: List of base64-encoded image strings
+
+    Raises:
+        ImageFetchError: If a URL image cannot be fetched.
+    """
+    if content is None:
+        return "", []
+
+    if isinstance(content, str):
+        # Simple string content
+        return content, []
+
+    # Multimodal content array
+    text_parts: list[str] = []
+    images: list[str] = []
+
+    for item in content:
+        if isinstance(item, TextContent):
+            text_parts.append(item.text)
+        elif isinstance(item, ImageContent):
+            images.append(await _convert_image_to_ollama(item))
+
+    # Join text parts with newlines
+    text_content = "\n".join(text_parts) if text_parts else ""
+
+    return text_content, images
+
+
+async def convert_messages_to_ollama(messages: list[ChatMessage]) -> list[dict[str, Any]]:
     """
     Convert casual-llm ChatMessage list to Ollama format.

     Unlike OpenAI which expects tool call arguments as JSON strings,
     Ollama expects them as dictionaries. This function handles that conversion.

+    Supports multimodal messages with images. Ollama expects images as raw
+    base64 strings in a separate "images" array.
+
     Args:
         messages: List of ChatMessage objects

     Returns:
         List of dictionaries in Ollama message format

+    Raises:
+        ImageFetchError: If a URL image cannot be fetched.
+
     Examples:
+        >>> import asyncio
         >>> from casual_llm import UserMessage
         >>> messages = [UserMessage(content="Hello")]
-        >>> ollama_msgs = convert_messages_to_ollama(messages)
+        >>> ollama_msgs = asyncio.run(convert_messages_to_ollama(messages))
         >>> ollama_msgs[0]["role"]
         'user'
     """
@@ -97,7 +181,11 @@ def convert_messages_to_ollama(messages: list[ChatMessage]) -> list[dict[str, Any]]:
                )

            case "user":
-
+                text_content, images = await _convert_user_content_to_ollama(msg.content)
+                user_message: dict[str, Any] = {"role": "user", "content": text_content}
+                if images:
+                    user_message["images"] = images
+                ollama_messages.append(user_message)

            case _:
                logger.warning(f"Unknown message role: {msg.role}")
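
Put together, the Ollama path keeps user text in "content" and moves images into a separate "images" array of raw base64 strings. A hedged end-to-end sketch; the base64 payload is a placeholder and the expected shape is inferred from the converter above (strip_base64_prefix should leave raw base64 untouched):

import asyncio

from casual_llm import ImageContent, TextContent, UserMessage
from casual_llm.message_converters.ollama import convert_messages_to_ollama

messages = [
    UserMessage(
        content=[
            TextContent(text="What's in this image?"),
            ImageContent(
                source={"type": "base64", "data": "iVBORw0KGgo..."},  # placeholder base64
                media_type="image/png",
            ),
        ]
    )
]

ollama_msgs = asyncio.run(convert_messages_to_ollama(messages))
# Expected shape, inferred from the "user" case above:
# [{"role": "user", "content": "What's in this image?", "images": ["iVBORw0KGgo..."]}]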
src/casual_llm/message_converters/openai.py

@@ -11,6 +11,8 @@ from casual_llm.messages import (
     ChatMessage,
     AssistantToolCall,
     AssistantToolCallFunction,
+    TextContent,
+    ImageContent,
 )

 if TYPE_CHECKING:
@@ -19,6 +21,55 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)


+def _convert_image_to_openai(image: ImageContent) -> dict[str, Any]:
+    """
+    Convert ImageContent to OpenAI image_url format.
+
+    OpenAI expects images in the format:
+        {"type": "image_url", "image_url": {"url": "..."}}
+
+    For base64 images, the URL should be a data URI:
+        data:image/jpeg;base64,...
+    """
+    if isinstance(image.source, str):
+        # URL source - use directly
+        image_url = image.source
+    else:
+        # Base64 dict source - construct data URI
+        base64_data = image.source.get("data", "")
+        image_url = f"data:{image.media_type};base64,{base64_data}"
+
+    return {
+        "type": "image_url",
+        "image_url": {"url": image_url},
+    }
+
+
+def _convert_user_content_to_openai(
+    content: str | list[TextContent | ImageContent] | None,
+) -> str | list[dict[str, Any]] | None:
+    """
+    Convert UserMessage content to OpenAI format.
+
+    Handles both simple string content (backward compatible) and
+    multimodal content arrays (text + images).
+    """
+    if content is None or isinstance(content, str):
+        # Simple string content or None - pass through
+        return content
+
+    # Multimodal content array
+    openai_content: list[dict[str, Any]] = []
+
+    for item in content:
+        if isinstance(item, TextContent):
+            openai_content.append({"type": "text", "text": item.text})
+        elif isinstance(item, ImageContent):
+            openai_content.append(_convert_image_to_openai(item))
+
+    return openai_content
+
+
 def convert_messages_to_openai(messages: list[ChatMessage]) -> list[dict[str, Any]]:
     """
     Convert casual-llm ChatMessage list to OpenAI format.
@@ -86,7 +137,12 @@ def convert_messages_to_openai(messages: list[ChatMessage]) -> list[dict[str, Any]]:
                )

            case "user":
-                openai_messages.append(
+                openai_messages.append(
+                    {
+                        "role": "user",
+                        "content": _convert_user_content_to_openai(msg.content),
+                    }
+                )

            case _:
                logger.warning(f"Unknown message role: {msg.role}")
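
The OpenAI path instead nests everything inside the message's content array and turns base64 sources into data URIs. A sketch of the resulting structure, inferred from _convert_user_content_to_openai above (base64 payload is a placeholder):

from casual_llm import ImageContent, TextContent, UserMessage
from casual_llm.message_converters.openai import convert_messages_to_openai

messages = [
    UserMessage(
        content=[
            TextContent(text="What's in this image?"),
            ImageContent(
                source={"type": "base64", "data": "iVBORw0KGgo..."},  # placeholder base64
                media_type="image/png",
            ),
        ]
    )
]

openai_msgs = convert_messages_to_openai(messages)
# Expected shape, inferred from the converters above:
# [{"role": "user", "content": [
#     {"type": "text", "text": "What's in this image?"},
#     {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."}},
# ]}]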
src/casual_llm/messages.py (new file)

@@ -0,0 +1,111 @@
+"""
+OpenAI-compatible message models for LLM conversations.
+
+These models follow the OpenAI chat completion API format and can be used
+with any provider that implements the LLMProvider protocol.
+"""
+
+from typing import Literal, TypeAlias
+
+from pydantic import BaseModel
+
+
+class TextContent(BaseModel):
+    """Text content block for multimodal messages."""
+
+    type: Literal["text"] = "text"
+    text: str
+
+
+class ImageContent(BaseModel):
+    """Image content block for multimodal messages.
+
+    Supports both URL strings and base64-encoded image data.
+
+    Examples:
+        # URL source
+        ImageContent(type="image", source="https://example.com/image.jpg")
+
+        # Base64 source (dict format)
+        ImageContent(
+            type="image",
+            source={"type": "base64", "data": "...base64..."},
+            media_type="image/png"
+        )
+    """
+
+    type: Literal["image"] = "image"
+    source: str | dict[str, str]
+    """URL string or dict with {type: "base64", data: "..."} format."""
+    media_type: str = "image/jpeg"
+    """MIME type of the image (e.g., image/jpeg, image/png, image/gif, image/webp)."""
+
+
+class AssistantToolCallFunction(BaseModel):
+    """Function call within an assistant tool call."""
+
+    name: str
+    arguments: str
+
+
+class AssistantToolCall(BaseModel):
+    """Tool call made by the assistant."""
+
+    id: str
+    type: Literal["function"] = "function"
+    function: AssistantToolCallFunction
+
+
+class AssistantMessage(BaseModel):
+    """Message from the AI assistant."""
+
+    role: Literal["assistant"] = "assistant"
+    content: str | None = None
+    tool_calls: list[AssistantToolCall] | None = None
+
+
+class SystemMessage(BaseModel):
+    """System prompt message that sets the assistant's behavior."""
+
+    role: Literal["system"] = "system"
+    content: str
+
+
+class ToolResultMessage(BaseModel):
+    """Result from a tool/function call execution."""
+
+    role: Literal["tool"] = "tool"
+    name: str
+    tool_call_id: str
+    content: str
+
+
+class UserMessage(BaseModel):
+    """Message from the user.
+
+    Supports both simple text content and multimodal content (text + images).
+
+    Examples:
+        # Simple text content
+        UserMessage(content="Hello, world!")
+
+        # Multimodal content
+        UserMessage(content=[
+            TextContent(type="text", text="What's in this image?"),
+            ImageContent(type="image", source="https://example.com/image.jpg")
+        ])
+    """
+
+    role: Literal["user"] = "user"
+    content: str | list[TextContent | ImageContent] | None
+
+
+class StreamChunk(BaseModel):
+    """A chunk of streamed response content from an LLM provider."""
+
+    content: str
+    finish_reason: str | None = None
+
+
+ChatMessage: TypeAlias = AssistantMessage | SystemMessage | ToolResultMessage | UserMessage
+"""Type alias for any chat message type (user, assistant, system, or tool result)."""
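
Since these are ordinary Pydantic v2 models, messages round-trip through the standard model_dump()/model_validate() calls, and ChatMessage works as a plain union in annotations; a brief sketch:

from casual_llm.messages import ChatMessage, StreamChunk, UserMessage

msg = UserMessage(content="Hello, world!")
payload = msg.model_dump()                    # {"role": "user", "content": "Hello, world!"}
restored = UserMessage.model_validate(payload)
assert restored == msg

# ChatMessage is a TypeAlias over the four message classes
history: list[ChatMessage] = [msg]

# Reassemble a streamed reply from StreamChunk fragments
chunks = [StreamChunk(content="Hel"), StreamChunk(content="lo")]
full_reply = "".join(c.content for c in chunks)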
src/casual_llm/providers/base.py (new file)

@@ -0,0 +1,141 @@
+"""
+Base protocol for LLM providers.
+
+Provides a unified interface for different LLM backends (OpenAI, Ollama, etc.)
+using standard OpenAI-compatible message formats.
+"""
+
+from __future__ import annotations
+
+from typing import Protocol, Literal, AsyncIterator
+
+from pydantic import BaseModel
+
+from casual_llm.messages import ChatMessage, AssistantMessage, StreamChunk
+from casual_llm.tools import Tool
+from casual_llm.usage import Usage
+
+
+class LLMProvider(Protocol):
+    """
+    Protocol for LLM providers.
+
+    Uses OpenAI-compatible ChatMessage format for all interactions.
+    Supports both structured (JSON) and unstructured (text) responses.
+
+    This is a Protocol (PEP 544), meaning any class that implements
+    the chat() method with this signature is compatible - no
+    inheritance required.
+
+    Examples:
+        >>> from casual_llm import LLMProvider, ChatMessage, UserMessage
+        >>>
+        >>> # Any provider implementing this protocol works
+        >>> async def get_response(provider: LLMProvider, prompt: str) -> str:
+        ...     messages = [UserMessage(content=prompt)]
+        ...     return await provider.chat(messages)
+    """
+
+    async def chat(
+        self,
+        messages: list[ChatMessage],
+        response_format: Literal["json", "text"] | type[BaseModel] = "text",
+        max_tokens: int | None = None,
+        tools: list[Tool] | None = None,
+        temperature: float | None = None,
+    ) -> AssistantMessage:
+        """
+        Generate a chat response from the LLM.
+
+        Args:
+            messages: List of ChatMessage (UserMessage, AssistantMessage, SystemMessage, etc.)
+            response_format: Expected response format. Can be "json", "text", or a Pydantic
+                BaseModel class for JSON Schema-based structured output. When a Pydantic model
+                is provided, the LLM will be instructed to return JSON matching the schema.
+            max_tokens: Maximum tokens to generate (optional)
+            tools: List of tools available for the LLM to call (optional)
+            temperature: Temperature for this request (optional, overrides instance temperature)
+
+        Returns:
+            AssistantMessage with content and optional tool_calls
+
+        Raises:
+            Provider-specific exceptions (httpx.HTTPError, openai.OpenAIError, etc.)
+
+        Examples:
+            >>> from pydantic import BaseModel
+            >>>
+            >>> class PersonInfo(BaseModel):
+            ...     name: str
+            ...     age: int
+            >>>
+            >>> # Pass Pydantic model for structured output
+            >>> response = await provider.chat(
+            ...     messages=[UserMessage(content="Tell me about a person")],
+            ...     response_format=PersonInfo  # Pass the class, not an instance
+            ... )
+        """
+        ...
+
+    def stream(
+        self,
+        messages: list[ChatMessage],
+        response_format: Literal["json", "text"] | type[BaseModel] = "text",
+        max_tokens: int | None = None,
+        tools: list[Tool] | None = None,
+        temperature: float | None = None,
+    ) -> AsyncIterator[StreamChunk]:
+        """
+        Stream a chat response from the LLM.
+
+        This method yields response chunks in real-time as they are generated,
+        enabling progressive display in chat interfaces.
+
+        Args:
+            messages: List of ChatMessage (UserMessage, AssistantMessage, SystemMessage, etc.)
+            response_format: Expected response format. Can be "json", "text", or a Pydantic
+                BaseModel class for JSON Schema-based structured output. When a Pydantic model
+                is provided, the LLM will be instructed to return JSON matching the schema.
+            max_tokens: Maximum tokens to generate (optional)
+            tools: List of tools available for the LLM to call (optional, may not work
+                with all providers during streaming)
+            temperature: Temperature for this request (optional, overrides instance temperature)
+
+        Yields:
+            StreamChunk objects containing content fragments as tokens are generated.
+            Each chunk has a `content` attribute with the text fragment.
+
+        Raises:
+            Provider-specific exceptions (httpx.HTTPError, openai.OpenAIError, etc.)
+
+        Examples:
+            >>> from casual_llm import UserMessage
+            >>>
+            >>> # Stream response and print tokens as they arrive
+            >>> async for chunk in provider.stream([UserMessage(content="Tell me a story")]):
+            ...     print(chunk.content, end="", flush=True)
+            >>>
+            >>> # Collect full response from stream
+            >>> chunks = []
+            >>> async for chunk in provider.stream([UserMessage(content="Hello")]):
+            ...     chunks.append(chunk.content)
+            >>> full_response = "".join(chunks)
+        """
+        ...
+
+    def get_usage(self) -> Usage | None:
+        """
+        Get token usage statistics from the last chat() call.
+
+        Returns:
+            Usage object with prompt_tokens, completion_tokens, and total_tokens,
+            or None if no calls have been made yet.
+
+        Examples:
+            >>> provider = OllamaProvider(model="llama3.1")
+            >>> await provider.chat([UserMessage(content="Hello")])
+            >>> usage = provider.get_usage()
+            >>> if usage:
+            ...     print(f"Used {usage.total_tokens} tokens")
+        """
+        ...