mirascope 2.0.0a3__py3-none-any.whl → 2.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mirascope/api/_generated/__init__.py +62 -6
- mirascope/api/_generated/client.py +8 -0
- mirascope/api/_generated/errors/__init__.py +11 -1
- mirascope/api/_generated/errors/conflict_error.py +15 -0
- mirascope/api/_generated/errors/forbidden_error.py +15 -0
- mirascope/api/_generated/errors/internal_server_error.py +15 -0
- mirascope/api/_generated/errors/not_found_error.py +15 -0
- mirascope/api/_generated/organizations/__init__.py +25 -0
- mirascope/api/_generated/organizations/client.py +380 -0
- mirascope/api/_generated/organizations/raw_client.py +876 -0
- mirascope/api/_generated/organizations/types/__init__.py +23 -0
- mirascope/api/_generated/organizations/types/organizations_create_response.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_create_response_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_get_response.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_get_response_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_list_response_item.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_list_response_item_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_update_response.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_update_response_role.py +7 -0
- mirascope/api/_generated/projects/__init__.py +17 -0
- mirascope/api/_generated/projects/client.py +458 -0
- mirascope/api/_generated/projects/raw_client.py +1016 -0
- mirascope/api/_generated/projects/types/__init__.py +15 -0
- mirascope/api/_generated/projects/types/projects_create_response.py +30 -0
- mirascope/api/_generated/projects/types/projects_get_response.py +30 -0
- mirascope/api/_generated/projects/types/projects_list_response_item.py +30 -0
- mirascope/api/_generated/projects/types/projects_update_response.py +30 -0
- mirascope/api/_generated/reference.md +586 -0
- mirascope/api/_generated/types/__init__.py +20 -4
- mirascope/api/_generated/types/already_exists_error.py +24 -0
- mirascope/api/_generated/types/already_exists_error_tag.py +5 -0
- mirascope/api/_generated/types/database_error.py +24 -0
- mirascope/api/_generated/types/database_error_tag.py +5 -0
- mirascope/api/_generated/types/http_api_decode_error.py +1 -3
- mirascope/api/_generated/types/issue.py +1 -5
- mirascope/api/_generated/types/not_found_error_body.py +24 -0
- mirascope/api/_generated/types/not_found_error_tag.py +5 -0
- mirascope/api/_generated/types/permission_denied_error.py +24 -0
- mirascope/api/_generated/types/permission_denied_error_tag.py +7 -0
- mirascope/api/_generated/types/property_key.py +2 -2
- mirascope/api/_generated/types/{property_key_tag.py → property_key_key.py} +3 -5
- mirascope/api/_generated/types/{property_key_tag_tag.py → property_key_key_tag.py} +1 -1
- mirascope/llm/__init__.py +4 -0
- mirascope/llm/providers/__init__.py +6 -0
- mirascope/llm/providers/anthropic/__init__.py +6 -1
- mirascope/llm/providers/anthropic/_utils/__init__.py +15 -5
- mirascope/llm/providers/anthropic/_utils/beta_decode.py +271 -0
- mirascope/llm/providers/anthropic/_utils/beta_encode.py +216 -0
- mirascope/llm/providers/anthropic/_utils/decode.py +39 -7
- mirascope/llm/providers/anthropic/_utils/encode.py +156 -64
- mirascope/llm/providers/anthropic/beta_provider.py +322 -0
- mirascope/llm/providers/anthropic/model_id.py +10 -27
- mirascope/llm/providers/anthropic/model_info.py +87 -0
- mirascope/llm/providers/anthropic/provider.py +127 -145
- mirascope/llm/providers/base/_utils.py +15 -1
- mirascope/llm/providers/google/_utils/decode.py +55 -3
- mirascope/llm/providers/google/_utils/encode.py +14 -6
- mirascope/llm/providers/google/model_id.py +7 -13
- mirascope/llm/providers/google/model_info.py +62 -0
- mirascope/llm/providers/google/provider.py +8 -4
- mirascope/llm/providers/load_provider.py +8 -2
- mirascope/llm/providers/mlx/_utils.py +23 -1
- mirascope/llm/providers/mlx/encoding/transformers.py +17 -1
- mirascope/llm/providers/mlx/provider.py +4 -0
- mirascope/llm/providers/ollama/__init__.py +19 -0
- mirascope/llm/providers/ollama/provider.py +71 -0
- mirascope/llm/providers/openai/completions/__init__.py +6 -1
- mirascope/llm/providers/openai/completions/_utils/decode.py +57 -5
- mirascope/llm/providers/openai/completions/_utils/encode.py +9 -8
- mirascope/llm/providers/openai/completions/base_provider.py +513 -0
- mirascope/llm/providers/openai/completions/provider.py +13 -447
- mirascope/llm/providers/openai/model_info.py +57 -0
- mirascope/llm/providers/openai/provider.py +16 -4
- mirascope/llm/providers/openai/responses/_utils/decode.py +55 -4
- mirascope/llm/providers/openai/responses/_utils/encode.py +9 -9
- mirascope/llm/providers/openai/responses/provider.py +20 -21
- mirascope/llm/providers/provider_id.py +11 -1
- mirascope/llm/providers/provider_registry.py +3 -1
- mirascope/llm/providers/together/__init__.py +19 -0
- mirascope/llm/providers/together/provider.py +40 -0
- mirascope/llm/responses/__init__.py +3 -0
- mirascope/llm/responses/base_response.py +4 -0
- mirascope/llm/responses/base_stream_response.py +25 -1
- mirascope/llm/responses/finish_reason.py +1 -0
- mirascope/llm/responses/response.py +9 -0
- mirascope/llm/responses/root_response.py +5 -1
- mirascope/llm/responses/usage.py +95 -0
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/METADATA +3 -3
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/RECORD +91 -50
- mirascope/llm/providers/openai/shared/__init__.py +0 -7
- mirascope/llm/providers/openai/shared/_utils.py +0 -59
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/WHEEL +0 -0
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/licenses/LICENSE +0 -0
mirascope/llm/providers/google/model_id.py (+7 -13)

@@ -1,20 +1,14 @@
 """Google registered LLM models."""
 
-from typing import
-
-
-
-
-    "google/gemini-2.5-pro",
-    "google/gemini-2.5-flash",
-    "google/gemini-2.5-flash-lite",
-    "google/gemini-2.0-flash",
-    "google/gemini-2.0-flash-lite",
-]
-| str
-)
+from typing import TypeAlias, get_args
+
+from .model_info import GoogleKnownModels
+
+GoogleModelId: TypeAlias = GoogleKnownModels | str
 """The Google model ids registered with Mirascope."""
 
+GOOGLE_KNOWN_MODELS: set[str] = set(get_args(GoogleKnownModels))
+
 
 def model_name(model_id: GoogleModelId) -> str:
     """Extract the google model name from a full model ID.

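The body of `model_name` is not part of this hunk; based on its docstring and the prefixed IDs above, a minimal usage sketch (the expected output is an inference, not something shown in the diff):

```python
# Hedged sketch: model_name strips the "google/" prefix per its docstring.
from mirascope.llm.providers.google.model_id import model_name

print(model_name("google/gemini-2.5-flash"))  # expected: gemini-2.5-flash
```
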
mirascope/llm/providers/google/model_info.py (new file, +62)

@@ -0,0 +1,62 @@
+"""Google model information.
+
+This file is auto-generated by scripts/model_features/codegen_google.py
+Do not edit manually - run the codegen script to update."""
+
+from typing import Literal
+
+GoogleKnownModels = Literal[
+    "google/gemini-2.0-flash",
+    "google/gemini-2.0-flash-001",
+    "google/gemini-2.0-flash-exp",
+    "google/gemini-2.0-flash-exp-image-generation",
+    "google/gemini-2.0-flash-lite",
+    "google/gemini-2.0-flash-lite-001",
+    "google/gemini-2.0-flash-lite-preview",
+    "google/gemini-2.0-flash-lite-preview-02-05",
+    "google/gemini-2.5-flash",
+    "google/gemini-2.5-flash-image",
+    "google/gemini-2.5-flash-image-preview",
+    "google/gemini-2.5-flash-lite",
+    "google/gemini-2.5-flash-lite-preview-09-2025",
+    "google/gemini-2.5-flash-preview-09-2025",
+    "google/gemini-2.5-pro",
+    "google/gemini-3-pro-image-preview",
+    "google/gemini-3-pro-preview",
+    "google/gemini-flash-latest",
+    "google/gemini-flash-lite-latest",
+    "google/gemini-pro-latest",
+    "google/gemini-robotics-er-1.5-preview",
+    "google/gemma-3-12b-it",
+    "google/gemma-3-1b-it",
+    "google/gemma-3-27b-it",
+    "google/gemma-3-4b-it",
+    "google/gemma-3n-e2b-it",
+    "google/gemma-3n-e4b-it",
+    "google/nano-banana-pro-preview",
+]
+"""Valid Google model IDs."""
+
+
+MODELS_WITHOUT_STRUCTURED_OUTPUT_AND_TOOLS_SUPPORT: set[str] = {
+    "gemini-2.5-flash",
+    "gemini-2.5-flash-image",
+    "gemini-2.5-flash-image-preview",
+    "gemini-2.5-flash-lite",
+    "gemini-2.5-flash-lite-preview-09-2025",
+    "gemini-2.5-flash-preview-09-2025",
+    "gemini-2.5-pro",
+    "gemini-3-pro-image-preview",
+    "gemini-flash-latest",
+    "gemini-flash-lite-latest",
+    "gemini-pro-latest",
+    "gemini-robotics-er-1.5-preview",
+    "gemma-3-12b-it",
+    "gemma-3-1b-it",
+    "gemma-3-27b-it",
+    "gemma-3-4b-it",
+    "gemma-3n-e2b-it",
+    "gemma-3n-e4b-it",
+    "nano-banana-pro-preview",
+}
+"""Models that do not support structured outputs when tools are present."""

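A small sketch of how the prefixed literal and the derived set fit together; the helper function below is hypothetical, while `GOOGLE_KNOWN_MODELS` itself is defined in `model_id.py` above:

```python
# Hypothetical membership check over the generated literal.
from typing import get_args

from mirascope.llm.providers.google.model_info import GoogleKnownModels

GOOGLE_KNOWN_MODELS: set[str] = set(get_args(GoogleKnownModels))


def is_known_google_model(model_id: str) -> bool:
    """IDs keep their "google/" prefix, e.g. "google/gemini-2.5-pro"."""
    return model_id in GOOGLE_KNOWN_MODELS


print(is_known_google_model("google/gemini-2.5-pro"))  # True
print(is_known_google_model("google/unknown-model"))   # False
```
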
mirascope/llm/providers/google/provider.py (+8 -4)

@@ -81,7 +81,7 @@ class GoogleProvider(BaseProvider[Client]):
 
         google_response = self.client.models.generate_content(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
+        assistant_message, finish_reason, usage = _utils.decode_response(
             google_response, model_id
         )
 
@@ -95,6 +95,7 @@ class GoogleProvider(BaseProvider[Client]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -133,7 +134,7 @@ class GoogleProvider(BaseProvider[Client]):
 
         google_response = self.client.models.generate_content(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
+        assistant_message, finish_reason, usage = _utils.decode_response(
             google_response, model_id
         )
 
@@ -147,6 +148,7 @@ class GoogleProvider(BaseProvider[Client]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -181,7 +183,7 @@ class GoogleProvider(BaseProvider[Client]):
 
         google_response = await self.client.aio.models.generate_content(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
+        assistant_message, finish_reason, usage = _utils.decode_response(
             google_response, model_id
         )
 
@@ -195,6 +197,7 @@ class GoogleProvider(BaseProvider[Client]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -233,7 +236,7 @@ class GoogleProvider(BaseProvider[Client]):
 
         google_response = await self.client.aio.models.generate_content(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
+        assistant_message, finish_reason, usage = _utils.decode_response(
            google_response, model_id
         )
 
@@ -247,6 +250,7 @@ class GoogleProvider(BaseProvider[Client]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 

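Combined with the `usage` additions under `mirascope/llm/responses/` in this release, the value decoded above should surface on call results. A hedged sketch only; it assumes the `llm.call` decorator style from the Ollama docstring further down, that `llm` resolves via `from mirascope import llm`, and that the new value is exposed as `response.usage`:

```python
# Hedged sketch, not a confirmed public API for 2.0.0a4.
from mirascope import llm


@llm.call("google/gemini-2.5-flash")
def greet():
    return [llm.messages.user("Say hello in one word.")]


response = greet()
if response.usage is not None:  # assumption: usage may be None for some providers
    print(response.usage.input_tokens, response.usage.output_tokens)
```
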
mirascope/llm/providers/load_provider.py (+8 -2)

@@ -4,10 +4,12 @@ from .anthropic import AnthropicProvider
 from .base import Provider
 from .google import GoogleProvider
 from .mlx import MLXProvider
+from .ollama import OllamaProvider
 from .openai import OpenAIProvider
 from .openai.completions.provider import OpenAICompletionsProvider
 from .openai.responses.provider import OpenAIResponsesProvider
 from .provider_id import ProviderId
+from .together import TogetherProvider
 
 
 @lru_cache(maxsize=256)
@@ -32,14 +34,18 @@ def load_provider(
             return AnthropicProvider(api_key=api_key, base_url=base_url)
         case "google":
             return GoogleProvider(api_key=api_key, base_url=base_url)
+        case "mlx":  # pragma: no cover (MLX is only available on macOS)
+            return MLXProvider()
+        case "ollama":
+            return OllamaProvider(api_key=api_key, base_url=base_url)
         case "openai":
             return OpenAIProvider(api_key=api_key, base_url=base_url)
         case "openai:completions":
             return OpenAICompletionsProvider(api_key=api_key, base_url=base_url)
         case "openai:responses":
             return OpenAIResponsesProvider(api_key=api_key, base_url=base_url)
-        case "
-            return
+        case "together":
+            return TogetherProvider(api_key=api_key, base_url=base_url)
         case _:  # pragma: no cover
             raise ValueError(f"Unknown provider: '{provider_id}'")
 

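A hedged example of the new dispatch; `load_provider`'s full signature is cut off in the hunk above, so the keyword arguments below are assumed from the constructor calls it forwards to:

```python
# Assumption: load_provider(provider_id, api_key=None, base_url=None) mirrors
# the arguments passed through to each provider constructor above.
from mirascope.llm.providers.load_provider import load_provider

provider = load_provider("ollama", api_key=None, base_url=None)
print(type(provider).__name__)  # expected: OllamaProvider
```
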
mirascope/llm/providers/mlx/_utils.py (+23 -1)

@@ -5,7 +5,7 @@ import mlx.core as mx
 from mlx_lm.generate import GenerationResponse
 from mlx_lm.sample_utils import make_sampler
 
-from ...responses import FinishReason
+from ...responses import FinishReason, Usage
 from ..base import Params, _utils as _base_utils
 
 Sampler: TypeAlias = Callable[[mx.array], mx.array]
@@ -105,3 +105,25 @@ def extract_finish_reason(response: GenerationResponse | None) -> FinishReason |
         return FinishReason.MAX_TOKENS
 
     return None
+
+
+def extract_usage(response: GenerationResponse | None) -> Usage | None:
+    """Extract usage information from an MLX generation response.
+
+    Args:
+        response: The MLX generation response to extract from.
+
+    Returns:
+        The Usage object with token counts, or None if not applicable.
+    """
+    if response is None:
+        return None
+
+    return Usage(
+        input_tokens=response.prompt_tokens,
+        output_tokens=response.generation_tokens,
+        cache_read_tokens=0,
+        cache_write_tokens=0,
+        reasoning_tokens=0,
+        raw=response,
+    )

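The same `Usage` shape recurs across providers in this release. A hand-built instance for reference; it assumes `Usage` is importable from `mirascope.llm.responses` (as the relative import above implies) and that `raw` accepts `None`:

```python
# Field names taken from the Usage(...) constructions in this diff;
# raw=None is an assumption made so the example stands alone.
from mirascope.llm.responses import Usage

usage = Usage(
    input_tokens=128,
    output_tokens=42,
    cache_read_tokens=0,
    cache_write_tokens=0,
    reasoning_tokens=0,
    raw=None,
)
print(usage.input_tokens + usage.output_tokens)  # 170
```
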
mirascope/llm/providers/mlx/encoding/transformers.py (+17 -1)

@@ -10,7 +10,12 @@ from transformers import PreTrainedTokenizer
 from ....content import ContentPart, TextChunk, TextEndChunk, TextStartChunk
 from ....formatting import Format, FormattableT
 from ....messages import AssistantContent, Message
-from ....responses import
+from ....responses import (
+    ChunkIterator,
+    FinishReasonChunk,
+    RawStreamEventChunk,
+    UsageDeltaChunk,
+)
 from ....tools import AnyToolSchema, BaseToolkit
 from .. import _utils
 from .base import BaseEncoder, TokenIds
@@ -129,3 +134,14 @@ class TransformersEncoder(BaseEncoder):
             yield FinishReasonChunk(finish_reason=finish_reason)
         else:
             yield TextEndChunk()
+
+        # Emit usage delta if available
+        usage = _utils.extract_usage(response)
+        if usage:
+            yield UsageDeltaChunk(
+                input_tokens=usage.input_tokens,
+                output_tokens=usage.output_tokens,
+                cache_read_tokens=usage.cache_read_tokens,
+                cache_write_tokens=usage.cache_write_tokens,
+                reasoning_tokens=usage.reasoning_tokens,
+            )

mirascope/llm/providers/mlx/provider.py (+4 -0)

@@ -108,6 +108,7 @@ class MLXProvider(BaseProvider[None]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=_utils.extract_finish_reason(response),
+            usage=_utils.extract_usage(response),
             format=format,
         )
 
@@ -152,6 +153,7 @@ class MLXProvider(BaseProvider[None]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=_utils.extract_finish_reason(response),
+            usage=_utils.extract_usage(response),
             format=format,
         )
 
@@ -196,6 +198,7 @@ class MLXProvider(BaseProvider[None]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=_utils.extract_finish_reason(response),
+            usage=_utils.extract_usage(response),
             format=format,
         )
 
@@ -244,6 +247,7 @@ class MLXProvider(BaseProvider[None]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=_utils.extract_finish_reason(response),
+            usage=_utils.extract_usage(response),
             format=format,
         )
 

mirascope/llm/providers/ollama/__init__.py (new file, +19)

@@ -0,0 +1,19 @@
+"""Ollama provider implementation."""
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .provider import OllamaProvider
+else:
+    try:
+        from .provider import OllamaProvider
+    except ImportError:  # pragma: no cover
+        from .._missing_import_stubs import (
+            create_provider_stub,
+        )
+
+        OllamaProvider = create_provider_stub("openai", "OllamaProvider")
+
+__all__ = [
+    "OllamaProvider",
+]

mirascope/llm/providers/ollama/provider.py (new file, +71)

@@ -0,0 +1,71 @@
+"""Ollama provider implementation."""
+
+import os
+from typing import ClassVar
+
+from openai import AsyncOpenAI, OpenAI
+
+from ..openai.completions.base_provider import BaseOpenAICompletionsProvider
+
+
+class OllamaProvider(BaseOpenAICompletionsProvider):
+    """Provider for Ollama's OpenAI-compatible API.
+
+    Inherits from BaseOpenAICompletionsProvider with Ollama-specific configuration:
+    - Uses Ollama's local API endpoint (default: http://localhost:11434/v1/)
+    - API key is not required (Ollama ignores API keys)
+    - Supports OLLAMA_BASE_URL environment variable
+
+    Usage:
+        Register the provider with model ID prefixes you want to use:
+
+        ```python
+        import llm
+
+        # Register for ollama models
+        llm.register_provider("ollama", "ollama/")
+
+        # Now you can use ollama models directly
+        @llm.call("ollama/llama2")
+        def my_prompt():
+            return [llm.messages.user("Hello!")]
+        ```
+    """
+
+    id: ClassVar[str] = "ollama"
+    default_scope: ClassVar[str | list[str]] = "ollama/"
+    default_base_url: ClassVar[str | None] = "http://localhost:11434/v1/"
+    api_key_env_var: ClassVar[str] = "OLLAMA_API_KEY"
+    api_key_required: ClassVar[bool] = False
+    provider_name: ClassVar[str | None] = "Ollama"
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None = None,
+        base_url: str | None = None,
+    ) -> None:
+        """Initialize the Ollama provider.
+
+        Args:
+            api_key: API key (optional). Defaults to OLLAMA_API_KEY env var or 'ollama'.
+            base_url: Custom base URL. Defaults to OLLAMA_BASE_URL env var
+                or http://localhost:11434/v1/.
+        """
+        resolved_api_key = api_key or os.environ.get(self.api_key_env_var) or "ollama"
+        resolved_base_url = (
+            base_url or os.environ.get("OLLAMA_BASE_URL") or self.default_base_url
+        )
+
+        self.client = OpenAI(
+            api_key=resolved_api_key,
+            base_url=resolved_base_url,
+        )
+        self.async_client = AsyncOpenAI(
+            api_key=resolved_api_key,
+            base_url=resolved_base_url,
+        )
+
+    def _model_name(self, model_id: str) -> str:
+        """Strip 'ollama/' prefix from model ID for Ollama API."""
+        return model_id.removeprefix("ollama/")

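Beyond the decorator flow in the docstring above, the constructor also supports pointing at a remote Ollama host. A minimal sketch; the host address and model name are placeholders:

```python
# Uses only attributes visible in the class above.
from mirascope.llm.providers.ollama.provider import OllamaProvider

provider = OllamaProvider(base_url="http://192.168.1.50:11434/v1/")
print(provider.client.base_url)                 # the remote Ollama endpoint
print(provider._model_name("ollama/llama3.2"))  # "llama3.2"
```
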
mirascope/llm/providers/openai/completions/__init__.py (+6 -1)

@@ -1,20 +1,25 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
+    from .base_provider import BaseOpenAICompletionsProvider
     from .provider import OpenAICompletionsProvider
 else:
     try:
+        from .base_provider import BaseOpenAICompletionsProvider
         from .provider import OpenAICompletionsProvider
     except ImportError:  # pragma: no cover
         from ..._missing_import_stubs import (
-            create_import_error_stub,
             create_provider_stub,
         )
 
+        BaseOpenAICompletionsProvider = create_provider_stub(
+            "openai", "BaseOpenAICompletionsProvider"
+        )
         OpenAICompletionsProvider = create_provider_stub(
             "openai", "OpenAICompletionsProvider"
         )
 
 __all__ = [
+    "BaseOpenAICompletionsProvider",
     "OpenAICompletionsProvider",
 ]

mirascope/llm/providers/openai/completions/_utils/decode.py (+57 -5)

@@ -4,6 +4,7 @@ from typing import Literal
 
 from openai import AsyncStream, Stream
 from openai.types import chat as openai_types
+from openai.types.completion_usage import CompletionUsage
 
 from .....content import (
     AssistantContentPart,
@@ -23,6 +24,8 @@ from .....responses import (
     FinishReason,
     FinishReasonChunk,
     RawStreamEventChunk,
+    Usage,
+    UsageDeltaChunk,
 )
 from ...model_id import OpenAIModelId, model_name
 
@@ -32,12 +35,40 @@ OPENAI_FINISH_REASON_MAP = {
 }
 
 
+def _decode_usage(
+    usage: CompletionUsage | None,
+) -> Usage | None:
+    """Convert OpenAI CompletionUsage to Mirascope Usage."""
+    if usage is None:  # pragma: no cover
+        return None
+
+    return Usage(
+        input_tokens=usage.prompt_tokens,
+        output_tokens=usage.completion_tokens,
+        cache_read_tokens=(
+            usage.prompt_tokens_details.cached_tokens
+            if usage.prompt_tokens_details
+            else None
+        )
+        or 0,
+        cache_write_tokens=0,
+        reasoning_tokens=(
+            usage.completion_tokens_details.reasoning_tokens
+            if usage.completion_tokens_details
+            else None
+        )
+        or 0,
+        raw=usage,
+    )
+
+
 def decode_response(
     response: openai_types.ChatCompletion,
     model_id: OpenAIModelId,
-    provider_id:
-
-
+    provider_id: str,
+    provider_model_name: str | None = None,
+) -> tuple[AssistantMessage, FinishReason | None, Usage | None]:
+    """Convert OpenAI ChatCompletion to mirascope AssistantMessage and usage."""
     choice = response.choices[0]
     message = choice.message
     refused = False
@@ -72,11 +103,12 @@ def decode_response(
         content=parts,
         provider_id=provider_id,
         model_id=model_id,
-        provider_model_name=model_name(model_id, "completions"),
+        provider_model_name=provider_model_name or model_name(model_id, "completions"),
         raw_message=message.model_dump(exclude_none=True),
     )
 
-
+    usage = _decode_usage(response.usage)
+    return assistant_message, finish_reason, usage
 
 
 class _OpenAIChunkProcessor:
@@ -91,6 +123,26 @@ class _OpenAIChunkProcessor:
         """Process a single OpenAI chunk and yield the appropriate content chunks."""
         yield RawStreamEventChunk(raw_stream_event=chunk)
 
+        if chunk.usage:
+            usage = chunk.usage
+            yield UsageDeltaChunk(
+                input_tokens=usage.prompt_tokens,
+                output_tokens=usage.completion_tokens,
+                cache_read_tokens=(
+                    usage.prompt_tokens_details.cached_tokens
+                    if usage.prompt_tokens_details
+                    else None
+                )
+                or 0,
+                cache_write_tokens=0,
+                reasoning_tokens=(
+                    usage.completion_tokens_details.reasoning_tokens
+                    if usage.completion_tokens_details
+                    else None
+                )
+                or 0,
+            )
+
         choice = chunk.choices[0] if chunk.choices else None
         if not choice:
             return  # pragma: no cover

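The `(... if ... else None) or 0` pattern in `_decode_usage` and the chunk processor means missing token details collapse to zero. A small self-contained check against the `openai` types (token counts are arbitrary):

```python
# CompletionUsage's detail fields are optional; when absent, the cached-token
# count falls back to 0, matching the decode logic above.
from openai.types.completion_usage import CompletionUsage

usage = CompletionUsage(prompt_tokens=12, completion_tokens=34, total_tokens=46)
cached = (
    usage.prompt_tokens_details.cached_tokens
    if usage.prompt_tokens_details
    else None
) or 0
print(cached)  # 0
```
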
mirascope/llm/providers/openai/completions/_utils/encode.py (+9 -8)

@@ -22,8 +22,11 @@ from .....messages import AssistantMessage, Message, UserMessage
 from .....tools import FORMAT_TOOL_NAME, AnyToolSchema, BaseToolkit
 from ....base import Params, _utils as _base_utils
 from ...model_id import OpenAIModelId, model_name
-from ...model_info import
-
+from ...model_info import (
+    MODELS_WITHOUT_AUDIO_SUPPORT,
+    MODELS_WITHOUT_JSON_OBJECT_SUPPORT,
+    MODELS_WITHOUT_JSON_SCHEMA_SUPPORT,
+)
 
 
 class ChatCompletionCreateKwargs(TypedDict, total=False):
@@ -233,7 +236,7 @@ def _convert_tool_to_tool_param(
     """Convert a single Mirascope `Tool` to OpenAI ChatCompletionToolParam with caching."""
     schema_dict = tool.parameters.model_dump(by_alias=True, exclude_none=True)
     schema_dict["type"] = "object"
-
+    _base_utils.ensure_additional_properties_false(schema_dict)
     return openai_types.ChatCompletionToolParam(
         type="function",
         function={
@@ -258,7 +261,7 @@ def _create_strict_response_format(
     """
     schema = format.schema.copy()
 
-
+    _base_utils.ensure_additional_properties_false(schema)
 
     json_schema = JSONSchema(
         name=format.name,
@@ -321,9 +324,7 @@ def encode_request(
 
     openai_tools = [_convert_tool_to_tool_param(tool) for tool in tools]
 
-    model_supports_strict =
-        base_model_name not in _shared_utils.MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
-    )
+    model_supports_strict = base_model_name not in MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
     default_mode = "strict" if model_supports_strict else "tool"
     format = resolve_format(format, default_mode=default_mode)
     if format is not None:
@@ -348,7 +349,7 @@ def encode_request(
         openai_tools.append(_convert_tool_to_tool_param(format_tool_schema))
     elif (
         format.mode == "json"
-        and base_model_name not in
+        and base_model_name not in MODELS_WITHOUT_JSON_OBJECT_SUPPORT
     ):
         kwargs["response_format"] = {"type": "json_object"}
 