osmosis-ai 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -0,0 +1,241 @@
1
+ Metadata-Version: 2.4
2
+ Name: osmosis-ai
3
+ Version: 0.2.2
4
+ Summary: A Python library for reward function validation with strict type enforcement.
5
+ Author-email: Osmosis AI <jake@osmosis.ai>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Gulp AI
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ Project-URL: Homepage, https://github.com/Osmosis-AI/osmosis-sdk-python
28
+ Project-URL: Issues, https://github.com/Osmosis-AI/osmosis-sdk-python/issues
29
+ Classifier: Programming Language :: Python :: 3
30
+ Classifier: License :: OSI Approved :: MIT License
31
+ Classifier: Operating System :: OS Independent
32
+ Requires-Python: >=3.6
33
+ Description-Content-Type: text/markdown
34
+ License-File: LICENSE
35
+ Dynamic: license-file
36
+
37
+ # osmosis-ai
38
+
39
+ A Python library that provides reward and rubric validation helpers for LLM applications with strict type enforcement.
40
+
41
+ ## Installation
42
+
43
+ ```bash
44
+ pip install osmosis-ai
45
+ ```
46
+
47
+ For development:
48
+ ```bash
49
+ git clone https://github.com/Osmosis-AI/osmosis-sdk-python
50
+ cd osmosis-sdk-python
51
+ pip install -e .
52
+ ```
53
+
54
+ ## Quick Start
55
+
56
+ ```python
57
+ from osmosis_ai import osmosis_reward
58
+
59
+ @osmosis_reward
60
+ def simple_reward(solution_str: str, ground_truth: str, extra_info: dict = None) -> float:
61
+ """Basic exact match reward function."""
62
+ return 1.0 if solution_str.strip() == ground_truth.strip() else 0.0
63
+
64
+ # Use the reward function
65
+ score = simple_reward("hello world", "hello world") # Returns 1.0
66
+ ```
67
+
68
+ ```python
69
+ from osmosis_ai import evaluate_rubric
70
+
71
+ messages = [
72
+ {
73
+ "type": "message",
74
+ "role": "user",
75
+ "content": [{"type": "input_text", "text": "What is the capital of France?"}],
76
+ },
77
+ {
78
+ "type": "message",
79
+ "role": "assistant",
80
+ "content": [{"type": "output_text", "text": "The capital of France is Paris."}],
81
+ },
82
+ ]
83
+
84
+ # Export OPENAI_API_KEY in your shell before running this snippet.
85
+ rubric_score = evaluate_rubric(
86
+ rubric="Assistant must mention the verified capital city.",
87
+ messages=messages,
88
+ model_info={
89
+ "provider": "openai",
90
+ "model": "gpt-5",
91
+ "api_key_env": "OPENAI_API_KEY",
92
+ },
93
+ ground_truth="Paris",
94
+ )
95
+
96
+ print(rubric_score) # -> 1.0 (full payload available via return_details=True)
97
+ ```
98
+
99
+ ## Remote Rubric Evaluation
100
+
101
+ `evaluate_rubric` talks to each provider through its official Python SDK while enforcing the same JSON schema everywhere:
102
+
103
+ - **OpenAI / xAI** – Uses `OpenAI(...).responses.create` (or `chat.completions.create`) with `response_format={"type": "json_schema"}` and falls back to `json_object` when needed.
104
+ - **Anthropic** – Forces a tool call with a JSON schema via `Anthropic(...).messages.create`, extracting the returned tool arguments.
105
+ - **Google Gemini** – Invokes `google.genai.Client(...).models.generate_content` with `response_mime_type="application/json"` and `response_schema`.
106
+
107
+ Every provider therefore returns a strict JSON object with `{"score": number, "explanation": string}`. The helper clamps the score into your configured range, validates the structure, and exposes the raw payload when `return_details=True`.
108
+
109
+ Credentials are resolved from environment variables by default:
110
+
111
+ - `OPENAI_API_KEY` for OpenAI
112
+ - `ANTHROPIC_API_KEY` for Anthropic
113
+ - `GOOGLE_API_KEY` for Google Gemini
114
+ - `XAI_API_KEY` for xAI
115
+
116
+ Override the environment variable name with `model_info={"api_key_env": "CUSTOM_ENV_NAME"}` when needed, or supply an inline secret with `model_info={"api_key": "sk-..."}` for ephemeral credentials. Missing API keys raise a `MissingAPIKeyError` that explains how to export the secret before trying again.
117
+
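+ For instance, a minimal sketch that reuses the Quick Start transcript; the `OPENAI_RUBRIC_KEY` variable name is a hypothetical stand-in for the default:
+
+ ```python
+ from osmosis_ai import MissingAPIKeyError, evaluate_rubric
+
+ try:
+     score = evaluate_rubric(
+         rubric="Assistant must mention the verified capital city.",
+         messages=messages,  # transcript from the Quick Start example
+         model_info={
+             "provider": "openai",
+             "model": "gpt-5",
+             "api_key_env": "OPENAI_RUBRIC_KEY",  # hypothetical custom variable name
+         },
+     )
+ except MissingAPIKeyError as err:
+     print(err)  # tells you which variable to export before retrying
+ ```
+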
118
+ `model_info` accepts additional rubric-specific knobs:
119
+
120
+ - `score_min` / `score_max` – change the default `[0.0, 1.0]` scoring bounds.
121
+ - `system_prompt` / `original_input` – override the helper’s transcript inference when those entries are absent.
122
+ - `timeout` – customise the provider timeout in seconds.
123
+
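+ A sketch that combines these knobs; the ten-point scale, grader prompt, and 60-second timeout are illustrative values rather than defaults:
+
+ ```python
+ rubric_score = evaluate_rubric(
+     rubric="Award up to ten points for a complete, factual answer.",
+     messages=messages,
+     model_info={
+         "provider": "openai",
+         "model": "gpt-5",
+         "score_min": 0.0,
+         "score_max": 10.0,  # widen the default [0.0, 1.0] bounds
+         "system_prompt": "You are a strict grader.",  # used when the transcript has no system entry
+         "timeout": 60,  # provider timeout in seconds
+     },
+     ground_truth="Paris",
+ )
+ ```
+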
124
+ Pass `extra_info={...}` to `evaluate_rubric` when you need structured context quoted in the judge prompt, and set `return_details=True` to receive the full `RewardRubricRunResult` payload (including the provider’s raw response).
125
+
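+ A sketch of both options, assuming the detailed payload mirrors the provider run result with `score`, `explanation`, and `raw` keys:
+
+ ```python
+ details = evaluate_rubric(
+     rubric="Assistant must mention the verified capital city.",
+     messages=messages,
+     model_info={"provider": "openai", "model": "gpt-5"},
+     extra_info={"difficulty": "easy"},  # quoted as structured context in the judge prompt
+     return_details=True,
+ )
+
+ print(details["score"], details["explanation"])
+ # details["raw"] keeps the provider's unmodified response for auditing.
+ ```
+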
126
+ Remote failures surface as `ProviderRequestError` instances, with `ModelNotFoundError` reserved for missing model identifiers so you can retry with a new snapshot.
127
+
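+ Both exception types are importable from the package root, so callers can branch on them; a minimal sketch:
+
+ ```python
+ from osmosis_ai import ModelNotFoundError, ProviderRequestError
+
+ try:
+     rubric_score = evaluate_rubric(
+         rubric="Assistant must mention the verified capital city.",
+         messages=messages,
+         model_info={"provider": "openai", "model": "gpt-5"},
+     )
+ except ModelNotFoundError as err:
+     print(err)  # pick a current snapshot from the vendor dashboard and retry
+ except ProviderRequestError as err:
+     print(err)  # other remote failures: rate limits, timeouts, malformed responses
+ ```
+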
128
+ > Older SDK versions that lack schema parameters automatically fall back to instruction-only JSON; the helper still validates the response payload before returning.
129
+ > Provider model snapshot names change frequently. Check each vendor's dashboard for the latest identifier if you encounter a “model not found” error.
130
+
131
+ ### Provider Architecture
132
+
133
+ All remote integrations live in `osmosis_ai/providers/` and implement the `RubricProvider` interface. At import time the default registry registers OpenAI, xAI, Anthropic, and Google Gemini so `evaluate_rubric` can route requests without additional configuration. The request/response plumbing is encapsulated in each provider module, keeping `evaluate_rubric` focused on prompt construction, payload validation, and credential resolution.
134
+
135
+ Add your own provider by subclassing `RubricProvider`, implementing `run()` with the vendor SDK, and calling `register_provider()` during start-up. A step-by-step guide is available in [`osmosis_ai/providers/README.md`](osmosis_ai/providers/README.md).
136
+
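+ A compact sketch of a custom provider; the `EchoProvider` name and its constant scoring are illustrative only:
+
+ ```python
+ from osmosis_ai.providers import ProviderRequest, RubricProvider, register_provider
+
+ class EchoProvider(RubricProvider):
+     """Toy provider that always returns the midpoint of the configured range."""
+
+     name = "echo"
+
+     def run(self, request: ProviderRequest):
+         midpoint = (request.score_min + request.score_max) / 2
+         return {"score": midpoint, "explanation": "echo stub", "raw": {}}
+
+ register_provider(EchoProvider())  # "echo" now appears in supported_providers()
+ ```
+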
137
+ ## Required Function Signature
138
+
139
+ All functions decorated with `@osmosis_reward` must have exactly this signature:
140
+
141
+ ```python
142
+ @osmosis_reward
143
+ def your_function(solution_str: str, ground_truth: str, extra_info: dict = None) -> float:
144
+ # Your reward logic here
145
+ return float_score
146
+ ```
147
+
148
+ ### Parameters
149
+
150
+ - **`solution_str: str`** - The solution string to evaluate (required)
151
+ - **`ground_truth: str`** - The correct/expected answer (required)
152
+ - **`extra_info: dict = None`** - Optional dictionary for additional configuration
153
+
154
+ ### Return Value
155
+
156
+ - **`-> float`** - Must return a float value representing the reward score
157
+
158
+ The decorator will raise a `TypeError` if the function doesn't match this exact signature or doesn't return a float.
159
+
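+ For example, renaming the first parameter should be rejected; a sketch assuming the signature check fires at decoration time:
+
+ ```python
+ try:
+     @osmosis_reward
+     def renamed_reward(answer: str, ground_truth: str, extra_info: dict = None) -> float:
+         return 1.0
+ except TypeError as err:
+     print(err)  # reports the mismatch against the required signature
+ ```
+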
160
+ ## Rubric Function Signature
161
+
162
+ Rubric functions decorated with `@osmosis_rubric` must accept the following parameters:
163
+
164
+ - `model_info: dict`
165
+ - `rubric: str`
166
+ - `messages: list`
167
+ - `ground_truth: Optional[str] = None`
168
+ - `system_message: Optional[str] = None`
169
+ - `extra_info: dict = None`
170
+ - `score_min: float = 0.0` *(optional lower bound; must default to 0.0 and stay below `score_max`)*
171
+ - `score_max: float = 1.0` *(optional upper bound; must default to 1.0 and stay above `score_min`)*
172
+
173
+ and must return a `float`. The decorator validates the signature and the runtime payload (including message roles and the return type) before delegating to your custom logic.
174
+
175
+ > Required fields: `model_info` must contain non-empty `provider` and `model` string entries.
176
+
177
+ > Annotation quirk: `extra_info` must be annotated as a plain `dict` with a default of `None` to satisfy the validator.
178
+
179
+ > Tip: You can call `evaluate_rubric` from inside a rubric function (or any other orchestrator) to outsource judging to a hosted model while still benefiting from the decorator’s validation.
180
+
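+ A sketch of a decorated rubric function that satisfies this signature and hands judging to `evaluate_rubric`; the `hosted_judge` name is illustrative:
+
+ ```python
+ from typing import Optional
+
+ from osmosis_ai import evaluate_rubric, osmosis_rubric
+
+ @osmosis_rubric
+ def hosted_judge(
+     model_info: dict,
+     rubric: str,
+     messages: list,
+     ground_truth: Optional[str] = None,
+     system_message: Optional[str] = None,
+     extra_info: dict = None,
+     score_min: float = 0.0,
+     score_max: float = 1.0,
+ ) -> float:
+     # The decorator has already validated the payload; delegate scoring to the hosted judge.
+     return evaluate_rubric(
+         rubric=rubric,
+         messages=messages,
+         model_info=model_info,
+         ground_truth=ground_truth,
+         extra_info=extra_info,
+     )
+ ```
+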
181
+ ## Examples
182
+
183
+ See the [`examples/`](examples/) directory for complete examples:
184
+
185
+ ```python
186
+ @osmosis_reward
187
+ def case_insensitive_match(solution_str: str, ground_truth: str, extra_info: dict = None) -> float:
188
+ """Case-insensitive string matching with partial credit."""
189
+ match = solution_str.lower().strip() == ground_truth.lower().strip()
190
+
191
+ if extra_info and 'partial_credit' in extra_info:
192
+ if not match and extra_info['partial_credit']:
193
+ len_diff = abs(len(solution_str) - len(ground_truth))
194
+ if len_diff <= 2:
195
+ return 0.5
196
+
197
+ return 1.0 if match else 0.0
198
+
199
+ @osmosis_reward
200
+ def numeric_tolerance(solution_str: str, ground_truth: str, extra_info: dict = None) -> float:
201
+ """Numeric comparison with configurable tolerance."""
202
+ try:
203
+ solution_num = float(solution_str.strip())
204
+ truth_num = float(ground_truth.strip())
205
+
206
+ tolerance = extra_info.get('tolerance', 0.01) if extra_info else 0.01
207
+ return 1.0 if abs(solution_num - truth_num) <= tolerance else 0.0
208
+ except ValueError:
209
+ return 0.0
210
+ ```
211
+
212
+ - `examples/rubric_functions.py` demonstrates `evaluate_rubric` with OpenAI, Anthropic, Gemini, and xAI using the schema-enforced SDK integrations.
213
+ - `examples/reward_functions.py` keeps local reward helpers that showcase the decorator contract without external calls.
214
+
215
+ ## Running Examples
216
+
217
+ ```bash
218
+ PYTHONPATH=. python examples/reward_functions.py
219
+ PYTHONPATH=. python examples/rubric_functions.py # Uncomment the provider you need before running
220
+ ```
221
+
222
+ ## Testing
223
+
224
+ Run `python -m pytest tests/test_rubric_eval.py` to exercise the guards that ensure rubric prompts ignore message metadata (for example `tests/test_rubric_eval.py::test_collect_text_skips_metadata_fields`) while still preserving nested tool output. Add additional tests under `tests/` as you extend the library.
225
+
226
+ ## License
227
+
228
+ MIT License - see [LICENSE](LICENSE) file for details.
229
+
230
+ ## Contributing
231
+
232
+ 1. Fork the repository
233
+ 2. Create a feature branch
234
+ 3. Make your changes
235
+ 4. Run tests and examples
236
+ 5. Submit a pull request
237
+
238
+ ## Links
239
+
240
+ - [Homepage](https://github.com/Osmosis-AI/osmosis-sdk-python)
241
+ - [Issues](https://github.com/Osmosis-AI/osmosis-sdk-python/issues)
@@ -0,0 +1,24 @@
1
+ """
2
+ osmosis-ai: A Python library for reward function validation with strict type enforcement.
3
+
4
+ This library provides decorators such as @osmosis_reward and @osmosis_rubric that
5
+ enforce standardized function signatures for LLM-centric workflows.
6
+
7
+ Features:
8
+ - Type-safe reward function decoration
9
+ - Parameter name and type validation
10
+ - Support for optional configuration parameters
11
+ """
12
+
13
+ from .rubric_eval import MissingAPIKeyError, evaluate_rubric
14
+ from .rubric_types import ModelNotFoundError, ProviderRequestError
15
+ from .utils import osmosis_reward, osmosis_rubric
16
+
17
+ __all__ = [
18
+ "osmosis_reward",
19
+ "osmosis_rubric",
20
+ "evaluate_rubric",
21
+ "MissingAPIKeyError",
22
+ "ProviderRequestError",
23
+ "ModelNotFoundError",
24
+ ]
@@ -1,3 +1,3 @@
1
1
  # package metadata
2
2
  package_name = "osmosis-ai"
3
- package_version = "0.2.0"
3
+ package_version = "0.2.2"
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Tuple
4
+
5
+ from .anthropic_provider import AnthropicProvider
6
+ from .base import DEFAULT_REQUEST_TIMEOUT_SECONDS, ProviderRegistry, ProviderRequest, RubricProvider
7
+ from .gemini_provider import GeminiProvider
8
+ from .openai_family import OpenAIProvider, XAIProvider
9
+
10
+ _REGISTRY = ProviderRegistry()
11
+ _REGISTRY.register(OpenAIProvider())
12
+ _REGISTRY.register(XAIProvider())
13
+ _REGISTRY.register(AnthropicProvider())
14
+ _REGISTRY.register(GeminiProvider())
15
+
16
+
17
+ def get_provider(name: str) -> RubricProvider:
18
+ return _REGISTRY.get(name)
19
+
20
+
21
+ def register_provider(provider: RubricProvider) -> None:
22
+ _REGISTRY.register(provider)
23
+
24
+
25
+ def supported_providers() -> Tuple[str, ...]:
26
+ return _REGISTRY.supported_providers()
27
+
28
+
29
+ __all__ = [
30
+ "DEFAULT_REQUEST_TIMEOUT_SECONDS",
31
+ "ProviderRequest",
32
+ "RubricProvider",
33
+ "get_provider",
34
+ "register_provider",
35
+ "supported_providers",
36
+ ]
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict
4
+
5
+ try: # pragma: no cover - optional dependency
6
+ import anthropic # type: ignore
7
+ from anthropic import APIError # type: ignore
8
+ except ImportError: # pragma: no cover - optional dependency
9
+ anthropic = None # type: ignore[assignment]
10
+ APIError = None # type: ignore[assignment]
11
+
12
+ from ..rubric_types import ModelNotFoundError, ProviderRequestError, RewardRubricRunResult
13
+ from .base import DEFAULT_REQUEST_TIMEOUT_SECONDS, ProviderRequest, RubricProvider
14
+ from .shared import dump_model, extract_structured_score, reward_schema_definition
15
+
16
+
17
+ class AnthropicProvider(RubricProvider):
18
+ name = "anthropic"
19
+
20
+ def default_timeout(self, model: str) -> float:
21
+ return DEFAULT_REQUEST_TIMEOUT_SECONDS
22
+
23
+ def run(self, request: ProviderRequest) -> RewardRubricRunResult:
24
+ if anthropic is None or APIError is None:
25
+ raise ProviderRequestError(
26
+ self.name,
27
+ request.model,
28
+ "Anthropic SDK is required. Install it via `pip install anthropic`.",
29
+ )
30
+
31
+ client = anthropic.Anthropic(api_key=request.api_key)
32
+ tool_name = "emit_reward_rubric_response"
33
+ schema_definition = reward_schema_definition()
34
+ tool = {
35
+ "name": tool_name,
36
+ "description": "Return the reward rubric score and explanation as structured JSON.",
37
+ "input_schema": schema_definition,
38
+ }
39
+
40
+ try:
41
+ response = client.messages.create(
42
+ model=request.model,
43
+ system=request.system_content,
44
+ messages=[{"role": "user", "content": [{"type": "text", "text": request.user_content}]}],
45
+ tools=[tool],
46
+ tool_choice={"type": "tool", "name": tool_name},
47
+ max_tokens=512,
48
+ temperature=0,
49
+ timeout=request.timeout,
50
+ )
51
+ except APIError as err:
52
+ detail = getattr(err, "message", None)
53
+ if not isinstance(detail, str) or not detail.strip():
54
+ detail = str(err)
55
+ status_code = getattr(err, "status_code", None)
56
+ if status_code == 404:
57
+ not_found_detail = (
58
+ f"Model '{request.model}' was not found. Confirm your Anthropic account has access "
59
+ "to the requested snapshot or update the model identifier."
60
+ )
61
+ raise ModelNotFoundError(self.name, request.model, not_found_detail) from err
62
+ raise ProviderRequestError(self.name, request.model, detail) from err
63
+ except Exception as err:
64
+ detail = str(err).strip() or "Unexpected error during Anthropic request."
65
+ raise ProviderRequestError(self.name, request.model, detail) from err
66
+
67
+ raw = dump_model(response)
68
+
69
+ payload: Dict[str, Any] | None = None
70
+ content_blocks = raw.get("content") if isinstance(raw, dict) else None
71
+ if isinstance(content_blocks, list):
72
+ for block in content_blocks:
73
+ if isinstance(block, dict) and block.get("type") == "tool_use" and block.get("name") == tool_name:
74
+ maybe_input = block.get("input")
75
+ if isinstance(maybe_input, dict):
76
+ payload = maybe_input
77
+ break
78
+ if payload is None:
79
+ raise ProviderRequestError(self.name, request.model, "Model response missing expected tool output.")
80
+ score, explanation = extract_structured_score(payload)
81
+ bounded = max(request.score_min, min(request.score_max, score))
82
+ return {"score": bounded, "explanation": explanation, "raw": raw}
83
+
84
+
85
+ __all__ = ["AnthropicProvider"]
@@ -0,0 +1,60 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict, Tuple
5
+
6
+ from ..rubric_types import RewardRubricRunResult
7
+
8
+ DEFAULT_REQUEST_TIMEOUT_SECONDS = 30.0
9
+
10
+
11
+ @dataclass(frozen=True)
12
+ class ProviderRequest:
13
+ provider: str
14
+ model: str
15
+ api_key: str
16
+ system_content: str
17
+ user_content: str
18
+ score_min: float
19
+ score_max: float
20
+ timeout: float
21
+
22
+
23
+ class RubricProvider:
24
+ """Interface for hosted LLM providers that can score rubrics."""
25
+
26
+ name: str
27
+
28
+ def default_timeout(self, model: str) -> float:
29
+ return DEFAULT_REQUEST_TIMEOUT_SECONDS
30
+
31
+ def run(self, request: ProviderRequest) -> RewardRubricRunResult:
32
+ raise NotImplementedError
33
+
34
+
35
+ class ProviderRegistry:
36
+ def __init__(self) -> None:
37
+ self._providers: Dict[str, RubricProvider] = {}
38
+
39
+ def register(self, provider: RubricProvider) -> None:
40
+ key = provider.name
41
+ if key in self._providers:
42
+ raise ValueError(f"Provider '{key}' is already registered.")
43
+ self._providers[key] = provider
44
+
45
+ def get(self, name: str) -> RubricProvider:
46
+ try:
47
+ return self._providers[name]
48
+ except KeyError as exc:
49
+ raise ValueError(f"Unsupported provider '{name}'.") from exc
50
+
51
+ def supported_providers(self) -> Tuple[str, ...]:
52
+ return tuple(sorted(self._providers))
53
+
54
+
55
+ __all__ = [
56
+ "DEFAULT_REQUEST_TIMEOUT_SECONDS",
57
+ "ProviderRequest",
58
+ "RubricProvider",
59
+ "ProviderRegistry",
60
+ ]