osmosis-ai 0.2.5 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. osmosis_ai-0.2.5/LICENSE +21 -0
  2. osmosis_ai-0.2.5/MANIFEST.in +18 -0
  3. osmosis_ai-0.2.5/PKG-INFO +314 -0
  4. osmosis_ai-0.2.5/README.md +269 -0
  5. osmosis_ai-0.2.5/osmosis_ai/__init__.py +24 -0
  6. osmosis_ai-0.2.5/osmosis_ai/cli.py +50 -0
  7. osmosis_ai-0.2.5/osmosis_ai/cli_commands.py +181 -0
  8. osmosis_ai-0.2.5/osmosis_ai/cli_services/__init__.py +60 -0
  9. osmosis_ai-0.2.5/osmosis_ai/cli_services/config.py +410 -0
  10. osmosis_ai-0.2.5/osmosis_ai/cli_services/dataset.py +175 -0
  11. osmosis_ai-0.2.5/osmosis_ai/cli_services/engine.py +421 -0
  12. osmosis_ai-0.2.5/osmosis_ai/cli_services/errors.py +7 -0
  13. osmosis_ai-0.2.5/osmosis_ai/cli_services/reporting.py +307 -0
  14. osmosis_ai-0.2.5/osmosis_ai/cli_services/session.py +174 -0
  15. osmosis_ai-0.2.5/osmosis_ai/cli_services/shared.py +209 -0
  16. osmosis_ai-0.2.5/osmosis_ai/consts.py +3 -0
  17. osmosis_ai-0.2.5/osmosis_ai/providers/__init__.py +36 -0
  18. osmosis_ai-0.2.5/osmosis_ai/providers/anthropic_provider.py +85 -0
  19. osmosis_ai-0.2.5/osmosis_ai/providers/base.py +60 -0
  20. osmosis_ai-0.2.5/osmosis_ai/providers/gemini_provider.py +314 -0
  21. osmosis_ai-0.2.5/osmosis_ai/providers/openai_family.py +607 -0
  22. osmosis_ai-0.2.5/osmosis_ai/providers/shared.py +92 -0
  23. osmosis_ai-0.2.5/osmosis_ai/rubric_eval.py +356 -0
  24. osmosis_ai-0.2.5/osmosis_ai/rubric_types.py +49 -0
  25. osmosis_ai-0.2.5/osmosis_ai/utils.py +187 -0
  26. osmosis_ai-0.2.5/osmosis_ai.egg-info/SOURCES.txt +31 -0
  27. osmosis_ai-0.2.5/pyproject.toml +42 -0
  28. osmosis_ai-0.2.5/pytest.ini +4 -0
  29. osmosis_ai-0.2.5/requirements.txt +12 -0
  30. osmosis_ai-0.2.5/setup.cfg +4 -0
  31. osmosis_ai-0.2.5/setup_env.bat +25 -0
  32. osmosis_ai-0.2.5/tests/test_cli.py +528 -0
  33. osmosis_ai-0.2.5/tests/test_cli_services.py +400 -0
  34. osmosis_ai-0.2.5/tests/test_rubric_eval.py +40 -0
osmosis_ai-0.2.5/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Gulp AI
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
osmosis_ai-0.2.5/MANIFEST.in
@@ -0,0 +1,18 @@
+ include LICENSE
+ include README.md
+ include pyproject.toml
+ include requirements.txt
+ include pytest.ini
+ include .env.sample
+ include setup_env.sh
+ include setup_env.bat
+
+ recursive-exclude __pycache__ *
+ recursive-exclude *.py[cod] *
+ recursive-exclude venv *
+ recursive-exclude .pytest_cache *
+ recursive-exclude .git *
+ recursive-exclude .github *
+ recursive-exclude osmosis_ai.egg-info *
+
+ prune examples
osmosis_ai-0.2.5/PKG-INFO
@@ -0,0 +1,314 @@
+ Metadata-Version: 2.4
+ Name: osmosis-ai
+ Version: 0.2.5
+ Summary: A Python library for reward function validation with strict type enforcement.
+ Author-email: Osmosis AI <jake@osmosis.ai>
+ License: MIT License
+
+ Copyright (c) 2025 Gulp AI
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ Project-URL: Homepage, https://github.com/Osmosis-AI/osmosis-sdk-python
+ Project-URL: Issues, https://github.com/Osmosis-AI/osmosis-sdk-python/issues
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: PyYAML<7.0,>=6.0
+ Requires-Dist: python-dotenv<2.0.0,>=0.1.0
+ Requires-Dist: requests<3.0.0,>=2.0.0
+ Requires-Dist: xxhash<4.0.0,>=3.0.0
+ Requires-Dist: anthropic<0.50.0,>=0.36.0
+ Requires-Dist: openai>=2.0.0
+ Requires-Dist: google-genai>=1.0.0
+ Requires-Dist: xai-sdk>=1.2.0
+ Requires-Dist: tqdm<5.0.0,>=4.0.0
+ Dynamic: license-file
+
(The remaining 269 lines of PKG-INFO reproduce README.md verbatim as the package's long description; see the README.md diff below.)
osmosis_ai-0.2.5/README.md
@@ -0,0 +1,269 @@
+ # osmosis-ai
+
+ A Python library that provides reward and rubric validation helpers for LLM applications with strict type enforcement.
+
+ ## Installation
+
+ ```bash
+ pip install osmosis-ai
+ ```
+
+ Requires Python 3.9 or newer.
+
+ This installs the Osmosis CLI and pulls in the required provider SDKs (`openai`, `anthropic`, `google-genai`, `xai-sdk`) along with supporting utilities such as `PyYAML`, `python-dotenv`, `requests`, and `xxhash`.
+
+ For development:
+ ```bash
+ git clone https://github.com/Osmosis-AI/osmosis-sdk-python
+ cd osmosis-sdk-python
+ pip install -e .
+ ```
+
+ ## Quick Start
+
+ ```python
+ from osmosis_ai import osmosis_reward
+
+ @osmosis_reward
+ def simple_reward(solution_str: str, ground_truth: str, extra_info: dict = None) -> float:
+     """Basic exact match reward function."""
+     return 1.0 if solution_str.strip() == ground_truth.strip() else 0.0
+
+ # Use the reward function
+ score = simple_reward("hello world", "hello world")  # Returns 1.0
+ ```
+
+ ```python
+ from osmosis_ai import evaluate_rubric
+
+ solution = "The capital of France is Paris."
+
+ # Export OPENAI_API_KEY in your shell before running this snippet.
+ rubric_score = evaluate_rubric(
+     rubric="Assistant must mention the verified capital city.",
+     solution_str=solution,
+     model_info={
+         "provider": "openai",
+         "model": "gpt-5",
+         "api_key_env": "OPENAI_API_KEY",
+     },
+     ground_truth="Paris",
+ )
+
+ print(rubric_score)  # -> 1.0 (full payload available via return_details=True)
+ ```
+
+ ## Remote Rubric Evaluation
+
+ `evaluate_rubric` talks to each provider through its official Python SDK while enforcing the same JSON schema everywhere:
+
+ - **OpenAI / xAI** – Uses `OpenAI(...).responses.create` (or `chat.completions.create`) with `response_format={"type": "json_schema"}` and falls back to `json_object` when needed.
+ - **Anthropic** – Forces a tool call with a JSON schema via `Anthropic(...).messages.create`, extracting the returned tool arguments.
+ - **Google Gemini** – Invokes `google.genai.Client(...).models.generate_content` with `response_mime_type="application/json"` and `response_schema`.
+
+ Every provider therefore returns a strict JSON object with `{"score": number, "explanation": string}`. The helper clamps the score into your configured range, validates the structure, and exposes the raw payload when `return_details=True`.
+
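+ For illustration, a judge response that passes this validation has exactly the documented shape; the values below are invented:
+
+ ```json
+ {"score": 0.75, "explanation": "Mentions the capital city but skips the verification requirement."}
+ ```
+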
+ Credentials are resolved from environment variables by default:
+
+ - `OPENAI_API_KEY` for OpenAI
+ - `ANTHROPIC_API_KEY` for Anthropic
+ - `GOOGLE_API_KEY` for Google Gemini
+ - `XAI_API_KEY` for xAI
+
+ Override the environment variable name with `model_info={"api_key_env": "CUSTOM_ENV_NAME"}` when needed, or supply an inline secret with `model_info={"api_key": "sk-..."}` for ephemeral credentials. Missing API keys raise a `MissingAPIKeyError` that explains how to export the secret before trying again.
+
+ `api_key` and `api_key_env` are mutually exclusive ways to provide the same credential. When `api_key` is present and non-empty it is used directly, skipping any environment lookup. Otherwise the resolver falls back to `api_key_env` (or the provider default) and pulls the value from your local environment with `os.getenv`.
+
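+ As a sketch of the two credential paths described above, the calls below hand the same secret to `evaluate_rubric` either by environment variable name or inline; the variable name `MY_TEAM_OPENAI_KEY` is a placeholder:
+
+ ```python
+ import os
+
+ from osmosis_ai import evaluate_rubric
+
+ model_info_env = {
+     "provider": "openai",
+     "model": "gpt-5",
+     "api_key_env": "MY_TEAM_OPENAI_KEY",  # resolved with os.getenv at call time
+ }
+
+ model_info_inline = {
+     "provider": "openai",
+     "model": "gpt-5",
+     "api_key": os.environ["MY_TEAM_OPENAI_KEY"],  # inline secret skips the env lookup
+ }
+
+ score = evaluate_rubric(
+     rubric="Assistant must mention the verified capital city.",
+     solution_str="The capital of France is Paris.",
+     model_info=model_info_env,  # or model_info_inline
+ )
+ ```
+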
+ `model_info` accepts additional rubric-specific knobs:
+
+ - `score_min` / `score_max` – change the default `[0.0, 1.0]` scoring bounds.
+ - `system_prompt` / `original_input` – provide optional context strings that will be quoted in the judging prompt.
+ - `timeout` – customise the provider timeout in seconds.
+
+ Pass `metadata={...}` to `evaluate_rubric` when you need structured context quoted in the judge prompt, and set `return_details=True` to receive the full `RewardRubricRunResult` payload (including the provider’s raw response).
+
+ Remote failures surface as `ProviderRequestError` instances, with `ModelNotFoundError` reserved for missing model identifiers so you can retry with a new snapshot.
+
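+ A sketch that combines the knobs above with the documented error types; the model snapshot name is a placeholder, and only the fields named in this README are assumed to exist on the returned payload:
+
+ ```python
+ from osmosis_ai import (
+     MissingAPIKeyError,
+     ModelNotFoundError,
+     ProviderRequestError,
+     evaluate_rubric,
+ )
+
+ try:
+     details = evaluate_rubric(
+         rubric="Score how completely the reply resolves the customer's question.",
+         solution_str="You can reset the password from the account settings page.",
+         model_info={
+             "provider": "anthropic",
+             "model": "claude-snapshot-id",  # placeholder; check the vendor dashboard for a current snapshot
+             "api_key_env": "ANTHROPIC_API_KEY",
+             "score_min": 0.0,
+             "score_max": 5.0,
+             "timeout": 30,
+         },
+         metadata={"channel": "email", "priority": "low"},
+         return_details=True,
+     )
+     print(details)  # full RewardRubricRunResult payload, including the provider's raw response
+ except MissingAPIKeyError as exc:
+     print(f"Export the API key first: {exc}")
+ except ModelNotFoundError as exc:
+     print(f"Model snapshot not found, pick a newer identifier: {exc}")
+ except ProviderRequestError as exc:
+     print(f"Provider call failed: {exc}")
+ ```
+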
+ > Older SDK versions that lack schema parameters automatically fall back to instruction-only JSON; the helper still validates the response payload before returning.
+ > Provider model snapshot names change frequently. Check each vendor's dashboard for the latest identifier if you encounter a “model not found” error.
+
+ ### Provider Architecture
+
+ All remote integrations live in `osmosis_ai/providers/` and implement the `RubricProvider` interface. At import time the default registry registers OpenAI, xAI, Anthropic, and Google Gemini so `evaluate_rubric` can route requests without additional configuration. The request/response plumbing is encapsulated in each provider module, keeping `evaluate_rubric` focused on prompt construction, payload validation, and credential resolution.
+
+ Add your own provider by subclassing `RubricProvider`, implementing `run()` with the vendor SDK, and calling `register_provider()` during start-up. A step-by-step guide is available in [`osmosis_ai/providers/README.md`](osmosis_ai/providers/README.md).
+
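+ The outline below follows that registration flow. The exact import path, `RubricProvider` method signatures, and the argument passed to `run()` are assumptions here; check [`osmosis_ai/providers/README.md`](osmosis_ai/providers/README.md) for the authoritative interface:
+
+ ```python
+ from osmosis_ai.providers import RubricProvider, register_provider  # import path assumed
+
+ class MyVendorProvider(RubricProvider):
+     name = "my_vendor"  # attribute name assumed
+
+     def run(self, request):
+         # Call the vendor SDK here and translate its output into the
+         # {"score": ..., "explanation": ...} structure the helper validates.
+         raise NotImplementedError
+
+ # Register once during start-up so evaluate_rubric can route "my_vendor" requests.
+ register_provider(MyVendorProvider())
+ ```
+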
+ ## Required Function Signature
+
+ All functions decorated with `@osmosis_reward` must have exactly this signature:
+
+ ```python
+ @osmosis_reward
+ def your_function(solution_str: str, ground_truth: str, extra_info: dict = None) -> float:
+     # Your reward logic here
+     return float_score
+ ```
+
+ ### Parameters
+
+ - **`solution_str: str`** - The solution string to evaluate (required)
+ - **`ground_truth: str`** - The correct/expected answer (required)
+ - **`extra_info: dict = None`** - Optional dictionary for additional configuration
+
+ ### Return Value
+
+ - **`-> float`** - Must return a float value representing the reward score
+
+ The decorator will raise a `TypeError` if the function doesn't match this exact signature or doesn't return a float.
+
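+ For example, a function with a wrong parameter name violates the contract; this sketch assumes the signature check fires when the decorator is applied:
+
+ ```python
+ from osmosis_ai import osmosis_reward
+
+ try:
+     @osmosis_reward
+     def bad_reward(answer: str, ground_truth: str, extra_info: dict = None) -> float:
+         # "answer" is not an accepted parameter name, so decoration should fail.
+         return 1.0
+ except TypeError as exc:
+     print(f"Rejected: {exc}")
+ ```
+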
+ ## Rubric Function Signature
+
+ Rubric functions decorated with `@osmosis_rubric` must match this signature:
+
+ ```python
+ @osmosis_rubric
+ def your_rubric(solution_str: str, ground_truth: str | None, extra_info: dict) -> float:
+     # Your rubric logic here
+     return float_score
+ ```
+
+ > The runtime forwards `None` for `ground_truth` when no reference answer exists. Annotate the parameter as `Optional[str]` (or handle `None` explicitly) if your rubric logic expects to run in that scenario.
+
+ ### Required `extra_info` fields
+
+ - **`provider`** – Non-empty string identifying the judge provider.
+ - **`model`** – Non-empty string naming the provider model to call.
+ - **`rubric`** – Natural-language rubric instructions for the judge model.
+ - **`api_key` / `api_key_env`** – Supply either the raw key or the environment variable name that exposes it.
+
+ ### Optional `extra_info` fields
+
+ - **`system_prompt`** – Optional string prepended to the provider’s base system prompt when invoking the judge; include it inside `extra_info` rather than as a separate argument.
+ - **`score_min` / `score_max`** – Optional numeric overrides for the expected score range.
+ - **`model_info_overrides`** – Optional dict merged into the provider configuration passed to the judge.
+
+ Additional keys are passthrough and can be used for custom configuration. If you need to extend the provider payload (for example adding `api_key_env`), add a dict under `model_info_overrides` and it will be merged with the required `provider`/`model` pair before invoking `evaluate_rubric`. The decorator enforces the parameter names/annotations, validates the embedded configuration at call time, and ensures the wrapped function returns a `float`.
+
+ > Annotation quirk: `extra_info` must be annotated as `dict` **without** a default value, unlike `@osmosis_reward`.
+
+ > Tip: When delegating to `evaluate_rubric`, pass the raw `solution_str` directly and include any extra context inside the `metadata` payload.
+
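+ Putting the pieces together, a minimal rubric function can assemble `model_info` from the `extra_info` fields listed above and delegate to `evaluate_rubric`; the key-by-key handling below is illustrative rather than prescribed:
+
+ ```python
+ from typing import Optional
+
+ from osmosis_ai import evaluate_rubric, osmosis_rubric
+
+ @osmosis_rubric
+ def judged_rubric(solution_str: str, ground_truth: Optional[str], extra_info: dict) -> float:
+     model_info = {
+         "provider": extra_info["provider"],
+         "model": extra_info["model"],
+         "score_min": extra_info.get("score_min", 0.0),
+         "score_max": extra_info.get("score_max", 1.0),
+     }
+     for key in ("api_key", "api_key_env", "system_prompt"):
+         if key in extra_info:
+             model_info[key] = extra_info[key]
+     model_info.update(extra_info.get("model_info_overrides", {}))
+
+     kwargs = {}
+     if "metadata" in extra_info:  # assumption: the caller tucked extra context under this key
+         kwargs["metadata"] = extra_info["metadata"]
+
+     return evaluate_rubric(
+         rubric=extra_info["rubric"],
+         solution_str=solution_str,
+         model_info=model_info,
+         ground_truth=ground_truth,
+         **kwargs,
+     )
+ ```
+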
151
+ ## Examples
152
+
153
+ See the [`examples/`](examples/) directory for complete examples:
154
+
155
+ ```python
156
+ @osmosis_reward
157
+ def case_insensitive_match(solution_str: str, ground_truth: str, extra_info: dict = None) -> float:
158
+ """Case-insensitive string matching with partial credit."""
159
+ match = solution_str.lower().strip() == ground_truth.lower().strip()
160
+
161
+ if extra_info and 'partial_credit' in extra_info:
162
+ if not match and extra_info['partial_credit']:
163
+ len_diff = abs(len(solution_str) - len(ground_truth))
164
+ if len_diff <= 2:
165
+ return 0.5
166
+
167
+ return 1.0 if match else 0.0
168
+
169
+ @osmosis_reward
170
+ def numeric_tolerance(solution_str: str, ground_truth: str, extra_info: dict = None) -> float:
171
+ """Numeric comparison with configurable tolerance."""
172
+ try:
173
+ solution_num = float(solution_str.strip())
174
+ truth_num = float(ground_truth.strip())
175
+
176
+ tolerance = extra_info.get('tolerance', 0.01) if extra_info else 0.01
177
+ return 1.0 if abs(solution_num - truth_num) <= tolerance else 0.0
178
+ except ValueError:
179
+ return 0.0
180
+ ```
181
+
182
+ - `examples/rubric_functions.py` demonstrates `evaluate_rubric` with OpenAI, Anthropic, Gemini, and xAI using the schema-enforced SDK integrations.
183
+ - `examples/reward_functions.py` keeps local reward helpers that showcase the decorator contract without external calls.
184
+ - `examples/rubric_configs.yaml` bundles two rubric definitions with provider configuration and scoring bounds.
185
+ - `examples/sample_data.jsonl` contains two rubric-aligned solution strings so you can trial dataset validation.
186
+
187
+ ```yaml
188
+ # examples/rubric_configs.yaml (excerpt)
189
+ version: 1
190
+ rubrics:
191
+ - id: support_followup
192
+ model_info:
193
+ provider: openai
194
+ model: gpt-5-mini
195
+ api_key_env: OPENAI_API_KEY
196
+ ```
197
+
198
+ ```jsonl
199
+ {"conversation_id": "ticket-001", "rubric_id": "support_followup", "original_input": "...", "solution_str": "..."}
200
+ {"conversation_id": "ticket-047", "rubric_id": "policy_grounding", "original_input": "...", "solution_str": "..."}
201
+ ```
202
+
203
+ ## CLI Tools
204
+
205
+ Installing the SDK also provides a lightweight CLI available as `osmosis` (aliases: `osmosis_ai`, `osmosis-ai`) for inspecting rubric YAML files and JSONL test payloads.
206
+
207
+ Preview a rubric file and print every configuration discovered, including nested entries:
208
+
209
+ ```bash
210
+ osmosis preview --path path/to/rubric.yaml
211
+ ```
212
+
213
+ Preview a dataset of rubric-scored solutions stored as JSONL:
214
+
215
+ ```bash
216
+ osmosis preview --path path/to/data.jsonl
217
+ ```
218
+
219
+ Evaluate a dataset against a hosted rubric configuration and print the returned scores:
220
+
221
+ ```bash
222
+ osmosis eval --rubric support_followup --data examples/sample_data.jsonl
223
+ ```
224
+
+ - Supply the dataset with `-d`/`--data path/to/data.jsonl`; the path is resolved relative to the current working directory.
+ - Use `--config path/to/rubric_configs.yaml` when the rubric definitions are not located alongside the dataset.
+ - Pass `-n`/`--number` to sample the provider multiple times per record; the CLI prints every run along with aggregate statistics (average, variance, standard deviation, and min/max). A combined invocation using these flags is shown after this list.
+ - Provide `--output path/to/dir` to create the directory (if needed) and emit `rubric_eval_result_<unix_timestamp>.json`, or supply a full file path (any extension) to control the filename; each file captures every run, provider payloads, timestamps, and aggregate statistics for downstream analysis.
+ - Skip `--output` to collect results under `~/.cache/osmosis/eval_result/<rubric_id>/rubric_eval_result_<identifier>.json`; the CLI writes this JSON whether the evaluation finishes cleanly or hits provider/runtime errors so you can inspect failures later (only a manual Ctrl+C interrupt leaves no file behind).
+ - Dataset rows whose `rubric_id` does not match the requested rubric are skipped automatically.
+ - Each dataset record must provide a non-empty `solution_str`; optional fields such as `original_input`, `ground_truth`, and `extra_info` travel with the record and are forwarded to the evaluator when present.
+ - When delegating to a custom `@osmosis_rubric` function, the CLI enriches `extra_info` with the active `provider`, `model`, `rubric`, score bounds, any configured `system_prompt`, the resolved `original_input`, and the record’s metadata/extra fields so the decorator’s required entries are always present.
+ - Rubric configuration files intentionally reject `extra_info`; provide per-example context through the dataset instead.
+
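+ A combined invocation that exercises the options above (the rubric id, paths, and sample count are illustrative):
+
+ ```bash
+ osmosis eval --rubric support_followup \
+   --config examples/rubric_configs.yaml \
+   --data examples/sample_data.jsonl \
+   -n 3 \
+   --output results/
+ ```
+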
+ Both commands validate the file, echo a short summary (`Loaded <n> ...`), and pretty-print the parsed records so you can confirm that new rubrics or test fixtures look correct before committing them. Invalid files raise a descriptive error and exit with a non-zero status code.
+
+ ## Running Examples
+
+ ```bash
+ PYTHONPATH=. python examples/reward_functions.py
+ PYTHONPATH=. python examples/rubric_functions.py  # Uncomment the provider you need before running
+ ```
+
+ ## Testing
+
+ Run `python -m pytest` (or any subset under `tests/`) to exercise the updated helpers:
+
+ - `tests/test_rubric_eval.py` covers prompt construction for `solution_str` evaluations.
+ - `tests/test_cli_services.py` validates dataset parsing, extra-info enrichment, and engine interactions.
+ - `tests/test_cli.py` ensures the CLI pathways surface the new fields end to end.
+
+ Add additional tests under `tests/` as you extend the library.
+
+ ## License
+
+ MIT License - see [LICENSE](LICENSE) file for details.
+
+ ## Contributing
+
+ 1. Fork the repository
+ 2. Create a feature branch
+ 3. Make your changes
+ 4. Run tests and examples
+ 5. Submit a pull request
+
+ ## Links
+
+ - [Homepage](https://github.com/Osmosis-AI/osmosis-sdk-python)
+ - [Issues](https://github.com/Osmosis-AI/osmosis-sdk-python/issues)
osmosis_ai-0.2.5/osmosis_ai/__init__.py
@@ -0,0 +1,24 @@
+ """
+ osmosis-ai: A Python library for reward function validation with strict type enforcement.
+
+ This library provides decorators such as @osmosis_reward and @osmosis_rubric that
+ enforce standardized function signatures for LLM-centric workflows.
+
+ Features:
+ - Type-safe reward function decoration
+ - Parameter name and type validation
+ - Support for optional configuration parameters
+ """
+
+ from .rubric_eval import MissingAPIKeyError, evaluate_rubric
+ from .rubric_types import ModelNotFoundError, ProviderRequestError
+ from .utils import osmosis_reward, osmosis_rubric
+
+ __all__ = [
+     "osmosis_reward",
+     "osmosis_rubric",
+     "evaluate_rubric",
+     "MissingAPIKeyError",
+     "ProviderRequestError",
+     "ModelNotFoundError",
+ ]