osmosis-ai 0.2.2__tar.gz → 0.2.3__tar.gz


Files changed (34)
  1. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/PKG-INFO +64 -2
  2. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/README.md +53 -0
  3. osmosis_ai-0.2.3/osmosis_ai/cli.py +50 -0
  4. osmosis_ai-0.2.3/osmosis_ai/cli_commands.py +181 -0
  5. osmosis_ai-0.2.3/osmosis_ai/cli_services/__init__.py +67 -0
  6. osmosis_ai-0.2.3/osmosis_ai/cli_services/config.py +407 -0
  7. osmosis_ai-0.2.3/osmosis_ai/cli_services/dataset.py +229 -0
  8. osmosis_ai-0.2.3/osmosis_ai/cli_services/engine.py +251 -0
  9. osmosis_ai-0.2.3/osmosis_ai/cli_services/errors.py +7 -0
  10. osmosis_ai-0.2.3/osmosis_ai/cli_services/reporting.py +307 -0
  11. osmosis_ai-0.2.3/osmosis_ai/cli_services/session.py +174 -0
  12. osmosis_ai-0.2.3/osmosis_ai/cli_services/shared.py +209 -0
  13. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/providers/gemini_provider.py +73 -28
  14. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/rubric_eval.py +27 -66
  15. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/utils.py +0 -4
  16. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai.egg-info/SOURCES.txt +12 -0
  17. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/pyproject.toml +18 -3
  18. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/requirements.txt +3 -1
  19. osmosis_ai-0.2.3/tests/test_cli.py +510 -0
  20. osmosis_ai-0.2.3/tests/test_cli_services.py +193 -0
  21. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/LICENSE +0 -0
  22. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/MANIFEST.in +0 -0
  23. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/__init__.py +0 -0
  24. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/consts.py +0 -0
  25. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/providers/__init__.py +0 -0
  26. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/providers/anthropic_provider.py +0 -0
  27. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/providers/base.py +0 -0
  28. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/providers/openai_family.py +0 -0
  29. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/providers/shared.py +0 -0
  30. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/osmosis_ai/rubric_types.py +0 -0
  31. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/pytest.ini +0 -0
  32. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/setup.cfg +0 -0
  33. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/setup_env.bat +0 -0
  34. {osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/tests/test_rubric_eval.py +0 -0
{osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: osmosis-ai
-Version: 0.2.2
+Version: 0.2.3
 Summary: A Python library for reward function validation with strict type enforcement.
 Author-email: Osmosis AI <jake@osmosis.ai>
 License: MIT License
@@ -29,9 +29,18 @@ Project-URL: Issues, https://github.com/Osmosis-AI/osmosis-sdk-python/issues
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
-Requires-Python: >=3.6
+Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: PyYAML<7.0,>=6.0
+Requires-Dist: python-dotenv<2.0.0,>=0.1.0
+Requires-Dist: requests<3.0.0,>=2.0.0
+Requires-Dist: xxhash<4.0.0,>=3.0.0
+Requires-Dist: anthropic<0.50.0,>=0.36.0
+Requires-Dist: openai>=2.0.0
+Requires-Dist: google-genai>=1.0.0
+Requires-Dist: xai-sdk>=1.2.0
+Requires-Dist: tqdm<5.0.0,>=4.0.0
 Dynamic: license-file

 # osmosis-ai
@@ -44,6 +53,10 @@ A Python library that provides reward and rubric validation helpers for LLM appl
 pip install osmosis-ai
 ```

+Requires Python 3.9 or newer.
+
+This installs the Osmosis CLI and pulls in the required provider SDKs (`openai`, `anthropic`, `google-genai`, `xai-sdk`) along with supporting utilities such as `PyYAML`, `python-dotenv`, `requests`, and `xxhash`.
+
 For development:
 ```bash
 git clone https://github.com/Osmosis-AI/osmosis-sdk-python
@@ -211,6 +224,55 @@ def numeric_tolerance(solution_str: str, ground_truth: str, extra_info: dict = N

 - `examples/rubric_functions.py` demonstrates `evaluate_rubric` with OpenAI, Anthropic, Gemini, and xAI using the schema-enforced SDK integrations.
 - `examples/reward_functions.py` keeps local reward helpers that showcase the decorator contract without external calls.
+- `examples/rubric_configs.yaml` bundles two rubric definitions, each with its own provider configuration and extra prompt context.
+- `examples/sample_data.jsonl` contains two conversation payloads mapped to those rubrics so you can trial dataset validation.
+
+```yaml
+# examples/rubric_configs.yaml (excerpt)
+version: 1
+rubrics:
+  - id: support_followup
+    model_info:
+      provider: openai
+      model: gpt-5-mini
+      api_key_env: OPENAI_API_KEY
+```
+
+```jsonl
+{"conversation_id": "ticket-001", "rubric_id": "support_followup", "...": "..."}
+{"conversation_id": "ticket-047", "rubric_id": "policy_grounding", "...": "..."}
+```
+
+## CLI Tools
+
+Installing the SDK also provides a lightweight CLI available as `osmosis` (aliases: `osmosis_ai`, `osmosis-ai`) for inspecting rubric YAML files and JSONL test payloads.
+
+Preview a rubric file and print every configuration discovered, including nested entries:
+
+```bash
+osmosis preview --path path/to/rubric.yaml
+```
+
+Preview a dataset of chat transcripts stored as JSONL:
+
+```bash
+osmosis preview --path path/to/data.jsonl
+```
+
+Evaluate a dataset against a hosted rubric configuration and print the returned scores:
+
+```bash
+osmosis eval --rubric support_followup --data examples/sample_data.jsonl
+```
+
+- Supply the dataset with `-d`/`--data path/to/data.jsonl`; the path is resolved relative to the current working directory.
+- Use `--config path/to/rubric_configs.yaml` when the rubric definitions are not located alongside the dataset.
+- Pass `-n`/`--number` to sample the provider multiple times per record; the CLI prints every run along with aggregate statistics (average, variance, standard deviation, and min/max).
+- Provide `--output path/to/dir` to create the directory (if needed) and emit `rubric_eval_result_<unix_timestamp>.json`, or supply a full file path (any extension) to control the filename; each file captures every run, provider payloads, timestamps, and aggregate statistics for downstream analysis.
+- Skip `--output` to collect results under `~/.cache/osmosis/eval_result/<rubric_id>/rubric_eval_result_<identifier>.json`; the CLI writes this JSON whether the evaluation finishes cleanly or hits provider/runtime errors so you can inspect failures later (only a manual Ctrl+C interrupt leaves no file behind).
+- Dataset rows whose `rubric_id` does not match the requested rubric are skipped automatically.
+
+Both commands validate the file, echo a short summary (`Loaded <n> ...`), and pretty-print the parsed records so you can confirm that new rubrics or test fixtures look correct before committing them. Invalid files raise a descriptive error and exit with a non-zero status code.

 ## Running Examples

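For reference, the eval flags documented above compose in a single run. A hypothetical invocation, using the rubric id and paths from the bundled examples (the `results/` output directory is illustrative):

```bash
# Sample each record 5 times against an explicit config and write the
# JSON report into ./results/ (created if missing, per the --output notes above).
osmosis eval \
  --rubric support_followup \
  --data examples/sample_data.jsonl \
  --config examples/rubric_configs.yaml \
  --number 5 \
  --output results/
```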
{osmosis_ai-0.2.2 → osmosis_ai-0.2.3}/README.md

@@ -8,6 +8,10 @@ A Python library that provides reward and rubric validation helpers for LLM appl
 pip install osmosis-ai
 ```

+Requires Python 3.9 or newer.
+
+This installs the Osmosis CLI and pulls in the required provider SDKs (`openai`, `anthropic`, `google-genai`, `xai-sdk`) along with supporting utilities such as `PyYAML`, `python-dotenv`, `requests`, and `xxhash`.
+
 For development:
 ```bash
 git clone https://github.com/Osmosis-AI/osmosis-sdk-python
@@ -175,6 +179,55 @@ def numeric_tolerance(solution_str: str, ground_truth: str, extra_info: dict = N

 - `examples/rubric_functions.py` demonstrates `evaluate_rubric` with OpenAI, Anthropic, Gemini, and xAI using the schema-enforced SDK integrations.
 - `examples/reward_functions.py` keeps local reward helpers that showcase the decorator contract without external calls.
+- `examples/rubric_configs.yaml` bundles two rubric definitions, each with its own provider configuration and extra prompt context.
+- `examples/sample_data.jsonl` contains two conversation payloads mapped to those rubrics so you can trial dataset validation.
+
+```yaml
+# examples/rubric_configs.yaml (excerpt)
+version: 1
+rubrics:
+  - id: support_followup
+    model_info:
+      provider: openai
+      model: gpt-5-mini
+      api_key_env: OPENAI_API_KEY
+```
+
+```jsonl
+{"conversation_id": "ticket-001", "rubric_id": "support_followup", "...": "..."}
+{"conversation_id": "ticket-047", "rubric_id": "policy_grounding", "...": "..."}
+```
+
+## CLI Tools
+
+Installing the SDK also provides a lightweight CLI available as `osmosis` (aliases: `osmosis_ai`, `osmosis-ai`) for inspecting rubric YAML files and JSONL test payloads.
+
+Preview a rubric file and print every configuration discovered, including nested entries:
+
+```bash
+osmosis preview --path path/to/rubric.yaml
+```
+
+Preview a dataset of chat transcripts stored as JSONL:
+
+```bash
+osmosis preview --path path/to/data.jsonl
+```
+
+Evaluate a dataset against a hosted rubric configuration and print the returned scores:
+
+```bash
+osmosis eval --rubric support_followup --data examples/sample_data.jsonl
+```
+
+- Supply the dataset with `-d`/`--data path/to/data.jsonl`; the path is resolved relative to the current working directory.
+- Use `--config path/to/rubric_configs.yaml` when the rubric definitions are not located alongside the dataset.
+- Pass `-n`/`--number` to sample the provider multiple times per record; the CLI prints every run along with aggregate statistics (average, variance, standard deviation, and min/max).
+- Provide `--output path/to/dir` to create the directory (if needed) and emit `rubric_eval_result_<unix_timestamp>.json`, or supply a full file path (any extension) to control the filename; each file captures every run, provider payloads, timestamps, and aggregate statistics for downstream analysis.
+- Skip `--output` to collect results under `~/.cache/osmosis/eval_result/<rubric_id>/rubric_eval_result_<identifier>.json`; the CLI writes this JSON whether the evaluation finishes cleanly or hits provider/runtime errors so you can inspect failures later (only a manual Ctrl+C interrupt leaves no file behind).
+- Dataset rows whose `rubric_id` does not match the requested rubric are skipped automatically.
+
+Both commands validate the file, echo a short summary (`Loaded <n> ...`), and pretty-print the parsed records so you can confirm that new rubrics or test fixtures look correct before committing them. Invalid files raise a descriptive error and exit with a non-zero status code.

 ## Running Examples

osmosis_ai-0.2.3/osmosis_ai/cli.py (new file)

@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import argparse
+import sys
+from typing import Optional
+
+from .cli_commands import EvalCommand, PreviewCommand
+from .cli_services import CLIError
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    """Entry point for the osmosis CLI."""
+    parser = _build_parser()
+    args = parser.parse_args(argv)
+
+    handler = getattr(args, "handler", None)
+    if handler is None:
+        parser.print_help()
+        return 1
+
+    try:
+        return handler(args)
+    except CLIError as exc:
+        print(f"Error: {exc}", file=sys.stderr)
+        return 1
+
+
+def _build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prog="osmosis", description="Utilities for inspecting Osmosis rubric and test data files."
+    )
+    subparsers = parser.add_subparsers(dest="command")
+
+    preview_parser = subparsers.add_parser(
+        "preview",
+        help="Preview a rubric YAML file or test JSONL file and print its parsed contents.",
+    )
+    PreviewCommand().configure_parser(preview_parser)
+
+    eval_parser = subparsers.add_parser(
+        "eval",
+        help="Evaluate JSONL conversations against a rubric using remote providers.",
+    )
+    EvalCommand().configure_parser(eval_parser)
+
+    return parser
+
+
+if __name__ == "__main__":
+    sys.exit(main())
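Because `main()` accepts an optional `argv` list, the new entry point can be driven in-process as well as from the shell. A minimal sketch (the YAML path is illustrative, taken from the bundled examples):

```python
# Minimal sketch: invoke the CLI entry point added in cli.py without a subprocess.
from osmosis_ai.cli import main

# Equivalent to running `osmosis preview --path examples/rubric_configs.yaml`.
exit_code = main(["preview", "--path", "examples/rubric_configs.yaml"])
print(f"preview exited with code {exit_code}")
```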
osmosis_ai-0.2.3/osmosis_ai/cli_commands.py (new file)

@@ -0,0 +1,181 @@
+from __future__ import annotations
+
+import argparse
+import time
+from pathlib import Path
+from typing import Any, Callable, Optional
+
+from .cli_services import (
+    CLIError,
+    ParsedItem,
+    BaselineComparator,
+    ConsoleReportRenderer,
+    DatasetLoader,
+    EvaluationSession,
+    EvaluationSessionRequest,
+    JsonReportWriter,
+    RubricEvaluationEngine,
+    RubricSuite,
+    discover_rubric_config_path,
+    load_jsonl_records,
+    load_rubric_configs,
+    load_rubric_suite,
+    render_json_records,
+    render_yaml_items,
+)
+
+
+class PreviewCommand:
+    """Handler for `osmosis preview`."""
+
+    def __init__(
+        self,
+        *,
+        yaml_loader: Callable[[Path], list[ParsedItem]] = load_rubric_configs,
+        json_loader: Callable[[Path], list[dict[str, Any]]] = load_jsonl_records,
+    ):
+        self._yaml_loader = yaml_loader
+        self._json_loader = json_loader
+
+    def configure_parser(self, parser: argparse.ArgumentParser) -> None:
+        parser.set_defaults(handler=self.run)
+        parser.add_argument(
+            "-p",
+            "--path",
+            dest="path",
+            required=True,
+            help="Path to the YAML or JSONL file to inspect.",
+        )
+
+    def run(self, args: argparse.Namespace) -> int:
+        path = Path(args.path).expanduser()
+        if not path.exists():
+            raise CLIError(f"Path '{path}' does not exist.")
+        if path.is_dir():
+            raise CLIError(f"Expected a file path but got directory '{path}'.")
+
+        suffix = path.suffix.lower()
+        if suffix in {".yaml", ".yml"}:
+            items = self._yaml_loader(path)
+            print(f"Loaded {len(items)} rubric config(s) from {path}")
+            print(render_yaml_items(items, label="Rubric config"))
+        elif suffix == ".jsonl":
+            records = self._json_loader(path)
+            print(f"Loaded {len(records)} JSONL record(s) from {path}")
+            print(render_json_records(records))
+        else:
+            raise CLIError(f"Unsupported file extension '{suffix}'. Expected .yaml, .yml, or .jsonl.")
+
+        return 0
+
+
+class EvalCommand:
+    """Handler for `osmosis eval`."""
+
+    def __init__(
+        self,
+        *,
+        session: Optional[EvaluationSession] = None,
+        config_locator: Callable[[Optional[str], Path], Path] = discover_rubric_config_path,
+        suite_loader: Callable[[Path], RubricSuite] = load_rubric_suite,
+        dataset_loader: Optional[DatasetLoader] = None,
+        engine: Optional[RubricEvaluationEngine] = None,
+        renderer: Optional[ConsoleReportRenderer] = None,
+        report_writer: Optional[JsonReportWriter] = None,
+        baseline_comparator: Optional[BaselineComparator] = None,
+    ):
+        self._renderer = renderer or ConsoleReportRenderer()
+        if session is not None:
+            self._session = session
+        else:
+            self._session = EvaluationSession(
+                config_locator=config_locator,
+                suite_loader=suite_loader,
+                dataset_loader=dataset_loader,
+                engine=engine,
+                baseline_comparator=baseline_comparator,
+                report_writer=report_writer,
+                identifier_factory=self._generate_output_identifier,
+            )
+
+    def configure_parser(self, parser: argparse.ArgumentParser) -> None:
+        parser.set_defaults(handler=self.run)
+        parser.add_argument(
+            "-r",
+            "--rubric",
+            dest="rubric_id",
+            required=True,
+            help="Rubric identifier declared in the rubric config file.",
+        )
+        parser.add_argument(
+            "-d",
+            "--data",
+            dest="data_path",
+            required=True,
+            help="Path to the JSONL file containing evaluation records.",
+        )
+        parser.add_argument(
+            "-n",
+            "--number",
+            dest="number",
+            type=int,
+            default=1,
+            help="Run the evaluation multiple times to sample provider variance (default: 1).",
+        )
+        parser.add_argument(
+            "-c",
+            "--config",
+            dest="config_path",
+            help="Path to the rubric config YAML (defaults to searching near the data file).",
+        )
+        parser.add_argument(
+            "-o",
+            "--output",
+            dest="output_path",
+            help="Optional path to write evaluation results as JSON.",
+        )
+        parser.add_argument(
+            "-b",
+            "--baseline",
+            dest="baseline_path",
+            help="Optional path to a prior evaluation JSON to compare against.",
+        )
+
+    def run(self, args: argparse.Namespace) -> int:
+        rubric_id_raw = getattr(args, "rubric_id", "")
+        rubric_id = str(rubric_id_raw).strip()
+        if not rubric_id:
+            raise CLIError("Rubric identifier cannot be empty.")
+
+        data_path = Path(args.data_path).expanduser()
+        config_path_value = getattr(args, "config_path", None)
+        output_path_value = getattr(args, "output_path", None)
+        baseline_path_value = getattr(args, "baseline_path", None)
+
+        number_value = getattr(args, "number", None)
+        number = int(number_value) if number_value is not None else 1
+
+        request = EvaluationSessionRequest(
+            rubric_id=rubric_id,
+            data_path=data_path,
+            number=number,
+            config_path=Path(config_path_value).expanduser() if config_path_value else None,
+            output_path=Path(output_path_value).expanduser() if output_path_value else None,
+            baseline_path=Path(baseline_path_value).expanduser() if baseline_path_value else None,
+        )
+
+        try:
+            result = self._session.execute(request)
+        except KeyboardInterrupt:
+            print("Evaluation cancelled by user.")
+            return 1
+        self._renderer.render(result.report, result.baseline)
+
+        if result.written_path is not None:
+            print(f"Wrote evaluation results to {result.written_path}")
+
+        return 0
+
+    @staticmethod
+    def _generate_output_identifier() -> str:
+        return str(int(time.time()))
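The keyword-only constructor hooks (`yaml_loader`, `json_loader`, `session`, and friends) suggest the commands are built for dependency injection in tests. A minimal sketch with a stubbed JSONL loader (the stub function and its return payload are hypothetical):

```python
# Sketch: exercise PreviewCommand with an injected loader instead of the real
# JSONL parser. Note that run() still checks the path exists on disk before
# delegating to the loader, so point --path at any real .jsonl file.
import argparse
from pathlib import Path

from osmosis_ai.cli_commands import PreviewCommand


def stub_json_loader(path: Path) -> list[dict]:
    # Hypothetical stand-in for load_jsonl_records.
    return [{"conversation_id": "stub-001", "rubric_id": "support_followup"}]


command = PreviewCommand(json_loader=stub_json_loader)
parser = argparse.ArgumentParser()
command.configure_parser(parser)
args = parser.parse_args(["--path", "examples/sample_data.jsonl"])
command.run(args)  # prints "Loaded 1 JSONL record(s) from ..." plus the record
```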
osmosis_ai-0.2.3/osmosis_ai/cli_services/__init__.py (new file)

@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+from .config import (
+    ParsedItem,
+    RubricConfig,
+    RubricConfigParser,
+    RubricSuite,
+    discover_rubric_config_path,
+    load_rubric_configs,
+    load_rubric_suite,
+    render_yaml_items,
+)
+from .dataset import (
+    ConversationMessage,
+    DatasetLoader,
+    DatasetRecord,
+    load_jsonl_records,
+    render_json_records,
+)
+from .engine import (
+    EvaluationRecordResult,
+    EvaluationReport,
+    EvaluationRun,
+    RubricEvaluationEngine,
+    RubricEvaluator,
+)
+from .errors import CLIError
+from .reporting import (
+    BaselineComparator,
+    BaselineStatistics,
+    ConsoleReportRenderer,
+    JsonReportFormatter,
+    JsonReportWriter,
+    TextReportFormatter,
+)
+from .session import EvaluationSession, EvaluationSessionRequest, EvaluationSessionResult
+
+__all__ = [
+    "BaselineComparator",
+    "BaselineStatistics",
+    "CLIError",
+    "ConsoleReportRenderer",
+    "ConversationMessage",
+    "DatasetLoader",
+    "DatasetRecord",
+    "EvaluationSession",
+    "EvaluationSessionRequest",
+    "EvaluationSessionResult",
+    "EvaluationRecordResult",
+    "EvaluationReport",
+    "EvaluationRun",
+    "JsonReportFormatter",
+    "JsonReportWriter",
+    "ParsedItem",
+    "RubricConfig",
+    "RubricConfigParser",
+    "RubricEvaluationEngine",
+    "RubricEvaluator",
+    "RubricSuite",
+    "TextReportFormatter",
+    "discover_rubric_config_path",
+    "load_jsonl_records",
+    "load_rubric_configs",
+    "load_rubric_suite",
+    "render_json_records",
+    "render_yaml_items",
+]
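Since `cli_services/__init__.py` re-exports the service layer, the helpers the CLI uses internally are importable directly. A minimal sketch (the data path is illustrative, reusing the bundled example file):

```python
# Sketch: call the re-exported dataset helpers without going through the CLI.
from pathlib import Path

from osmosis_ai.cli_services import load_jsonl_records, render_json_records

records = load_jsonl_records(Path("examples/sample_data.jsonl"))
print(f"Loaded {len(records)} JSONL record(s)")
print(render_json_records(records))
```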