osmosis-ai 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of osmosis-ai might be problematic.
- osmosis_ai/__init__.py +13 -4
- osmosis_ai/cli.py +50 -0
- osmosis_ai/cli_commands.py +181 -0
- osmosis_ai/cli_services/__init__.py +67 -0
- osmosis_ai/cli_services/config.py +407 -0
- osmosis_ai/cli_services/dataset.py +229 -0
- osmosis_ai/cli_services/engine.py +251 -0
- osmosis_ai/cli_services/errors.py +7 -0
- osmosis_ai/cli_services/reporting.py +307 -0
- osmosis_ai/cli_services/session.py +174 -0
- osmosis_ai/cli_services/shared.py +209 -0
- osmosis_ai/consts.py +1 -1
- osmosis_ai/providers/__init__.py +36 -0
- osmosis_ai/providers/anthropic_provider.py +85 -0
- osmosis_ai/providers/base.py +60 -0
- osmosis_ai/providers/gemini_provider.py +314 -0
- osmosis_ai/providers/openai_family.py +607 -0
- osmosis_ai/providers/shared.py +92 -0
- osmosis_ai/rubric_eval.py +498 -0
- osmosis_ai/rubric_types.py +49 -0
- osmosis_ai/utils.py +392 -5
- osmosis_ai-0.2.3.dist-info/METADATA +303 -0
- osmosis_ai-0.2.3.dist-info/RECORD +27 -0
- osmosis_ai-0.2.3.dist-info/entry_points.txt +4 -0
- osmosis_ai-0.2.1.dist-info/METADATA +0 -143
- osmosis_ai-0.2.1.dist-info/RECORD +0 -8
- {osmosis_ai-0.2.1.dist-info → osmosis_ai-0.2.3.dist-info}/WHEEL +0 -0
- {osmosis_ai-0.2.1.dist-info → osmosis_ai-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {osmosis_ai-0.2.1.dist-info → osmosis_ai-0.2.3.dist-info}/top_level.txt +0 -0
osmosis_ai/__init__.py
CHANGED
@@ -1,8 +1,8 @@
 """
 osmosis-ai: A Python library for reward function validation with strict type enforcement.
 
-This library provides
-function signatures for
+This library provides decorators such as @osmosis_reward and @osmosis_rubric that
+enforce standardized function signatures for LLM-centric workflows.
 
 Features:
 - Type-safe reward function decoration
@@ -10,6 +10,15 @@ Features:
 - Support for optional configuration parameters
 """
 
-from .
+from .rubric_eval import MissingAPIKeyError, evaluate_rubric
+from .rubric_types import ModelNotFoundError, ProviderRequestError
+from .utils import osmosis_reward, osmosis_rubric
 
-__all__ = [
+__all__ = [
+    "osmosis_reward",
+    "osmosis_rubric",
+    "evaluate_rubric",
+    "MissingAPIKeyError",
+    "ProviderRequestError",
+    "ModelNotFoundError",
+]
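With the expanded __all__, the scoring entry point and its error types are importable straight from the package root. The sketch below only illustrates that surface: the arguments to evaluate_rubric are left elided because its signature lives in rubric_eval.py, which this diff lists but does not show, and treating the three new names as catchable exception classes is an assumption based on their naming.

    from osmosis_ai import (
        MissingAPIKeyError,
        ModelNotFoundError,
        ProviderRequestError,
        evaluate_rubric,
    )

    try:
        # Placeholder call: the real parameters are defined in rubric_eval.py, not in this diff.
        result = evaluate_rubric(...)
    except MissingAPIKeyError:
        print("Set the provider API key before evaluating.")
    except (ModelNotFoundError, ProviderRequestError) as exc:
        print(f"Provider call failed: {exc}")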
osmosis_ai/cli.py
ADDED
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import argparse
+import sys
+from typing import Optional
+
+from .cli_commands import EvalCommand, PreviewCommand
+from .cli_services import CLIError
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    """Entry point for the osmosis CLI."""
+    parser = _build_parser()
+    args = parser.parse_args(argv)
+
+    handler = getattr(args, "handler", None)
+    if handler is None:
+        parser.print_help()
+        return 1
+
+    try:
+        return handler(args)
+    except CLIError as exc:
+        print(f"Error: {exc}", file=sys.stderr)
+        return 1
+
+
+def _build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prog="osmosis", description="Utilities for inspecting Osmosis rubric and test data files."
+    )
+    subparsers = parser.add_subparsers(dest="command")
+
+    preview_parser = subparsers.add_parser(
+        "preview",
+        help="Preview a rubric YAML file or test JSONL file and print its parsed contents.",
+    )
+    PreviewCommand().configure_parser(preview_parser)
+
+    eval_parser = subparsers.add_parser(
+        "eval",
+        help="Evaluate JSONL conversations against a rubric using remote providers.",
+    )
+    EvalCommand().configure_parser(eval_parser)
+
+    return parser
+
+
+if __name__ == "__main__":
+    sys.exit(main())
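Because main() takes an explicit argv list, both subcommands can be driven directly from Python without relying on the installed console script. A small sketch, with placeholder file and rubric names:

    from osmosis_ai.cli import main

    # Pretty-print a rubric config or a JSONL test file (file names are placeholders).
    exit_code = main(["preview", "--path", "rubrics.yaml"])

    # Score a JSONL dataset against one rubric and write a JSON report.
    exit_code = main(["eval", "-r", "my_rubric", "-d", "records.jsonl", "-o", "results.json"])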
osmosis_ai/cli_commands.py
ADDED
@@ -0,0 +1,181 @@
+from __future__ import annotations
+
+import argparse
+import time
+from pathlib import Path
+from typing import Any, Callable, Optional
+
+from .cli_services import (
+    CLIError,
+    ParsedItem,
+    BaselineComparator,
+    ConsoleReportRenderer,
+    DatasetLoader,
+    EvaluationSession,
+    EvaluationSessionRequest,
+    JsonReportWriter,
+    RubricEvaluationEngine,
+    RubricSuite,
+    discover_rubric_config_path,
+    load_jsonl_records,
+    load_rubric_configs,
+    load_rubric_suite,
+    render_json_records,
+    render_yaml_items,
+)
+
+
+class PreviewCommand:
+    """Handler for `osmosis preview`."""
+
+    def __init__(
+        self,
+        *,
+        yaml_loader: Callable[[Path], list[ParsedItem]] = load_rubric_configs,
+        json_loader: Callable[[Path], list[dict[str, Any]]] = load_jsonl_records,
+    ):
+        self._yaml_loader = yaml_loader
+        self._json_loader = json_loader
+
+    def configure_parser(self, parser: argparse.ArgumentParser) -> None:
+        parser.set_defaults(handler=self.run)
+        parser.add_argument(
+            "-p",
+            "--path",
+            dest="path",
+            required=True,
+            help="Path to the YAML or JSONL file to inspect.",
+        )
+
+    def run(self, args: argparse.Namespace) -> int:
+        path = Path(args.path).expanduser()
+        if not path.exists():
+            raise CLIError(f"Path '{path}' does not exist.")
+        if path.is_dir():
+            raise CLIError(f"Expected a file path but got directory '{path}'.")
+
+        suffix = path.suffix.lower()
+        if suffix in {".yaml", ".yml"}:
+            items = self._yaml_loader(path)
+            print(f"Loaded {len(items)} rubric config(s) from {path}")
+            print(render_yaml_items(items, label="Rubric config"))
+        elif suffix == ".jsonl":
+            records = self._json_loader(path)
+            print(f"Loaded {len(records)} JSONL record(s) from {path}")
+            print(render_json_records(records))
+        else:
+            raise CLIError(f"Unsupported file extension '{suffix}'. Expected .yaml, .yml, or .jsonl.")
+
+        return 0
+
+
+class EvalCommand:
+    """Handler for `osmosis eval`."""
+
+    def __init__(
+        self,
+        *,
+        session: Optional[EvaluationSession] = None,
+        config_locator: Callable[[Optional[str], Path], Path] = discover_rubric_config_path,
+        suite_loader: Callable[[Path], RubricSuite] = load_rubric_suite,
+        dataset_loader: Optional[DatasetLoader] = None,
+        engine: Optional[RubricEvaluationEngine] = None,
+        renderer: Optional[ConsoleReportRenderer] = None,
+        report_writer: Optional[JsonReportWriter] = None,
+        baseline_comparator: Optional[BaselineComparator] = None,
+    ):
+        self._renderer = renderer or ConsoleReportRenderer()
+        if session is not None:
+            self._session = session
+        else:
+            self._session = EvaluationSession(
+                config_locator=config_locator,
+                suite_loader=suite_loader,
+                dataset_loader=dataset_loader,
+                engine=engine,
+                baseline_comparator=baseline_comparator,
+                report_writer=report_writer,
+                identifier_factory=self._generate_output_identifier,
+            )
+
+    def configure_parser(self, parser: argparse.ArgumentParser) -> None:
+        parser.set_defaults(handler=self.run)
+        parser.add_argument(
+            "-r",
+            "--rubric",
+            dest="rubric_id",
+            required=True,
+            help="Rubric identifier declared in the rubric config file.",
+        )
+        parser.add_argument(
+            "-d",
+            "--data",
+            dest="data_path",
+            required=True,
+            help="Path to the JSONL file containing evaluation records.",
+        )
+        parser.add_argument(
+            "-n",
+            "--number",
+            dest="number",
+            type=int,
+            default=1,
+            help="Run the evaluation multiple times to sample provider variance (default: 1).",
+        )
+        parser.add_argument(
+            "-c",
+            "--config",
+            dest="config_path",
+            help="Path to the rubric config YAML (defaults to searching near the data file).",
+        )
+        parser.add_argument(
+            "-o",
+            "--output",
+            dest="output_path",
+            help="Optional path to write evaluation results as JSON.",
+        )
+        parser.add_argument(
+            "-b",
+            "--baseline",
+            dest="baseline_path",
+            help="Optional path to a prior evaluation JSON to compare against.",
+        )
+
+    def run(self, args: argparse.Namespace) -> int:
+        rubric_id_raw = getattr(args, "rubric_id", "")
+        rubric_id = str(rubric_id_raw).strip()
+        if not rubric_id:
+            raise CLIError("Rubric identifier cannot be empty.")
+
+        data_path = Path(args.data_path).expanduser()
+        config_path_value = getattr(args, "config_path", None)
+        output_path_value = getattr(args, "output_path", None)
+        baseline_path_value = getattr(args, "baseline_path", None)
+
+        number_value = getattr(args, "number", None)
+        number = int(number_value) if number_value is not None else 1
+
+        request = EvaluationSessionRequest(
+            rubric_id=rubric_id,
+            data_path=data_path,
+            number=number,
+            config_path=Path(config_path_value).expanduser() if config_path_value else None,
+            output_path=Path(output_path_value).expanduser() if output_path_value else None,
+            baseline_path=Path(baseline_path_value).expanduser() if baseline_path_value else None,
+        )
+
+        try:
+            result = self._session.execute(request)
+        except KeyboardInterrupt:
+            print("Evaluation cancelled by user.")
+            return 1
+        self._renderer.render(result.report, result.baseline)
+
+        if result.written_path is not None:
+            print(f"Wrote evaluation results to {result.written_path}")
+
+        return 0
+
+    @staticmethod
+    def _generate_output_identifier() -> str:
+        return str(int(time.time()))
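One design note on the new module: both command classes take their collaborators as constructor arguments, so they can be exercised without touching real rubric files or providers. A rough sketch of that injection seam follows; the stub loader and temporary file are illustrative and not part of the package.

    import argparse
    import tempfile
    from pathlib import Path

    from osmosis_ai.cli_commands import PreviewCommand

    def fake_json_loader(path: Path) -> list[dict]:
        # Stand-in for load_jsonl_records: returns canned data instead of parsing the file.
        return [{"role": "user", "content": "hello"}]

    with tempfile.TemporaryDirectory() as tmp:
        sample = Path(tmp) / "sample.jsonl"
        sample.write_text("{}\n")  # run() only checks that the path exists and is not a directory

        command = PreviewCommand(json_loader=fake_json_loader)
        parser = argparse.ArgumentParser()
        command.configure_parser(parser)
        args = parser.parse_args(["--path", str(sample)])
        assert command.run(args) == 0  # prints the stubbed record via render_json_records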
osmosis_ai/cli_services/__init__.py
ADDED
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+from .config import (
+    ParsedItem,
+    RubricConfig,
+    RubricConfigParser,
+    RubricSuite,
+    discover_rubric_config_path,
+    load_rubric_configs,
+    load_rubric_suite,
+    render_yaml_items,
+)
+from .dataset import (
+    ConversationMessage,
+    DatasetLoader,
+    DatasetRecord,
+    load_jsonl_records,
+    render_json_records,
+)
+from .engine import (
+    EvaluationRecordResult,
+    EvaluationReport,
+    EvaluationRun,
+    RubricEvaluationEngine,
+    RubricEvaluator,
+)
+from .errors import CLIError
+from .reporting import (
+    BaselineComparator,
+    BaselineStatistics,
+    ConsoleReportRenderer,
+    JsonReportFormatter,
+    JsonReportWriter,
+    TextReportFormatter,
+)
+from .session import EvaluationSession, EvaluationSessionRequest, EvaluationSessionResult
+
+__all__ = [
+    "BaselineComparator",
+    "BaselineStatistics",
+    "CLIError",
+    "ConsoleReportRenderer",
+    "ConversationMessage",
+    "DatasetLoader",
+    "DatasetRecord",
+    "EvaluationSession",
+    "EvaluationSessionRequest",
+    "EvaluationSessionResult",
+    "EvaluationRecordResult",
+    "EvaluationReport",
+    "EvaluationRun",
+    "JsonReportFormatter",
+    "JsonReportWriter",
+    "ParsedItem",
+    "RubricConfig",
+    "RubricConfigParser",
+    "RubricEvaluationEngine",
+    "RubricEvaluator",
+    "RubricSuite",
+    "TextReportFormatter",
+    "discover_rubric_config_path",
+    "load_jsonl_records",
+    "load_rubric_configs",
+    "load_rubric_suite",
+    "render_json_records",
+    "render_yaml_items",
+]