rakam-eval-sdk 0.1.15__py3-none-any.whl → 0.1.16rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rakam_eval_sdk/cli.py +119 -0
- rakam_eval_sdk/client.py +22 -9
- rakam_eval_sdk/decorators.py +44 -0
- rakam_eval_sdk/schema.py +4 -3
- rakam_eval_sdk/utils/decorator_utils.py +69 -0
- {rakam_eval_sdk-0.1.15.dist-info → rakam_eval_sdk-0.1.16rc1.dist-info}/METADATA +3 -1
- rakam_eval_sdk-0.1.16rc1.dist-info/RECORD +10 -0
- rakam_eval_sdk-0.1.16rc1.dist-info/entry_points.txt +3 -0
- rakam_eval_sdk-0.1.15.dist-info/RECORD +0 -6
- {rakam_eval_sdk-0.1.15.dist-info → rakam_eval_sdk-0.1.16rc1.dist-info}/WHEEL +0 -0
rakam_eval_sdk/cli.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# cli.py
from pathlib import Path

import typer

from rakam_eval_sdk.utils.decorator_utils import find_decorated_functions, load_module_from_path
from rakam_eval_sdk.decorators import eval_run

# Root Typer application; sub-commands below register themselves via
# @app.command().  Exposed as a console script through main() at the bottom
# of this module.
app = typer.Typer(help="CLI tools for evaluation utilities")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@app.command()
def find_eval_run_by_name(
    directory: Path = typer.Argument(
        Path("./eval"),
        exists=True,
        file_okay=False,
        dir_okay=True,
        help="Directory to scan (default: ./eval)",
    ),
    recursive: bool = typer.Option(
        False,
        "--recursive",
        "-r",
        help="Recursively search for Python files",
    ),
):
    """
    Find functions decorated with @eval_run.

    Scans ``*.py`` files under *directory* (recursively with -r) using
    static AST analysis only — nothing is imported or executed — and
    prints one ``<file>:<function>`` line per match.
    """
    # Derive the decorator name from the function object itself so a
    # rename of eval_run keeps the CLI in sync automatically.
    TARGET_DECORATOR = eval_run.__name__
    files = (
        directory.rglob("*.py")
        if recursive
        else directory.glob("*.py")
    )

    found = False

    # sorted() gives deterministic output order across runs/platforms.
    for file in sorted(files):
        functions = find_decorated_functions(file, TARGET_DECORATOR)
        for fn in functions:
            found = True
            typer.echo(f"{file}:{fn}")

    if not found:
        typer.echo(f"No @{TARGET_DECORATOR} functions found.")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@app.command("run")
def run_eval_runs(
    directory: Path = typer.Argument(
        Path("./eval"),
        exists=True,
        file_okay=False,
        dir_okay=True,
        help="Directory to scan (default: ./eval)",
    ),
    recursive: bool = typer.Option(
        False,
        "-r",
        "--recursive",
        help="Recursively search for Python files",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Only list functions without executing them",
    ),
):
    """
    Find and execute all functions decorated with @eval_run.
    """
    # Discovery is purely static (AST); a file is only imported once we
    # know it contains at least one decorated function and we are not in
    # dry-run mode.
    target = eval_run.__name__
    candidates = sorted(
        directory.rglob("*.py") if recursive else directory.glob("*.py")
    )

    executed_any = False

    for path in candidates:
        names = find_decorated_functions(path, target)
        if not names:
            continue

        typer.echo(f"\n📄 {path}")

        loaded = None
        if not dry_run:
            try:
                loaded = load_module_from_path(path)
            except Exception as e:
                typer.echo(f" ❌ Failed to import module: {e}")
                continue

        for name in names:
            typer.echo(f" ▶ {name}")

            if dry_run:
                continue

            try:
                getattr(loaded, name)()  # <-- actual execution
                executed_any = True
            except Exception as e:
                typer.echo(f" ❌ Execution failed: {e}")

    if not executed_any and not dry_run:
        typer.echo("\nNo @eval_run functions executed.")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def main():
    # Entry point — presumably registered as the console script in the
    # newly added entry_points.txt; verify against that file.
    app()


if __name__ == "__main__":
    main()
|
rakam_eval_sdk/client.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import random
|
|
3
|
-
from typing import Any, List, Optional, cast
|
|
3
|
+
from typing import Any, List, Optional, Union, cast
|
|
4
|
+
|
|
4
5
|
import requests
|
|
5
6
|
|
|
6
7
|
from .schema import (
|
|
@@ -79,10 +80,11 @@ class DeepEvalClient:
|
|
|
79
80
|
metrics: List[MetricConfig],
|
|
80
81
|
raise_exception: bool = False,
|
|
81
82
|
component: str = "unknown",
|
|
83
|
+
version: Union[str, None] = None,
|
|
82
84
|
) -> Optional[dict]:
|
|
83
85
|
"""Run synchronous text evaluation."""
|
|
84
86
|
payload = EvalConfig.model_construct(
|
|
85
|
-
data=data, metrics=metrics, component=component
|
|
87
|
+
data=data, metrics=metrics, component=component, version=version
|
|
86
88
|
).model_dump()
|
|
87
89
|
return self._request("/deepeval/text-eval", payload, raise_exception)
|
|
88
90
|
|
|
@@ -92,10 +94,11 @@ class DeepEvalClient:
|
|
|
92
94
|
metrics: List[MetricConfig],
|
|
93
95
|
raise_exception: bool = False,
|
|
94
96
|
component: str = "unknown",
|
|
97
|
+
version: Union[str, None] = None,
|
|
95
98
|
) -> Optional[dict]:
|
|
96
99
|
"""Run background text evaluation (async job)."""
|
|
97
100
|
payload = EvalConfig.model_construct(
|
|
98
|
-
data=data, metrics=metrics, component=component
|
|
101
|
+
data=data, metrics=metrics, component=component, version=version
|
|
99
102
|
).model_dump()
|
|
100
103
|
return self._request("/deepeval/text-eval/background", payload, raise_exception)
|
|
101
104
|
|
|
@@ -105,10 +108,11 @@ class DeepEvalClient:
|
|
|
105
108
|
metrics: List[SchemaMetricConfig],
|
|
106
109
|
raise_exception: bool = False,
|
|
107
110
|
component: str = "unknown",
|
|
111
|
+
version: Union[str, None] = None,
|
|
108
112
|
) -> Optional[dict]:
|
|
109
113
|
"""Run synchronous schema evaluation."""
|
|
110
114
|
payload = SchemaEvalConfig.model_construct(
|
|
111
|
-
data=data, metrics=metrics, component=component
|
|
115
|
+
data=data, metrics=metrics, component=component, version=version
|
|
112
116
|
).model_dump()
|
|
113
117
|
return self._request("/deepeval/schema-eval", payload, raise_exception)
|
|
114
118
|
|
|
@@ -118,10 +122,11 @@ class DeepEvalClient:
|
|
|
118
122
|
metrics: List[SchemaMetricConfig],
|
|
119
123
|
raise_exception: bool = False,
|
|
120
124
|
component: str = "unknown",
|
|
125
|
+
version: Union[str, None] = None,
|
|
121
126
|
) -> Optional[dict]:
|
|
122
127
|
"""Run background schema evaluation (async job)."""
|
|
123
128
|
payload = SchemaEvalConfig.model_construct(
|
|
124
|
-
data=data, metrics=metrics, component=component
|
|
129
|
+
data=data, metrics=metrics, component=component, version=version
|
|
125
130
|
).model_dump()
|
|
126
131
|
return self._request(
|
|
127
132
|
"/deepeval/schema-eval/background", payload, raise_exception
|
|
@@ -134,11 +139,14 @@ class DeepEvalClient:
|
|
|
134
139
|
chance: float,
|
|
135
140
|
raise_exception: bool = False,
|
|
136
141
|
component: str = "unknown",
|
|
142
|
+
version: Union[str, None] = None,
|
|
137
143
|
) -> Optional[dict]:
|
|
138
144
|
"""Randomly run text_eval based on a probability between 0 and 1."""
|
|
139
145
|
self._validate_chance(chance)
|
|
140
146
|
return (
|
|
141
|
-
self.text_eval(
|
|
147
|
+
self.text_eval(
|
|
148
|
+
data, metrics, raise_exception, component=component, version=version
|
|
149
|
+
)
|
|
142
150
|
if random.random() <= chance
|
|
143
151
|
else None
|
|
144
152
|
)
|
|
@@ -150,12 +158,13 @@ class DeepEvalClient:
|
|
|
150
158
|
chance: float,
|
|
151
159
|
raise_exception: bool = False,
|
|
152
160
|
component: str = "unknown",
|
|
161
|
+
version: Union[str, None] = None,
|
|
153
162
|
) -> Optional[dict]:
|
|
154
163
|
"""Randomly run text_eval_background based on a probability between 0 and 1."""
|
|
155
164
|
self._validate_chance(chance)
|
|
156
165
|
return (
|
|
157
166
|
self.text_eval_background(
|
|
158
|
-
data, metrics, raise_exception, component=component
|
|
167
|
+
data, metrics, raise_exception, component=component, version=version
|
|
159
168
|
)
|
|
160
169
|
if random.random() <= chance
|
|
161
170
|
else None
|
|
@@ -168,11 +177,14 @@ class DeepEvalClient:
|
|
|
168
177
|
chance: float,
|
|
169
178
|
raise_exception: bool = False,
|
|
170
179
|
component: str = "unknown",
|
|
180
|
+
version: Union[str, None] = None,
|
|
171
181
|
) -> Optional[dict]:
|
|
172
182
|
"""Randomly run schema_eval based on a probability between 0 and 1."""
|
|
173
183
|
self._validate_chance(chance)
|
|
174
184
|
return (
|
|
175
|
-
self.schema_eval(
|
|
185
|
+
self.schema_eval(
|
|
186
|
+
data, metrics, raise_exception, component=component, version=version
|
|
187
|
+
)
|
|
176
188
|
if random.random() <= chance
|
|
177
189
|
else None
|
|
178
190
|
)
|
|
@@ -184,12 +196,13 @@ class DeepEvalClient:
|
|
|
184
196
|
chance: float,
|
|
185
197
|
raise_exception: bool = False,
|
|
186
198
|
component: str = "unknown",
|
|
199
|
+
version: Union[str, None] = None,
|
|
187
200
|
) -> Optional[dict]:
|
|
188
201
|
"""Randomly run text_eval_background based on a probability between 0 and 1."""
|
|
189
202
|
self._validate_chance(chance)
|
|
190
203
|
return (
|
|
191
204
|
self.schema_eval_background(
|
|
192
|
-
data, metrics, raise_exception, component=component
|
|
205
|
+
data, metrics, raise_exception, component=component, version=version
|
|
193
206
|
)
|
|
194
207
|
if random.random() <= chance
|
|
195
208
|
else None
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
|
|
2
|
+
import time
|
|
3
|
+
import os
|
|
4
|
+
import psutil
|
|
5
|
+
import functools
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def eval_run(*dargs, **dkwargs):
    """Decorator that reports wall time, CPU time, and RSS memory delta.

    Supports both usage forms the CLI's AST matcher recognizes::

        @eval_run          # bare
        def f(): ...

        @eval_run()        # parameterized (arguments currently ignored)
        def g(): ...

    Metrics are printed after the wrapped function returns, even when it
    raises (the ``finally`` block runs either way).
    """

    def wrapper(func):
        @functools.wraps(func)
        def inner(*args, **kwargs):
            process = psutil.Process(os.getpid())

            # Start metrics
            start_time = time.perf_counter()
            start_cpu = process.cpu_times()
            start_mem = process.memory_info().rss

            try:
                return func(*args, **kwargs)
            finally:
                # End metrics — taken even on exception so every call
                # produces a report.
                end_time = time.perf_counter()
                end_cpu = process.cpu_times()
                end_mem = process.memory_info().rss

                elapsed = end_time - start_time
                cpu_used = (
                    (end_cpu.user + end_cpu.system)
                    - (start_cpu.user + start_cpu.system)
                )
                mem_diff_mb = (end_mem - start_mem) / (1024 * 1024)

                print(
                    f"[eval_run] {func.__module__}.{func.__name__} | "
                    f"time={elapsed:.4f}s | "
                    f"cpu={cpu_used:.4f}s | "
                    f"mem_delta={mem_diff_mb:.2f}MB"
                )

        return inner

    # Bug fix: bare ``@eval_run`` passes the function itself as the single
    # positional argument.  The previous implementation returned ``wrapper``
    # in that case, so calling the decorated function treated its first
    # argument as ``func`` instead of executing anything.
    if len(dargs) == 1 and not dkwargs and callable(dargs[0]):
        return wrapper(dargs[0])
    return wrapper
|
rakam_eval_sdk/schema.py
CHANGED
|
@@ -39,7 +39,7 @@ class CorrectnessConfig(MetricConfigBase):
|
|
|
39
39
|
"Minor formatting differences like '$1,250.00' vs '$1250.00' are acceptable."
|
|
40
40
|
]
|
|
41
41
|
)
|
|
42
|
-
criteria: Optional[str] = None,
|
|
42
|
+
criteria: Optional[str] = None
|
|
43
43
|
params: List[Literal["actual_output", "expected_output"]] = Field(
|
|
44
44
|
default=["actual_output", "expected_output"]
|
|
45
45
|
)
|
|
@@ -94,8 +94,7 @@ MetricConfig = Annotated[
|
|
|
94
94
|
]
|
|
95
95
|
|
|
96
96
|
SchemaMetricConfig = Annotated[
|
|
97
|
-
Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(
|
|
98
|
-
discriminator="type")
|
|
97
|
+
Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(discriminator="type")
|
|
99
98
|
]
|
|
100
99
|
|
|
101
100
|
|
|
@@ -118,11 +117,13 @@ class SchemaInputItem(InputItem):
|
|
|
118
117
|
|
|
119
118
|
class EvalConfig(BaseModel):
    """Request payload for a text evaluation."""

    # Name of the component under evaluation; "unknown" when not supplied.
    component: str = "unknown"
    # Optional version tag (new in 0.1.16) threaded through from the client.
    version: Union[str, None] = None
    # Text items to evaluate.
    data: List[TextInputItem]
    # Metric configurations; defaults to an empty list.
    metrics: List[MetricConfig] = Field(default_factory=list)
|
|
123
123
|
|
|
124
124
|
|
|
125
125
|
class SchemaEvalConfig(BaseModel):
    """Request payload for a schema evaluation."""

    # Name of the component under evaluation; "unknown" when not supplied.
    component: str = "unknown"
    # Optional version tag (new in 0.1.16) threaded through from the client.
    version: Union[str, None] = None
    # Schema items to evaluate.
    data: List[SchemaInputItem]
    # Schema metric configurations; defaults to an empty list.
    metrics: List[SchemaMetricConfig] = Field(default_factory=list)
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
import importlib
|
|
3
|
+
import importlib.util
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from types import ModuleType
|
|
7
|
+
from typing import Callable, Iterable, List, Tuple
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DecoratedFunctionVisitor(ast.NodeVisitor):
    """AST visitor that collects names of functions carrying a decorator.

    Matches ``@name``, ``@module.name``, and either form called with
    arguments (``@name(...)``).  Only the trailing attribute/identifier
    is compared, so ``@pkg.mod.name`` also matches ``name``.
    """

    def __init__(self, decorator_name: str):
        self.decorator_name = decorator_name
        self.results: List[str] = []

    def _record_if_decorated(self, node) -> None:
        # Shared handler for sync and async defs; keeps walking so that
        # nested functions are examined too.
        if any(self._matches(deco) for deco in node.decorator_list):
            self.results.append(node.name)
        self.generic_visit(node)

    def visit_FunctionDef(self, node: ast.FunctionDef):
        self._record_if_decorated(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        self._record_if_decorated(node)

    def _matches(self, deco: ast.expr) -> bool:
        # @deco(...) — unwrap to the callee and re-check.
        if isinstance(deco, ast.Call):
            return self._matches(deco.func)
        # @deco
        if isinstance(deco, ast.Name):
            return deco.id == self.decorator_name
        # @module.deco
        return isinstance(deco, ast.Attribute) and deco.attr == self.decorator_name
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def find_decorated_functions(
    file_path: Path,
    decorator_name: str,
) -> List[str]:
    """Statically list functions in *file_path* decorated with *decorator_name*.

    The file is parsed, never imported, so this is safe on modules with
    import-time side effects.
    """
    source = file_path.read_text(encoding="utf-8")
    visitor = DecoratedFunctionVisitor(decorator_name)
    visitor.visit(ast.parse(source))
    return visitor.results
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def load_module_from_path(file_path: Path) -> ModuleType:
    """Import the Python source file at *file_path* and return the module.

    Raises:
        ImportError: if no import spec/loader can be created for the path.
        Exception: anything raised by the module's own top-level code.
    """
    import sys  # local import: needed only for sys.modules registration

    spec = importlib.util.spec_from_file_location(file_path.stem, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot import {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Fix: register the module BEFORE exec_module, as the importlib docs
    # require — otherwise modules that reference themselves at import time
    # (dataclasses, pickling, ``sys.modules[__name__]`` lookups) break.
    sys.modules[spec.name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Roll back the registration on failure so a broken half-initialized
        # module is not left importable.
        sys.modules.pop(spec.name, None)
        raise
    return module
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_function(module: ModuleType, function_name: str) -> Callable:
    """Fetch attribute *function_name* from *module*.

    Raises AttributeError when the attribute is absent — or present but
    ``None``, which this helper treats the same as missing.
    """
    candidate = getattr(module, function_name, None)
    if candidate is not None:
        return candidate
    raise AttributeError(f"{function_name} not found in {module.__name__}")
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: rakam-eval-sdk
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.16rc1
|
|
4
4
|
Summary: Evaluation Framework SDK
|
|
5
5
|
Author: Mohamed Bachar Touil
|
|
6
6
|
License: MIT
|
|
7
|
+
Requires-Dist: psutil>=7.2.1
|
|
7
8
|
Requires-Dist: pydantic>=2.10.6
|
|
8
9
|
Requires-Dist: requests
|
|
10
|
+
Requires-Dist: typer>=0.20.1
|
|
9
11
|
Requires-Python: >=3.8
|
|
10
12
|
Description-Content-Type: text/markdown
|
|
11
13
|
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
rakam_eval_sdk/cli.py,sha256=9BHZte3cS1LWL0_dOVEtws9xIhdw0yORW93Dm1uDxDw,2876
|
|
3
|
+
rakam_eval_sdk/client.py,sha256=q-Y11maLVKaEnq4OSyFCqrP3JgFS1xpyp9-bZhFssIA,7123
|
|
4
|
+
rakam_eval_sdk/decorators.py,sha256=ZEcZb2KUsPrtx-Guc7tYN9MVCMxIQ83yhiJxKE1fjdw,1262
|
|
5
|
+
rakam_eval_sdk/schema.py,sha256=MQfF0SEHf2wzeXJNTsMs-yDbN0vZJQbN_crfpPXsTk8,3467
|
|
6
|
+
rakam_eval_sdk/utils/decorator_utils.py,sha256=hCC4F7v3KjGSDt2NUXfDsbBTMPzlG6wMzZVdR_wWn14,2048
|
|
7
|
+
rakam_eval_sdk-0.1.16rc1.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
8
|
+
rakam_eval_sdk-0.1.16rc1.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
|
|
9
|
+
rakam_eval_sdk-0.1.16rc1.dist-info/METADATA,sha256=DRKzVNNF426R3ipnpG8Xr5LXKLTY4Ar9WdPIxe6hjzI,5991
|
|
10
|
+
rakam_eval_sdk-0.1.16rc1.dist-info/RECORD,,
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
rakam_eval_sdk/client.py,sha256=EdYA8SFoq6PhO6JNxu_j2eJSd3g4I0rtUtGJmGgvfzA,6583
|
|
3
|
-
rakam_eval_sdk/schema.py,sha256=FaY7nlcbzlFhH7lZl9iFfJ6T0wGVte7TYbt-w_wpFuI,3400
|
|
4
|
-
rakam_eval_sdk-0.1.15.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
5
|
-
rakam_eval_sdk-0.1.15.dist-info/METADATA,sha256=PhyFhXFiTeCt2KK_kBjGGXDI69q8qFmyg-aEiKh16OQ,5930
|
|
6
|
-
rakam_eval_sdk-0.1.15.dist-info/RECORD,,
|
|
File without changes
|