eval-protocol 0.2.93.dev2__py3-none-any.whl → 0.2.93.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_protocol/_version.py +3 -3
- eval_protocol/cli.py +20 -0
- eval_protocol/cli_commands/create_rft.py +435 -337
- eval_protocol/cli_commands/local_test.py +65 -56
- eval_protocol/cli_commands/upload.py +18 -455
- eval_protocol/cli_commands/utils.py +511 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/METADATA +1 -1
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/RECORD +12 -11
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/WHEEL +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/entry_points.txt +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/licenses/LICENSE +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/top_level.txt +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import os
|
|
3
|
+
import shlex
|
|
3
4
|
import subprocess
|
|
4
5
|
import sys
|
|
5
|
-
import shlex
|
|
6
6
|
from typing import List
|
|
7
7
|
|
|
8
|
-
from .
|
|
8
|
+
from .utils import _build_entry_point, _discover_and_select_tests
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def _find_dockerfiles(root: str) -> List[str]:
|
|
@@ -19,12 +19,6 @@ def _find_dockerfiles(root: str) -> List[str]:
|
|
|
19
19
|
return dockerfiles
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
def _run_pytest_host(pytest_target: str) -> int:
|
|
23
|
-
print(f"Running locally: pytest {pytest_target} -vs")
|
|
24
|
-
proc = subprocess.run([sys.executable, "-m", "pytest", pytest_target, "-vs"])
|
|
25
|
-
return proc.returncode
|
|
26
|
-
|
|
27
|
-
|
|
28
22
|
def _build_docker_image(dockerfile_path: str, image_tag: str, build_extras: List[str] | None = None) -> bool:
|
|
29
23
|
context_dir = os.path.dirname(dockerfile_path)
|
|
30
24
|
print(f"Building Docker image '{image_tag}' from {dockerfile_path} ...")
|
|
@@ -41,6 +35,13 @@ def _build_docker_image(dockerfile_path: str, image_tag: str, build_extras: List
|
|
|
41
35
|
return False
|
|
42
36
|
|
|
43
37
|
|
|
38
|
+
def _run_pytest_host(pytest_target: str) -> int:
|
|
39
|
+
"""Run pytest against a target on the host and return its exit code."""
|
|
40
|
+
print(f"Running locally: pytest {pytest_target} -vs")
|
|
41
|
+
proc = subprocess.run([sys.executable, "-m", "pytest", pytest_target, "-vs"])
|
|
42
|
+
return proc.returncode
|
|
43
|
+
|
|
44
|
+
|
|
44
45
|
def _run_pytest_in_docker(
|
|
45
46
|
project_root: str, image_tag: str, pytest_target: str, run_extras: List[str] | None = None
|
|
46
47
|
) -> int:
|
|
@@ -87,6 +88,53 @@ def _run_pytest_in_docker(
|
|
|
87
88
|
return 1
|
|
88
89
|
|
|
89
90
|
|
|
91
|
+
def run_evaluator_test(
|
|
92
|
+
project_root: str,
|
|
93
|
+
pytest_target: str,
|
|
94
|
+
ignore_docker: bool,
|
|
95
|
+
docker_build_extra: str = "",
|
|
96
|
+
docker_run_extra: str = "",
|
|
97
|
+
) -> int:
|
|
98
|
+
"""Run an evaluator test either on host or in Docker, reusing local-test logic."""
|
|
99
|
+
build_extras = shlex.split(docker_build_extra) if docker_build_extra else []
|
|
100
|
+
run_extras = shlex.split(docker_run_extra) if docker_run_extra else []
|
|
101
|
+
|
|
102
|
+
if ignore_docker:
|
|
103
|
+
if not pytest_target:
|
|
104
|
+
print("Error: Failed to resolve a pytest target to run.")
|
|
105
|
+
return 1
|
|
106
|
+
return _run_pytest_host(pytest_target)
|
|
107
|
+
|
|
108
|
+
dockerfiles = _find_dockerfiles(project_root)
|
|
109
|
+
if len(dockerfiles) > 1:
|
|
110
|
+
print("Error: Multiple Dockerfiles found. Only one Dockerfile is allowed for evaluator validation/local-test.")
|
|
111
|
+
for df in dockerfiles:
|
|
112
|
+
print(f" - {df}")
|
|
113
|
+
print("Hint: or use --ignore-docker to bypass Docker and use local pytest.")
|
|
114
|
+
return 1
|
|
115
|
+
if len(dockerfiles) == 1:
|
|
116
|
+
# Ensure host home logs directory exists so container writes are visible to host ep logs
|
|
117
|
+
try:
|
|
118
|
+
os.makedirs(os.path.join(os.path.expanduser("~"), ".eval_protocol"), exist_ok=True)
|
|
119
|
+
except Exception:
|
|
120
|
+
pass
|
|
121
|
+
image_tag = "ep-evaluator:local"
|
|
122
|
+
ok = _build_docker_image(dockerfiles[0], image_tag, build_extras=build_extras)
|
|
123
|
+
if not ok:
|
|
124
|
+
print("Docker build failed. See logs above.")
|
|
125
|
+
return 1
|
|
126
|
+
if not pytest_target:
|
|
127
|
+
print("Error: Failed to resolve a pytest target to run.")
|
|
128
|
+
return 1
|
|
129
|
+
return _run_pytest_in_docker(project_root, image_tag, pytest_target, run_extras=run_extras)
|
|
130
|
+
|
|
131
|
+
# No Dockerfile: run on host
|
|
132
|
+
if not pytest_target:
|
|
133
|
+
print("Error: Failed to resolve a pytest target to run.")
|
|
134
|
+
return 1
|
|
135
|
+
return _run_pytest_host(pytest_target)
|
|
136
|
+
|
|
137
|
+
|
|
90
138
|
def local_test_command(args: argparse.Namespace) -> int:
|
|
91
139
|
project_root = os.getcwd()
|
|
92
140
|
|
|
@@ -99,12 +147,7 @@ def local_test_command(args: argparse.Namespace) -> int:
|
|
|
99
147
|
file_path = (
|
|
100
148
|
file_part if os.path.isabs(file_part) else os.path.abspath(os.path.join(project_root, file_part))
|
|
101
149
|
)
|
|
102
|
-
|
|
103
|
-
try:
|
|
104
|
-
rel = os.path.relpath(file_path, project_root)
|
|
105
|
-
except Exception:
|
|
106
|
-
rel = file_path
|
|
107
|
-
pytest_target = f"{rel}::{func_part}"
|
|
150
|
+
pytest_target = _build_entry_point(project_root, file_path, func_part)
|
|
108
151
|
else:
|
|
109
152
|
file_path = entry if os.path.isabs(entry) else os.path.abspath(os.path.join(project_root, entry))
|
|
110
153
|
# Use path relative to project_root when possible
|
|
@@ -114,14 +157,9 @@ def local_test_command(args: argparse.Namespace) -> int:
|
|
|
114
157
|
rel = file_path
|
|
115
158
|
pytest_target = rel
|
|
116
159
|
else:
|
|
117
|
-
tests = _discover_tests(project_root)
|
|
118
|
-
if not tests:
|
|
119
|
-
print("No evaluation tests found.\nHint: Ensure @evaluation_test is applied.")
|
|
120
|
-
return 1
|
|
121
160
|
non_interactive = bool(getattr(args, "yes", False))
|
|
122
|
-
selected =
|
|
161
|
+
selected = _discover_and_select_tests(project_root, non_interactive=non_interactive)
|
|
123
162
|
if not selected:
|
|
124
|
-
print("No tests selected.")
|
|
125
163
|
return 1
|
|
126
164
|
if len(selected) != 1:
|
|
127
165
|
print("Error: Please select exactly one evaluation test for 'local-test'.")
|
|
@@ -137,39 +175,10 @@ def local_test_command(args: argparse.Namespace) -> int:
|
|
|
137
175
|
ignore_docker = bool(getattr(args, "ignore_docker", False))
|
|
138
176
|
build_extras_str = getattr(args, "docker_build_extra", "") or ""
|
|
139
177
|
run_extras_str = getattr(args, "docker_run_extra", "") or ""
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
dockerfiles = _find_dockerfiles(project_root)
|
|
149
|
-
if len(dockerfiles) > 1:
|
|
150
|
-
print("Error: Multiple Dockerfiles found. Only one Dockerfile is allowed for local-test.")
|
|
151
|
-
for df in dockerfiles:
|
|
152
|
-
print(f" - {df}")
|
|
153
|
-
print("Hint: use --ignore-docker to bypass Docker.")
|
|
154
|
-
return 1
|
|
155
|
-
if len(dockerfiles) == 1:
|
|
156
|
-
# Ensure host home logs directory exists so container writes are visible to host ep logs
|
|
157
|
-
try:
|
|
158
|
-
os.makedirs(os.path.join(os.path.expanduser("~"), ".eval_protocol"), exist_ok=True)
|
|
159
|
-
except Exception:
|
|
160
|
-
pass
|
|
161
|
-
image_tag = "ep-evaluator:local"
|
|
162
|
-
ok = _build_docker_image(dockerfiles[0], image_tag, build_extras=build_extras)
|
|
163
|
-
if not ok:
|
|
164
|
-
print("Docker build failed. See logs above.")
|
|
165
|
-
return 1
|
|
166
|
-
if not pytest_target:
|
|
167
|
-
print("Error: Failed to resolve a pytest target to run.")
|
|
168
|
-
return 1
|
|
169
|
-
return _run_pytest_in_docker(project_root, image_tag, pytest_target, run_extras=run_extras)
|
|
170
|
-
|
|
171
|
-
# No Dockerfile: run on host
|
|
172
|
-
if not pytest_target:
|
|
173
|
-
print("Error: Failed to resolve a pytest target to run.")
|
|
174
|
-
return 1
|
|
175
|
-
return _run_pytest_host(pytest_target)
|
|
178
|
+
return run_evaluator_test(
|
|
179
|
+
project_root,
|
|
180
|
+
pytest_target,
|
|
181
|
+
ignore_docker=ignore_docker,
|
|
182
|
+
docker_build_extra=build_extras_str,
|
|
183
|
+
docker_run_extra=run_extras_str,
|
|
184
|
+
)
|