eval-protocol 0.2.93.dev2__py3-none-any.whl → 0.2.93.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_protocol/_version.py +3 -3
- eval_protocol/cli.py +20 -0
- eval_protocol/cli_commands/create_rft.py +435 -337
- eval_protocol/cli_commands/local_test.py +65 -56
- eval_protocol/cli_commands/upload.py +18 -455
- eval_protocol/cli_commands/utils.py +511 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/METADATA +1 -1
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/RECORD +12 -11
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/WHEEL +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/entry_points.txt +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/licenses/LICENSE +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,511 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import time
|
|
4
|
+
import inspect
|
|
5
|
+
import subprocess
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
from ..auth import (
|
|
13
|
+
get_fireworks_account_id,
|
|
14
|
+
get_fireworks_api_base,
|
|
15
|
+
get_fireworks_api_key,
|
|
16
|
+
verify_api_key_and_get_account_id,
|
|
17
|
+
)
|
|
18
|
+
from ..fireworks_rft import _map_api_host_to_app_host
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class DiscoveredTest:
    """A single evaluation test found during discovery."""

    # Name of the module the test was collected from.
    module_path: str
    # Module name; discovery stores the same value here as in ``module_path``.
    module_name: str
    # "<module_name>.<function_name>"; discovery de-duplicates on this value.
    qualname: str
    # Absolute path of the source file that defines the test.
    file_path: str
    # First source line of the test function, or None when it is unavailable.
    lineno: Optional[int]
    # True when the test carries at least one ``parametrize`` mark.
    has_parametrize: bool
    # Number of parameter sets reported for the test (0 when not parametrized).
    param_count: int
    # Display node ids of the form "<file name>::<function>[<param id>]".
    nodeids: List[str]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _is_eval_protocol_test(obj: Any) -> bool:
    """Return True if ``obj`` looks like an eval_protocol evaluation test.

    An object qualifies when it is callable, exposes a non-None
    ``_origin_func`` attribute, and carries at least one entry in
    ``pytestmark``.
    """
    if not callable(obj) or getattr(obj, "_origin_func", None) is None:
        return False

    marks = getattr(obj, "pytestmark", [])
    # ``pytestmark`` is not always a plain list/tuple (e.g. pytest proxy
    # objects), so coerce it; anything that cannot be iterated is "no marks".
    if not isinstance(marks, (list, tuple)):
        try:
            marks = list(marks) if marks else []
        except (TypeError, AttributeError):
            return False
    return len(marks) > 0
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _extract_param_info_from_marks(obj: Any) -> tuple[bool, int, list[str]]:
    """Extract parametrization info from the pytest marks on ``obj``.

    Only marks named ``parametrize`` are considered. ``argnames``,
    ``argvalues`` and ``ids`` are read from the mark's kwargs, falling back
    to the first two positional args for names and values.

    Returns:
        (has_parametrize, param_count, param_ids). When several
        ``parametrize`` marks are present, the count and ids of the last one
        with list/tuple ``argvalues`` win; they are not multiplied together.
    """
    marks = getattr(obj, "pytestmark", [])

    # ``pytestmark`` is not always a plain list/tuple (e.g. pytest proxy
    # objects); coerce it and treat anything non-iterable as "no marks".
    if not isinstance(marks, (list, tuple)):
        try:
            marks = list(marks) if marks else []
        except (TypeError, AttributeError):
            marks = []

    has_parametrize = False
    total_combinations = 0
    all_param_ids: list[str] = []

    for mark in marks:
        if getattr(mark, "name", "") != "parametrize":
            continue
        has_parametrize = True

        # Read both containers defensively so a mark-like object lacking
        # ``args`` (or ``kwargs``) does not raise.
        args = getattr(mark, "args", None) or ()
        kwargs = getattr(mark, "kwargs", None) or {}
        argnames = kwargs.get("argnames", args[0] if args else "")
        argvalues = kwargs.get("argvalues", args[1] if len(args) > 1 else [])
        ids = kwargs.get("ids", [])

        if not isinstance(argvalues, (list, tuple)):
            # Values of unknown shape cannot be counted.
            continue

        count = len(argvalues)
        total_combinations = count  # For now, just use the count from this mark

        if ids and isinstance(ids, (list, tuple)):
            # Explicit ids take precedence, capped at the number of values.
            all_param_ids = list(ids[:count])
        elif isinstance(argnames, str) and "," not in argnames:
            # Single parameter: label each value by its index.
            all_param_ids = [f"{argnames}={i}" for i in range(count)]
        else:
            # Multiple parameters: fall back to generic labels.
            all_param_ids = [f"variant_{i}" for i in range(count)]

    return has_parametrize, total_combinations, all_param_ids
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _module_name_from_nodeid(nodeid: str) -> str:
    """Derive a dotted module name from the file part of a pytest node id."""
    file_part = nodeid.split("::")[0]
    # Strip only the trailing extension; a blanket replace of ".py" would
    # also mangle path components such as "pyx" once "/" has become ".".
    if file_part.endswith(".py"):
        file_part = file_part[: -len(".py")]
    return file_part.replace("/", ".")


def _discover_tests(root: str) -> list[DiscoveredTest]:
    """Discover eval_protocol tests under the given root directory.

    Runs an in-process pytest collection over ``root`` (all ``*.py`` files),
    keeps the collected items that look like evaluation tests, and returns
    one ``DiscoveredTest`` per qualified name, sorted by file path and line.

    Side effect: the absolute ``root`` is prepended to ``sys.path`` if it is
    not already there.

    Returns:
        The discovered tests, or an empty list if pytest collection raised.
    """
    import contextlib
    import io

    abs_root = os.path.abspath(root)
    if abs_root not in sys.path:
        sys.path.insert(0, abs_root)

    discovered: list[DiscoveredTest] = []

    class CollectionPlugin:
        """Plugin that records the items pytest collected."""

        def __init__(self) -> None:
            self.items: list[Any] = []

        def pytest_ignore_collect(self, collection_path, config):  # type: ignore[override]
            """Ignore problematic files before pytest tries to import them."""
            name = collection_path.name
            # Specific files that should never be collected.
            if name in ("setup.py", "versioneer.py", "conf.py", "__main__.py"):
                return True
            # Hidden files (leading dot) and test_discovery* files.
            if name.startswith((".", "test_discovery")):
                return True
            return None

        def pytest_collection_modifyitems(self, items):  # type: ignore[override]
            """Hook called after collection is done."""
            self.items = items

    plugin = CollectionPlugin()

    # Collection only: test functions are not run. NOTE: pytest still imports
    # the candidate modules while collecting, so module-level code executes.
    args = [
        abs_root,
        "--collect-only",
        "-q",
        "--pythonwarnings=ignore",
        "-o",
        "python_files=*.py",  # Override to collect all .py files
    ]

    try:
        # Suppress pytest output
        with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
            pytest.main(args, plugins=[plugin])
    except Exception:
        # If pytest collection fails, fall back to empty list
        return []

    for item in plugin.items:
        if not hasattr(item, "obj"):
            continue

        obj = item.obj
        if not _is_eval_protocol_test(obj):
            continue

        # Prefer the undecorated function for source location lookups.
        origin = getattr(obj, "_origin_func", obj)
        try:
            src_file = inspect.getsourcefile(origin) or str(item.path)
            _, lineno = inspect.getsourcelines(origin)
        except Exception:
            # Best effort: fall back to the collected path without a line.
            src_file, lineno = str(item.path), None

        has_parametrize, param_count, param_ids = _extract_param_info_from_marks(obj)

        if hasattr(item, "module"):
            module_name = item.module.__name__  # type: ignore[attr-defined]
        else:
            module_name = _module_name_from_nodeid(item.nodeid)
        # Drop any "[param-id]" suffix pytest appended to the item name.
        func_name = item.name.split("[")[0]

        base_nodeid = f"{os.path.basename(src_file)}::{func_name}"
        if param_ids:
            nodeids = [f"{base_nodeid}[{pid}]" for pid in param_ids]
        else:
            nodeids = [base_nodeid]

        discovered.append(
            DiscoveredTest(
                module_path=module_name,
                module_name=module_name,
                qualname=f"{module_name}.{func_name}",
                file_path=os.path.abspath(src_file),
                lineno=lineno,
                has_parametrize=has_parametrize,
                param_count=param_count,
                nodeids=nodeids,
            )
        )

    # Deduplicate by qualname (parametrized tests are collected once per
    # variant), keeping the entry that reports the most parameter sets.
    by_qual: dict[str, DiscoveredTest] = {}
    for t in discovered:
        existing = by_qual.get(t.qualname)
        if not existing or t.param_count > existing.param_count:
            by_qual[t.qualname] = t
    return sorted(by_qual.values(), key=lambda x: (x.file_path, x.lineno or 0))
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _format_test_choice(test: DiscoveredTest, idx: int) -> str:
    """Format a test as a one-line choice string for display.

    The result is "<function> - <file>[:<line>]", with a "(N variants)" note
    inserted when the test is parametrized with more than one variant.
    ``idx`` is accepted for interface compatibility but is not used.
    """
    # Show only the last dotted component of the qualified name.
    name = test.qualname.split(".")[-1]
    file_name = Path(test.file_path).name
    location = f"{file_name}:{test.lineno}" if test.lineno else file_name

    if test.has_parametrize and test.param_count > 1:
        return f"{name} ({test.param_count} variants) - {location}"
    return f"{name} - {location}"
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _prompt_select_interactive(tests: list[DiscoveredTest]) -> list[DiscoveredTest]:
    """Interactively select one test using questionary.

    With a single test the user is asked to confirm it; otherwise a
    single-choice list is shown. Falls back to ``_prompt_select_fallback``
    when questionary cannot be imported.

    Returns:
        A list holding the chosen test (or ``tests`` itself when the only
        test is confirmed), or an empty list if the user declines or cancels.
    """
    try:
        import questionary
        from questionary import Style

        custom_style = Style(
            [
                ("qmark", "fg:#673ab7 bold"),
                ("question", "bold"),
                ("answer", "fg:#f44336 bold"),
                ("pointer", "fg:#673ab7 bold"),
                ("highlighted", "fg:#673ab7 bold"),
                ("selected", "fg:#cc5454"),
                ("separator", "fg:#cc5454"),
                ("instruction", ""),
                ("text", ""),
            ]
        )

        # Only one test: ask for confirmation instead of showing a list.
        if len(tests) == 1:
            print(f"\nFound 1 test: {_format_test_choice(tests[0], 1)}")
            confirm = questionary.confirm("Select this test?", default=True, style=custom_style).ask()
            return tests if confirm else []

        # Single-select UX
        print("\n")
        print("Tip: Use ↑/↓ arrows to navigate and press ENTER to select.\n")

        # Each choice carries the 0-based index into ``tests`` as its value.
        choices = [
            {"name": _format_test_choice(t, idx), "value": idx - 1}
            for idx, t in enumerate(tests, 1)
        ]

        selected = questionary.select(
            "Select an evaluation test to upload:", choices=choices, style=custom_style
        ).ask()

        if selected is None:  # Ctrl+C
            print("\nUpload cancelled.")
            return []

        print("\n✓ Selected 1 test")
        return [tests[selected]]

    except ImportError:
        # Fallback to simpler implementation
        return _prompt_select_fallback(tests)
    except KeyboardInterrupt:
        print("\n\nUpload cancelled.")
        return []
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _prompt_select_fallback(tests: list[DiscoveredTest]) -> list[DiscoveredTest]:
    """Plain-text selection prompt used when questionary is not available.

    Prints a numbered listing of ``tests`` and reads one number from stdin.

    Returns:
        A one-element list with the chosen test, or an empty list when the
        input is not a number, is out of range, or the user presses Ctrl+C.
    """
    print("\n" + "=" * 80)
    print("Discovered evaluation tests:")
    print("=" * 80)
    print("\nTip: Install questionary for better UX: pip install questionary\n")

    for idx, t in enumerate(tests, 1):
        loc = f"{t.file_path}:{t.lineno}" if t.lineno else t.file_path
        print(f" [{idx}] {t.qualname}")
        print(f" Location: {loc}")

        if t.has_parametrize and t.nodeids:
            print(f" Parameterized: {t.param_count} variant(s)")
            # Show the first few variants as examples.
            for nodeid in t.nodeids[:3]:
                # Display only the text inside the trailing "[...]".
                if "[" in nodeid:
                    param_part = nodeid.split("[", 1)[1].rstrip("]")
                    print(f" - {param_part}")
            if len(t.nodeids) > 3:
                print(f" ... and {len(t.nodeids) - 3} more")
        else:
            print(" Type: Single test (no parametrization)")
        print()

    print("=" * 80)
    try:
        choice = input("Enter the number to select: ").strip()
    except KeyboardInterrupt:
        print("\n\nUpload cancelled.")
        return []

    # isdecimal() (not isdigit()) so that everything accepted here is also
    # accepted by int(); isdigit() is true for characters such as "²".
    if not choice.isdecimal():
        print("\n⚠️ Invalid selection.")
        return []
    n = int(choice)
    if not (1 <= n <= len(tests)):
        print("\n⚠️ Selection out of range.")
        return []
    return [tests[n - 1]]
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _prompt_select(tests: list[DiscoveredTest], non_interactive: bool) -> list[DiscoveredTest]:
    """Return the tests to upload, prompting the user unless non-interactive.

    In non-interactive mode every discovered test is returned unchanged;
    otherwise the choice is delegated to ``_prompt_select_interactive``.
    """
    if non_interactive:
        return tests
    return _prompt_select_interactive(tests)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _discover_and_select_tests(project_root: str, non_interactive: bool) -> Optional[list[DiscoveredTest]]:
    """Discover evaluation tests under ``project_root`` and let the user pick.

    Returns the selected tests, or None when nothing was discovered, the
    selector raised, or nothing was selected. A short message is printed in
    each of those cases. Callers are responsible for enforcing additional
    constraints (e.g. exactly one selection).
    """
    print("Scanning for evaluation tests...")
    tests = _discover_tests(project_root)
    if not tests:
        print("No evaluation tests found.")
        print("\nHint: Make sure your tests use the @evaluation_test decorator.")
        return None

    try:
        selected_tests = _prompt_select(tests, non_interactive=non_interactive)
    except Exception:
        # Any selector failure is reported with a hint towards explicit flags.
        print("Error: Failed to open selector UI. Please pass --evaluator or --entry explicitly.")
        return None

    if not selected_tests:
        print("No tests selected.")
        return None

    return selected_tests
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _normalize_evaluator_id(evaluator_id: str) -> str:
    """Normalize an evaluator ID to meet Fireworks requirements.

    The result contains only lowercase a-z, 0-9 and hyphen (-), has no
    leading, trailing or repeated hyphens, starts with a letter ("eval-" is
    prepended otherwise), and is at most 63 characters long. An input with
    no usable characters yields an empty string.
    """
    import re

    # Lowercase, and treat underscores as hyphens.
    normalized = evaluator_id.lower().replace("_", "-")
    # Drop every character outside the allowed set.
    normalized = re.sub(r"[^a-z0-9-]", "", normalized)
    # Collapse hyphen runs, then trim hyphens from both ends.
    normalized = re.sub(r"-+", "-", normalized).strip("-")

    # Ensure it starts with a letter (Fireworks requirement)
    if normalized and not normalized[0].isalpha():
        normalized = "eval-" + normalized

    # Truncate to 63 characters without leaving a trailing hyphen.
    if len(normalized) > 63:
        normalized = normalized[:63].rstrip("-")

    return normalized
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _ensure_account_id() -> Optional[str]:
    """Resolve and cache FIREWORKS_ACCOUNT_ID if possible.

    Returns the already-configured account id when there is one. Otherwise,
    if an API key is available, the account id is looked up with it; a
    successful lookup is stored in ``os.environ["FIREWORKS_ACCOUNT_ID"]``
    and returned. Returns the (falsy) configured value when nothing resolves.
    """
    account_id = get_fireworks_account_id()
    api_key = get_fireworks_api_key()
    if account_id or not api_key:
        return account_id

    resolved = verify_api_key_and_get_account_id(api_key=api_key, api_base=get_fireworks_api_base())
    if resolved:
        # Cache in the process environment for later lookups.
        os.environ["FIREWORKS_ACCOUNT_ID"] = resolved
        return resolved
    return account_id
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _extract_terminal_segment(resource_name: str) -> str:
    """Return the last path segment of a slash-separated resource name.

    Leading and trailing slashes are ignored. A value that is not a string
    is returned unchanged.
    """
    try:
        return resource_name.strip("/").split("/")[-1]
    except (AttributeError, TypeError):
        # Not a str (e.g. None or bytes): hand the value back as-is.
        return resource_name
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def _build_evaluator_dashboard_url(evaluator_id: str) -> str:
    """Build the evaluator dashboard URL for an evaluator id or resource name.

    Only the last path segment of ``evaluator_id`` is used, so a
    fully-qualified resource name and a bare id give the same URL.
    """
    app_base = _map_api_host_to_app_host(get_fireworks_api_base())
    evaluator_slug = _extract_terminal_segment(evaluator_id)
    return f"{app_base}/dashboard/evaluators/{evaluator_slug}"
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _print_links(evaluator_id: str, dataset_id: str, job_name: Optional[str]) -> None:
    """Print dashboard links for evaluator, dataset, and optional RFT job.

    The evaluator link is always printed. The dataset link is printed when
    ``dataset_id`` is non-empty and the RFT job link when ``job_name`` is
    non-empty; the two are independent of each other.
    """
    evaluator_url = _build_evaluator_dashboard_url(evaluator_id)
    print("\n📊 Dashboard Links:")
    print(f" Evaluator: {evaluator_url}")

    if not dataset_id and not job_name:
        return

    # Resolve the app host once, up front, so the job link does not depend
    # on the dataset branch having run.
    app_base = _map_api_host_to_app_host(get_fireworks_api_base())

    if dataset_id:
        print(f" Dataset: {app_base}/dashboard/datasets/{dataset_id}")
    if job_name:
        # job_name likely like accounts/{account}/reinforcementFineTuningJobs/{id}
        job_id = str(job_name).strip().split("/")[-1]
        print(f" RFT Job: {app_base}/dashboard/fine-tuning/reinforcement/{job_id}")
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _build_trimmed_dataset_id(evaluator_id: str) -> str:
    """Build a dataset id derived from evaluator_id, trimmed to 63 chars.

    Format: <normalized-base>-dataset-YYYYMMDDHHMMSS, where the base is the
    normalized evaluator id cut down so the whole id fits in 63 characters.
    The timestamp comes from ``time.strftime`` at call time. An empty base
    is replaced by "dataset".
    """
    base = _normalize_evaluator_id(evaluator_id)
    suffix = f"-dataset-{time.strftime('%Y%m%d%H%M%S')}"
    # Room left for the base once the suffix is accounted for (at least 1).
    max_base_len = max(63 - len(suffix), 1)

    if len(base) > max_base_len:
        base = base[:max_base_len].rstrip("-")
    if not base:
        base = "dataset"

    # Ensure first char is a letter; re-trim if the prefix made it too long.
    if not base[0].isalpha():
        base = f"eval-{base}"[:max_base_len]
    base = base.rstrip("-") or "dataset"
    return f"{base}{suffix}"
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def _resolve_selected_test(
    project_root: str,
    evaluator_id: Optional[str],
    selected_tests: Optional[list[DiscoveredTest]] = None,
) -> tuple[Optional[str], Optional[str]]:
    """Resolve a single test's source file path and function name.

    Candidates are ``selected_tests`` when it is not None (even if empty);
    otherwise tests are discovered under ``project_root``. Then:
        1) No candidates: unresolved.
        2) Exactly one candidate: use it.
        3) Several candidates and ``evaluator_id`` given: use the first one
           whose normalized '<file-stem>-<func-name>' equals ``evaluator_id``.

    Returns: (file_path, func_name) or (None, None) if unresolved. Any
    exception raised along the way also yields (None, None).
    """
    try:
        tests = selected_tests if selected_tests is not None else _discover_tests(project_root)
        if not tests:
            return None, None

        if len(tests) == 1:
            only = tests[0]
            return only.file_path, only.qualname.split(".")[-1]

        if evaluator_id:
            for t in tests:
                func_name = t.qualname.split(".")[-1]
                source_file_name = os.path.splitext(os.path.basename(t.file_path))[0]
                candidate = _normalize_evaluator_id(f"{source_file_name}-{func_name}")
                if candidate == evaluator_id:
                    return t.file_path, func_name
        return None, None
    except Exception:
        # Best effort: resolution failures are reported as "unresolved".
        return None, None
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def _build_entry_point(project_root: str, source_file_path: Optional[str], func_name: str) -> str:
    """Build a pytest-style entry point (path::func) relative to the given root.

    When ``source_file_path`` is empty or None, the file name is guessed as
    "<func_name>.py". If no relative path can be computed, the absolute path
    is used instead.
    """
    if not source_file_path:
        # Fallback: use filename only
        return f"{func_name}.py::{func_name}"

    abs_path = os.path.abspath(source_file_path)
    try:
        rel = os.path.relpath(abs_path, project_root)
    except (TypeError, ValueError):
        # e.g. ValueError on Windows when the paths are on different drives.
        rel = abs_path
    return f"{rel}::{func_name}"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.93.dev2
|
|
3
|
+
Version: 0.2.93.dev3
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -5,9 +5,9 @@ development/utils/generate_api_key.py,sha256=hHCMFkzW4yxqwcn2ct5diDm-PR9cMX9XP7I
|
|
|
5
5
|
development/utils/subprocess_manager.py,sha256=7n7rT9ji7h93i79SMrGS5RNesrnLFjdFON9_eQCmYNE,18937
|
|
6
6
|
eval_protocol/__init__.py,sha256=I24OX8MFZBIXWeCh8Jhd8e5Qka8M4iXGKuXWme0hFD4,5150
|
|
7
7
|
eval_protocol/__main__.py,sha256=FIW5fo2X2rsPThurSlZEuqvE1u0XNwJW1uoej_OhAAs,161
|
|
8
|
-
eval_protocol/_version.py,sha256=
|
|
8
|
+
eval_protocol/_version.py,sha256=jZaVFJNxW2MyRud7OvnxsrZapkjSTpAqWDfau76fiT8,504
|
|
9
9
|
eval_protocol/auth.py,sha256=fxM8ZBUQ5t6WsNBiXSoxqpHAcwscwGe8wYLdWnPrSV4,13414
|
|
10
|
-
eval_protocol/cli.py,sha256=
|
|
10
|
+
eval_protocol/cli.py,sha256=zBPjlkOty2jR3q2zgc29ZK9IrqM_dJEuJszGy83sNuM,30277
|
|
11
11
|
eval_protocol/common_utils.py,sha256=2K7c2nMW7B3-wECbmAHBdZRIA1NjnUZIZnhCsNMRKu0,2883
|
|
12
12
|
eval_protocol/config.py,sha256=b6mWbw-IJhu8iWgCJqcFp14vPJl7iXH7FnKe7cNIgSc,6438
|
|
13
13
|
eval_protocol/directory_utils.py,sha256=DfGk28dnXKpbXXTJxxo3VyJGceQGAoD97ql3kItnzMk,1096
|
|
@@ -71,14 +71,15 @@ eval_protocol/benchmarks/data/retail_dataset.jsonl,sha256=9fGs1U6lvnpKBNjERWPmJH
|
|
|
71
71
|
eval_protocol/cli_commands/__init__.py,sha256=XYAFq-qJ787pmrMiouG_jd9uwZcRhxLtNLSTxKcHzCM,85
|
|
72
72
|
eval_protocol/cli_commands/agent_eval_cmd.py,sha256=ezENJzLsUHvpm-FXrDpIV4fBHZbr3WqQOQiIL_rcVEw,11032
|
|
73
73
|
eval_protocol/cli_commands/common.py,sha256=UuV5AZ02Rp6oioOblr9mfIrFcfTAKJOuxzhOqadBcHE,10155
|
|
74
|
-
eval_protocol/cli_commands/create_rft.py,sha256=
|
|
74
|
+
eval_protocol/cli_commands/create_rft.py,sha256=lWIfluLbHN51tye879XYFMIpj_mVT6IxfUMXrPa-DIk,35057
|
|
75
75
|
eval_protocol/cli_commands/deploy.py,sha256=WwwyK80exi1dBFjcIXEj_odaGOfe1byiWgnLGiUUQvM,24054
|
|
76
76
|
eval_protocol/cli_commands/deploy_mcp.py,sha256=_6S7NcbCb_hGVqaBUgKLURO53i2UAI7xXUkBVXP6cLo,9961
|
|
77
|
-
eval_protocol/cli_commands/local_test.py,sha256=
|
|
77
|
+
eval_protocol/cli_commands/local_test.py,sha256=VZzs7j3w043ZADHKpaN1iL7aZZeR8mkf3c-YZz5tlHo,6895
|
|
78
78
|
eval_protocol/cli_commands/logs.py,sha256=Py7WOeQKBkbBHTm-2ZjyB70EIAbgr3JwF2Vs_qTTKng,1833
|
|
79
79
|
eval_protocol/cli_commands/preview.py,sha256=1iXru9V8QBGx7tLDJ28iG7bjV2ICFroEKaQ8EJsGciU,8072
|
|
80
80
|
eval_protocol/cli_commands/run_eval_cmd.py,sha256=xLf7Adfjt-5jmfKHh9ioay3n0MDo78tofeZgTnZ-C7w,9730
|
|
81
|
-
eval_protocol/cli_commands/upload.py,sha256=
|
|
81
|
+
eval_protocol/cli_commands/upload.py,sha256=6ngPbGHb-OIoo2QkL7yNGzeo0MO_F_Q0ASN22SDSRLg,12560
|
|
82
|
+
eval_protocol/cli_commands/utils.py,sha256=QIjGCab0wNw76yinybFAG6X5JMOCTm6TC8rle3UOa54,17933
|
|
82
83
|
eval_protocol/data_loader/__init__.py,sha256=mjl725y1nW_l8g5mnHJt9CXTj-knKoJc15eD35XBTH4,245
|
|
83
84
|
eval_protocol/data_loader/dynamic_data_loader.py,sha256=tMSqKiECls1ySHv5eHpAlc9PwsRLzE5Rp4Ko3kIYUuA,1442
|
|
84
85
|
eval_protocol/data_loader/factory_data_loader.py,sha256=9mqkWGyQde8s8O5KmN8vg7TQJLlLn61-RVHAuUTTnD8,1428
|
|
@@ -256,7 +257,7 @@ eval_protocol/utils/show_results_url.py,sha256=PHM6dWtCUiuV5WQgvHegnxY7ofkE4b9wO
|
|
|
256
257
|
eval_protocol/utils/static_policy.py,sha256=fiKnOS06EG5OB6p5An_yY_dLAvVboYnC4Sqx5z_v3-g,10716
|
|
257
258
|
eval_protocol/utils/subprocess_utils.py,sha256=2EcoVNLSlfdxwQn-2pscqjiGpBR4Ho8kfRnmzmew-1w,3504
|
|
258
259
|
eval_protocol/utils/vite_server.py,sha256=0Tfh1LfTqYpFZxkO2syrF5I0cBEJHFmYXd2N4CWkca8,5051
|
|
259
|
-
eval_protocol-0.2.93.
|
|
260
|
+
eval_protocol-0.2.93.dev3.dist-info/licenses/LICENSE,sha256=OzeIb507xW9AVhGMqqHpoL_EFRJUo8Sb7A3LN5NqFfQ,1075
|
|
260
261
|
vendor/tau2/__init__.py,sha256=EQMX_v8x-YBV24ia35_nLkf5MrC6aAuT_M5m7IJcl3k,541
|
|
261
262
|
vendor/tau2/cli.py,sha256=lhJocXCDxEfdv7gIxya5b0w5J5qebpgrg_ZTpjGp_ww,7515
|
|
262
263
|
vendor/tau2/config.py,sha256=LrkKRGSFH4Cvf9CNO-MttJMvIia0a2zP1uKVnUQi6B8,1278
|
|
@@ -356,8 +357,8 @@ vite-app/dist/assets/index-BIhepl19.css,sha256=ysBSOQ88bSXVLRcwiqn5OLxblkjZ7HME5
|
|
|
356
357
|
vite-app/dist/assets/index-DaovgarD.js,sha256=paGj2HBU1n50fx4WuSIHDYxO1N_8-UEVSovFa57kdas,870270
|
|
357
358
|
vite-app/dist/assets/index-DaovgarD.js.map,sha256=ifPNQ8TO-xaEmz691557CQZGuoNcTGdMiZYuial8Wx0,3875135
|
|
358
359
|
vite-app/dist/assets/logo-light-BprIBJQW.png,sha256=rRXC24eqrQO3y--N493THrD48WQVAhSVMHM_iDKy250,21694
|
|
359
|
-
eval_protocol-0.2.93.
|
|
360
|
-
eval_protocol-0.2.93.
|
|
361
|
-
eval_protocol-0.2.93.
|
|
362
|
-
eval_protocol-0.2.93.
|
|
363
|
-
eval_protocol-0.2.93.
|
|
360
|
+
eval_protocol-0.2.93.dev3.dist-info/METADATA,sha256=OZ3-jdDCv2iIKJRhWAPxi7BxQCvpFwzb4g1lqaAJ2Zw,6741
|
|
361
|
+
eval_protocol-0.2.93.dev3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
362
|
+
eval_protocol-0.2.93.dev3.dist-info/entry_points.txt,sha256=CebRaxbWXly21zPN1fbyAw26kNUU2dv7zZyGkXxtFVw,183
|
|
363
|
+
eval_protocol-0.2.93.dev3.dist-info/top_level.txt,sha256=8jjn7dpvLPL4RX2JBeAfPPMOR6x6f7E4o4yFiKLEHuw,33
|
|
364
|
+
eval_protocol-0.2.93.dev3.dist-info/RECORD,,
|
|
File without changes
|
{eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|