eval-protocol 0.2.93.dev2__py3-none-any.whl → 0.2.93.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_protocol/_version.py +3 -3
- eval_protocol/cli.py +20 -0
- eval_protocol/cli_commands/create_rft.py +435 -337
- eval_protocol/cli_commands/local_test.py +65 -56
- eval_protocol/cli_commands/upload.py +18 -455
- eval_protocol/cli_commands/utils.py +511 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/METADATA +1 -1
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/RECORD +12 -11
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/WHEEL +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/entry_points.txt +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/licenses/LICENSE +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev3.dist-info}/top_level.txt +0 -0
|
@@ -1,255 +1,24 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import importlib.util
|
|
3
|
-
import inspect
|
|
4
|
-
import json
|
|
5
3
|
import os
|
|
6
|
-
import pkgutil
|
|
7
4
|
import re
|
|
8
|
-
import runpy
|
|
9
5
|
import sys
|
|
10
|
-
from dataclasses import dataclass
|
|
11
6
|
from pathlib import Path
|
|
12
|
-
from typing import Any, Dict, Iterable
|
|
13
|
-
|
|
14
|
-
import pytest
|
|
15
|
-
from eval_protocol.auth import (
|
|
16
|
-
get_fireworks_account_id,
|
|
17
|
-
get_fireworks_api_key,
|
|
18
|
-
get_fireworks_api_base,
|
|
19
|
-
verify_api_key_and_get_account_id,
|
|
20
|
-
)
|
|
7
|
+
from typing import Any, Dict
|
|
8
|
+
|
|
9
|
+
from eval_protocol.auth import get_fireworks_api_key
|
|
21
10
|
from eval_protocol.platform_api import create_or_update_fireworks_secret
|
|
22
11
|
|
|
23
12
|
from eval_protocol.evaluation import create_evaluation
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
has_parametrize: bool
|
|
34
|
-
param_count: int
|
|
35
|
-
nodeids: list[str]
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def _iter_python_files(root: str) -> Iterable[str]:
|
|
39
|
-
# Don't follow symlinks to avoid infinite loops
|
|
40
|
-
for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
|
|
41
|
-
# Skip common virtualenv and node paths
|
|
42
|
-
if any(
|
|
43
|
-
skip in dirpath
|
|
44
|
-
for skip in [
|
|
45
|
-
"/.venv",
|
|
46
|
-
"/venv",
|
|
47
|
-
"/node_modules",
|
|
48
|
-
"/.git",
|
|
49
|
-
"/dist",
|
|
50
|
-
"/build",
|
|
51
|
-
"/__pycache__",
|
|
52
|
-
".egg-info",
|
|
53
|
-
"/vendor",
|
|
54
|
-
]
|
|
55
|
-
):
|
|
56
|
-
continue
|
|
57
|
-
# Also skip specific directories by modifying dirnames in-place
|
|
58
|
-
dirnames[:] = [
|
|
59
|
-
d
|
|
60
|
-
for d in dirnames
|
|
61
|
-
if not d.startswith(".") and d not in ["venv", "node_modules", "__pycache__", "dist", "build", "vendor"]
|
|
62
|
-
]
|
|
63
|
-
|
|
64
|
-
for name in filenames:
|
|
65
|
-
# Skip setup files, test discovery scripts, __init__, and hidden files
|
|
66
|
-
if (
|
|
67
|
-
name.endswith(".py")
|
|
68
|
-
and not name.startswith(".")
|
|
69
|
-
and not name.startswith("test_discovery")
|
|
70
|
-
and name not in ["setup.py", "versioneer.py", "conf.py", "__main__.py"]
|
|
71
|
-
):
|
|
72
|
-
yield os.path.join(dirpath, name)
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def _is_eval_protocol_test(obj: Any) -> bool:
|
|
76
|
-
# evaluation_test decorator returns a dual_mode_wrapper with _origin_func and pytest marks
|
|
77
|
-
if not callable(obj):
|
|
78
|
-
return False
|
|
79
|
-
origin = getattr(obj, "_origin_func", None)
|
|
80
|
-
if origin is None:
|
|
81
|
-
return False
|
|
82
|
-
# Must have pytest marks from evaluation_test
|
|
83
|
-
marks = getattr(obj, "pytestmark", [])
|
|
84
|
-
# Handle pytest proxy objects (APIRemovedInV1Proxy)
|
|
85
|
-
if not isinstance(marks, (list, tuple)):
|
|
86
|
-
try:
|
|
87
|
-
marks = list(marks) if marks else []
|
|
88
|
-
except (TypeError, AttributeError):
|
|
89
|
-
return False
|
|
90
|
-
return len(marks) > 0
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def _extract_param_info_from_marks(obj: Any) -> tuple[bool, int, list[str]]:
|
|
94
|
-
"""Extract parametrization info from pytest marks.
|
|
95
|
-
|
|
96
|
-
Returns:
|
|
97
|
-
(has_parametrize, param_count, param_ids)
|
|
98
|
-
"""
|
|
99
|
-
marks = getattr(obj, "pytestmark", [])
|
|
100
|
-
|
|
101
|
-
# Handle pytest proxy objects (APIRemovedInV1Proxy) - same as _is_eval_protocol_test
|
|
102
|
-
if not isinstance(marks, (list, tuple)):
|
|
103
|
-
try:
|
|
104
|
-
marks = list(marks) if marks else []
|
|
105
|
-
except (TypeError, AttributeError):
|
|
106
|
-
marks = []
|
|
107
|
-
|
|
108
|
-
has_parametrize = False
|
|
109
|
-
total_combinations = 0
|
|
110
|
-
all_param_ids: list[str] = []
|
|
111
|
-
|
|
112
|
-
for m in marks:
|
|
113
|
-
if getattr(m, "name", "") == "parametrize":
|
|
114
|
-
has_parametrize = True
|
|
115
|
-
# The data is in kwargs for eval_protocol's parametrization
|
|
116
|
-
kwargs = getattr(m, "kwargs", {})
|
|
117
|
-
argnames = kwargs.get("argnames", m.args[0] if m.args else "")
|
|
118
|
-
argvalues = kwargs.get("argvalues", m.args[1] if len(m.args) > 1 else [])
|
|
119
|
-
ids = kwargs.get("ids", [])
|
|
120
|
-
|
|
121
|
-
# Count this dimension of parameters
|
|
122
|
-
if isinstance(argvalues, (list, tuple)):
|
|
123
|
-
count = len(argvalues)
|
|
124
|
-
total_combinations = count # For now, just use the count from this mark
|
|
125
|
-
|
|
126
|
-
# Use provided IDs
|
|
127
|
-
if ids and isinstance(ids, (list, tuple)):
|
|
128
|
-
all_param_ids = list(ids[:count])
|
|
129
|
-
else:
|
|
130
|
-
# Generate IDs based on argnames
|
|
131
|
-
if isinstance(argnames, str) and "," not in argnames:
|
|
132
|
-
# Single parameter
|
|
133
|
-
all_param_ids = [f"{argnames}={i}" for i in range(count)]
|
|
134
|
-
else:
|
|
135
|
-
# Multiple parameters
|
|
136
|
-
all_param_ids = [f"variant_{i}" for i in range(count)]
|
|
137
|
-
|
|
138
|
-
return has_parametrize, total_combinations, all_param_ids
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def _discover_tests(root: str) -> list[DiscoveredTest]:
|
|
142
|
-
abs_root = os.path.abspath(root)
|
|
143
|
-
if abs_root not in sys.path:
|
|
144
|
-
sys.path.insert(0, abs_root)
|
|
145
|
-
|
|
146
|
-
discovered: list[DiscoveredTest] = []
|
|
147
|
-
|
|
148
|
-
class CollectionPlugin:
|
|
149
|
-
"""Plugin to capture collected items without running code."""
|
|
150
|
-
|
|
151
|
-
def __init__(self):
|
|
152
|
-
self.items = []
|
|
153
|
-
|
|
154
|
-
def pytest_ignore_collect(self, collection_path, config):
|
|
155
|
-
"""Ignore problematic files before pytest tries to import them."""
|
|
156
|
-
# Ignore specific files
|
|
157
|
-
ignored_files = ["setup.py", "versioneer.py", "conf.py", "__main__.py"]
|
|
158
|
-
if collection_path.name in ignored_files:
|
|
159
|
-
return True
|
|
160
|
-
|
|
161
|
-
# Ignore hidden files (starting with .)
|
|
162
|
-
if collection_path.name.startswith("."):
|
|
163
|
-
return True
|
|
164
|
-
|
|
165
|
-
# Ignore test_discovery files
|
|
166
|
-
if collection_path.name.startswith("test_discovery"):
|
|
167
|
-
return True
|
|
168
|
-
|
|
169
|
-
return None
|
|
170
|
-
|
|
171
|
-
def pytest_collection_modifyitems(self, items):
|
|
172
|
-
"""Hook called after collection is done."""
|
|
173
|
-
self.items = items
|
|
174
|
-
|
|
175
|
-
plugin = CollectionPlugin()
|
|
176
|
-
|
|
177
|
-
# Run pytest collection only (--collect-only prevents code execution)
|
|
178
|
-
# Override python_files to collect from ANY .py file
|
|
179
|
-
args = [
|
|
180
|
-
abs_root,
|
|
181
|
-
"--collect-only",
|
|
182
|
-
"-q",
|
|
183
|
-
"--pythonwarnings=ignore",
|
|
184
|
-
"-o",
|
|
185
|
-
"python_files=*.py", # Override to collect all .py files
|
|
186
|
-
]
|
|
187
|
-
|
|
188
|
-
try:
|
|
189
|
-
# Suppress pytest output
|
|
190
|
-
import io
|
|
191
|
-
import contextlib
|
|
192
|
-
|
|
193
|
-
with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
|
|
194
|
-
pytest.main(args, plugins=[plugin])
|
|
195
|
-
except Exception:
|
|
196
|
-
# If pytest collection fails, fall back to empty list
|
|
197
|
-
return []
|
|
198
|
-
|
|
199
|
-
# Process collected items
|
|
200
|
-
for item in plugin.items:
|
|
201
|
-
if not hasattr(item, "obj"):
|
|
202
|
-
continue
|
|
203
|
-
|
|
204
|
-
obj = item.obj
|
|
205
|
-
if not _is_eval_protocol_test(obj):
|
|
206
|
-
continue
|
|
207
|
-
|
|
208
|
-
origin = getattr(obj, "_origin_func", obj)
|
|
209
|
-
try:
|
|
210
|
-
src_file = inspect.getsourcefile(origin) or str(item.path)
|
|
211
|
-
_, lineno = inspect.getsourcelines(origin)
|
|
212
|
-
except Exception:
|
|
213
|
-
src_file, lineno = str(item.path), None
|
|
214
|
-
|
|
215
|
-
# Extract parametrization info from marks
|
|
216
|
-
has_parametrize, param_count, param_ids = _extract_param_info_from_marks(obj)
|
|
217
|
-
|
|
218
|
-
# Get module name and function name
|
|
219
|
-
module_name = (
|
|
220
|
-
item.module.__name__
|
|
221
|
-
if hasattr(item, "module")
|
|
222
|
-
else item.nodeid.split("::")[0].replace("/", ".").replace(".py", "")
|
|
223
|
-
)
|
|
224
|
-
func_name = item.name.split("[")[0] if "[" in item.name else item.name
|
|
225
|
-
|
|
226
|
-
# Generate nodeids
|
|
227
|
-
base_nodeid = f"{os.path.basename(src_file)}::{func_name}"
|
|
228
|
-
if param_ids:
|
|
229
|
-
nodeids = [f"{base_nodeid}[{pid}]" for pid in param_ids]
|
|
230
|
-
else:
|
|
231
|
-
nodeids = [base_nodeid]
|
|
232
|
-
|
|
233
|
-
discovered.append(
|
|
234
|
-
DiscoveredTest(
|
|
235
|
-
module_path=module_name,
|
|
236
|
-
module_name=module_name,
|
|
237
|
-
qualname=f"{module_name}.{func_name}",
|
|
238
|
-
file_path=os.path.abspath(src_file),
|
|
239
|
-
lineno=lineno,
|
|
240
|
-
has_parametrize=has_parametrize,
|
|
241
|
-
param_count=param_count,
|
|
242
|
-
nodeids=nodeids,
|
|
243
|
-
)
|
|
244
|
-
)
|
|
245
|
-
|
|
246
|
-
# Deduplicate by qualname (in case same test appears multiple times)
|
|
247
|
-
by_qual: dict[str, DiscoveredTest] = {}
|
|
248
|
-
for t in discovered:
|
|
249
|
-
existing = by_qual.get(t.qualname)
|
|
250
|
-
if not existing or t.param_count > existing.param_count:
|
|
251
|
-
by_qual[t.qualname] = t
|
|
252
|
-
return sorted(by_qual.values(), key=lambda x: (x.file_path, x.lineno or 0))
|
|
13
|
+
from .utils import (
|
|
14
|
+
_build_entry_point,
|
|
15
|
+
_build_evaluator_dashboard_url,
|
|
16
|
+
_discover_and_select_tests,
|
|
17
|
+
_discover_tests,
|
|
18
|
+
_ensure_account_id,
|
|
19
|
+
_normalize_evaluator_id,
|
|
20
|
+
_prompt_select,
|
|
21
|
+
)
|
|
253
22
|
|
|
254
23
|
|
|
255
24
|
def _to_pyargs_nodeid(file_path: str, func_name: str) -> str | None:
|
|
@@ -364,165 +133,6 @@ def _resolve_entry_to_qual_and_source(entry: str, cwd: str) -> tuple[str, str]:
|
|
|
364
133
|
return qualname, os.path.abspath(source_file_path) if source_file_path else ""
|
|
365
134
|
|
|
366
135
|
|
|
367
|
-
def _generate_ts_mode_code(test: DiscoveredTest) -> tuple[str, str]:
|
|
368
|
-
# Deprecated: we no longer generate a shim; keep stub for import compatibility
|
|
369
|
-
return ("", "main.py")
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
def _normalize_evaluator_id(evaluator_id: str) -> str:
|
|
373
|
-
"""
|
|
374
|
-
Normalize evaluator ID to meet Fireworks requirements:
|
|
375
|
-
- Only lowercase a-z, 0-9, and hyphen (-)
|
|
376
|
-
- Maximum 63 characters
|
|
377
|
-
"""
|
|
378
|
-
# Convert to lowercase
|
|
379
|
-
normalized = evaluator_id.lower()
|
|
380
|
-
|
|
381
|
-
# Replace underscores with hyphens
|
|
382
|
-
normalized = normalized.replace("_", "-")
|
|
383
|
-
|
|
384
|
-
# Remove any characters that aren't alphanumeric or hyphen
|
|
385
|
-
normalized = re.sub(r"[^a-z0-9-]", "", normalized)
|
|
386
|
-
|
|
387
|
-
# Remove consecutive hyphens
|
|
388
|
-
normalized = re.sub(r"-+", "-", normalized)
|
|
389
|
-
|
|
390
|
-
# Remove leading/trailing hyphens
|
|
391
|
-
normalized = normalized.strip("-")
|
|
392
|
-
|
|
393
|
-
# Ensure it starts with a letter (Fireworks requirement)
|
|
394
|
-
if normalized and not normalized[0].isalpha():
|
|
395
|
-
normalized = "eval-" + normalized
|
|
396
|
-
|
|
397
|
-
# Truncate to 63 characters
|
|
398
|
-
if len(normalized) > 63:
|
|
399
|
-
normalized = normalized[:63].rstrip("-")
|
|
400
|
-
|
|
401
|
-
return normalized
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
def _format_test_choice(test: DiscoveredTest, idx: int) -> str:
|
|
405
|
-
"""Format a test as a choice string for display."""
|
|
406
|
-
# Shorten the qualname for display
|
|
407
|
-
name = test.qualname.split(".")[-1]
|
|
408
|
-
location = f"{Path(test.file_path).name}:{test.lineno}" if test.lineno else Path(test.file_path).name
|
|
409
|
-
|
|
410
|
-
if test.has_parametrize and test.param_count > 1:
|
|
411
|
-
return f"{name} ({test.param_count} variants) - {location}"
|
|
412
|
-
else:
|
|
413
|
-
return f"{name} - {location}"
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
def _prompt_select_interactive(tests: list[DiscoveredTest]) -> list[DiscoveredTest]:
|
|
417
|
-
"""Interactive selection with arrow keys using questionary."""
|
|
418
|
-
try:
|
|
419
|
-
import questionary
|
|
420
|
-
from questionary import Style
|
|
421
|
-
|
|
422
|
-
# Custom style similar to Vercel CLI
|
|
423
|
-
custom_style = Style(
|
|
424
|
-
[
|
|
425
|
-
("qmark", "fg:#673ab7 bold"),
|
|
426
|
-
("question", "bold"),
|
|
427
|
-
("answer", "fg:#f44336 bold"),
|
|
428
|
-
("pointer", "fg:#673ab7 bold"),
|
|
429
|
-
("highlighted", "fg:#673ab7 bold"),
|
|
430
|
-
("selected", "fg:#cc5454"),
|
|
431
|
-
("separator", "fg:#cc5454"),
|
|
432
|
-
("instruction", ""),
|
|
433
|
-
("text", ""),
|
|
434
|
-
]
|
|
435
|
-
)
|
|
436
|
-
|
|
437
|
-
# Check if only one test - auto-select it
|
|
438
|
-
if len(tests) == 1:
|
|
439
|
-
print(f"\nFound 1 test: {_format_test_choice(tests[0], 1)}")
|
|
440
|
-
confirm = questionary.confirm("Select this test?", default=True, style=custom_style).ask()
|
|
441
|
-
if confirm:
|
|
442
|
-
return tests
|
|
443
|
-
else:
|
|
444
|
-
return []
|
|
445
|
-
|
|
446
|
-
# Single-select UX
|
|
447
|
-
print("\n")
|
|
448
|
-
print("Tip: Use ↑/↓ arrows to navigate and press ENTER to select.\n")
|
|
449
|
-
|
|
450
|
-
choices = []
|
|
451
|
-
for idx, t in enumerate(tests, 1):
|
|
452
|
-
choice_text = _format_test_choice(t, idx)
|
|
453
|
-
choices.append({"name": choice_text, "value": idx - 1})
|
|
454
|
-
|
|
455
|
-
selected = questionary.select(
|
|
456
|
-
"Select an evaluation test to upload:", choices=choices, style=custom_style
|
|
457
|
-
).ask()
|
|
458
|
-
|
|
459
|
-
if selected is None: # Ctrl+C
|
|
460
|
-
print("\nUpload cancelled.")
|
|
461
|
-
return []
|
|
462
|
-
|
|
463
|
-
print("\n✓ Selected 1 test")
|
|
464
|
-
return [tests[selected]]
|
|
465
|
-
|
|
466
|
-
except ImportError:
|
|
467
|
-
# Fallback to simpler implementation
|
|
468
|
-
return _prompt_select_fallback(tests)
|
|
469
|
-
except KeyboardInterrupt:
|
|
470
|
-
print("\n\nUpload cancelled.")
|
|
471
|
-
return []
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
def _prompt_select_fallback(tests: list[DiscoveredTest]) -> list[DiscoveredTest]:
|
|
475
|
-
"""Fallback prompt selection for when questionary is not available."""
|
|
476
|
-
print("\n" + "=" * 80)
|
|
477
|
-
print("Discovered evaluation tests:")
|
|
478
|
-
print("=" * 80)
|
|
479
|
-
print("\nTip: Install questionary for better UX: pip install questionary\n")
|
|
480
|
-
|
|
481
|
-
for idx, t in enumerate(tests, 1):
|
|
482
|
-
loc = f"{t.file_path}:{t.lineno}" if t.lineno else t.file_path
|
|
483
|
-
print(f" [{idx}] {t.qualname}")
|
|
484
|
-
print(f" Location: {loc}")
|
|
485
|
-
|
|
486
|
-
if t.has_parametrize and t.nodeids:
|
|
487
|
-
print(f" Parameterized: {t.param_count} variant(s)")
|
|
488
|
-
# Show first few variants as examples
|
|
489
|
-
example_nodeids = t.nodeids[:3]
|
|
490
|
-
for nodeid in example_nodeids:
|
|
491
|
-
# Extract just the parameter part for display
|
|
492
|
-
if "[" in nodeid:
|
|
493
|
-
param_part = nodeid.split("[", 1)[1].rstrip("]")
|
|
494
|
-
print(f" - {param_part}")
|
|
495
|
-
if len(t.nodeids) > 3:
|
|
496
|
-
print(f" ... and {len(t.nodeids) - 3} more")
|
|
497
|
-
else:
|
|
498
|
-
print(" Type: Single test (no parametrization)")
|
|
499
|
-
print()
|
|
500
|
-
|
|
501
|
-
print("=" * 80)
|
|
502
|
-
try:
|
|
503
|
-
choice = input("Enter the number to select: ").strip()
|
|
504
|
-
except KeyboardInterrupt:
|
|
505
|
-
print("\n\nUpload cancelled.")
|
|
506
|
-
return []
|
|
507
|
-
|
|
508
|
-
if not choice.isdigit():
|
|
509
|
-
print("\n⚠️ Invalid selection.")
|
|
510
|
-
return []
|
|
511
|
-
n = int(choice)
|
|
512
|
-
if not (1 <= n <= len(tests)):
|
|
513
|
-
print("\n⚠️ Selection out of range.")
|
|
514
|
-
return []
|
|
515
|
-
return [tests[n - 1]]
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
def _prompt_select(tests: list[DiscoveredTest], non_interactive: bool) -> list[DiscoveredTest]:
|
|
519
|
-
"""Prompt user to select tests to upload."""
|
|
520
|
-
if non_interactive:
|
|
521
|
-
return tests
|
|
522
|
-
|
|
523
|
-
return _prompt_select_interactive(tests)
|
|
524
|
-
|
|
525
|
-
|
|
526
136
|
def _load_secrets_from_env_file(env_file_path: str) -> Dict[str, str]:
|
|
527
137
|
"""
|
|
528
138
|
Load secrets from a .env file that should be uploaded to Fireworks.
|
|
@@ -572,6 +182,7 @@ def _mask_secret_value(value: str) -> str:
|
|
|
572
182
|
def upload_command(args: argparse.Namespace) -> int:
|
|
573
183
|
root = os.path.abspath(getattr(args, "path", "."))
|
|
574
184
|
entries_arg = getattr(args, "entry", None)
|
|
185
|
+
non_interactive: bool = bool(getattr(args, "yes", False))
|
|
575
186
|
if entries_arg:
|
|
576
187
|
entries = [e.strip() for e in re.split(r"[,\s]+", entries_arg) if e.strip()]
|
|
577
188
|
selected_specs: list[tuple[str, str]] = []
|
|
@@ -579,17 +190,9 @@ def upload_command(args: argparse.Namespace) -> int:
|
|
|
579
190
|
qualname, resolved_path = _resolve_entry_to_qual_and_source(e, root)
|
|
580
191
|
selected_specs.append((qualname, resolved_path))
|
|
581
192
|
else:
|
|
582
|
-
|
|
583
|
-
tests = _discover_tests(root)
|
|
584
|
-
if not tests:
|
|
585
|
-
print("No evaluation tests found.")
|
|
586
|
-
print("\nHint: Make sure your tests use the @evaluation_test decorator.")
|
|
587
|
-
return 1
|
|
588
|
-
selected_tests = _prompt_select(tests, non_interactive=bool(getattr(args, "yes", False)))
|
|
193
|
+
selected_tests = _discover_and_select_tests(root, non_interactive=non_interactive)
|
|
589
194
|
if not selected_tests:
|
|
590
|
-
print("No tests selected.")
|
|
591
195
|
return 1
|
|
592
|
-
|
|
593
196
|
# Warn about parameterized tests
|
|
594
197
|
parameterized_tests = [t for t in selected_tests if t.has_parametrize]
|
|
595
198
|
if parameterized_tests:
|
|
@@ -607,7 +210,7 @@ def upload_command(args: argparse.Namespace) -> int:
|
|
|
607
210
|
|
|
608
211
|
# Load secrets from .env file and ensure they're available on Fireworks
|
|
609
212
|
try:
|
|
610
|
-
fw_account_id =
|
|
213
|
+
fw_account_id = _ensure_account_id()
|
|
611
214
|
|
|
612
215
|
# Determine .env file path
|
|
613
216
|
if env_file:
|
|
@@ -624,15 +227,6 @@ def upload_command(args: argparse.Namespace) -> int:
|
|
|
624
227
|
if fw_api_key_value and "FIREWORKS_API_KEY" not in secrets_from_file:
|
|
625
228
|
secrets_from_file["FIREWORKS_API_KEY"] = fw_api_key_value
|
|
626
229
|
|
|
627
|
-
if not fw_account_id and fw_api_key_value:
|
|
628
|
-
# Attempt to verify and resolve account id from server headers
|
|
629
|
-
resolved = verify_api_key_and_get_account_id(api_key=fw_api_key_value, api_base=get_fireworks_api_base())
|
|
630
|
-
if resolved:
|
|
631
|
-
fw_account_id = resolved
|
|
632
|
-
# Propagate to environment so downstream calls use it if needed
|
|
633
|
-
os.environ["FIREWORKS_ACCOUNT_ID"] = fw_account_id
|
|
634
|
-
print(f"Resolved FIREWORKS_ACCOUNT_ID via API verification: {fw_account_id}")
|
|
635
|
-
|
|
636
230
|
if fw_account_id and secrets_from_file:
|
|
637
231
|
print(f"Found {len(secrets_from_file)} API keys to upload as Fireworks secrets...")
|
|
638
232
|
if secrets_from_env_file and os.path.exists(env_file_path):
|
|
@@ -684,18 +278,7 @@ def upload_command(args: argparse.Namespace) -> int:
|
|
|
684
278
|
# Compute entry point metadata for backend as a pytest nodeid usable with `pytest <entrypoint>`
|
|
685
279
|
# Always prefer a path-based nodeid to work in plain pytest environments (server may not use --pyargs)
|
|
686
280
|
func_name = qualname.split(".")[-1]
|
|
687
|
-
entry_point =
|
|
688
|
-
if source_file_path:
|
|
689
|
-
# Use path relative to current working directory if possible
|
|
690
|
-
abs_path = os.path.abspath(source_file_path)
|
|
691
|
-
try:
|
|
692
|
-
rel = os.path.relpath(abs_path, root)
|
|
693
|
-
except Exception:
|
|
694
|
-
rel = abs_path
|
|
695
|
-
entry_point = f"{rel}::{func_name}"
|
|
696
|
-
else:
|
|
697
|
-
# Fallback: use filename from qualname only (rare)
|
|
698
|
-
entry_point = f"{func_name}.py::{func_name}"
|
|
281
|
+
entry_point = _build_entry_point(root, source_file_path, func_name)
|
|
699
282
|
|
|
700
283
|
print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...")
|
|
701
284
|
try:
|
|
@@ -714,28 +297,8 @@ def upload_command(args: argparse.Namespace) -> int:
|
|
|
714
297
|
# Print success message with Fireworks dashboard link
|
|
715
298
|
print(f"\n✅ Successfully uploaded evaluator: {evaluator_id}")
|
|
716
299
|
print("📊 View in Fireworks Dashboard:")
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
api_base = os.environ.get("FIREWORKS_API_BASE", "https://api.fireworks.ai")
|
|
721
|
-
try:
|
|
722
|
-
parsed = urlparse(api_base)
|
|
723
|
-
host = parsed.netloc or parsed.path # handle cases where scheme may be missing
|
|
724
|
-
# Mapping rules:
|
|
725
|
-
# - dev.api.fireworks.ai → dev.fireworks.ai
|
|
726
|
-
# - *.api.fireworks.ai → *.app.fireworks.ai (default)
|
|
727
|
-
if host.startswith("dev.api.fireworks.ai"):
|
|
728
|
-
app_host = "dev.fireworks.ai"
|
|
729
|
-
elif host.startswith("api."):
|
|
730
|
-
app_host = host.replace("api.", "app.", 1)
|
|
731
|
-
else:
|
|
732
|
-
app_host = host
|
|
733
|
-
scheme = parsed.scheme or "https"
|
|
734
|
-
dashboard_url = f"{scheme}://{app_host}/dashboard/evaluators/{evaluator_id}"
|
|
735
|
-
except Exception:
|
|
736
|
-
dashboard_url = f"https://app.fireworks.ai/dashboard/evaluators/{evaluator_id}"
|
|
737
|
-
print(f" {dashboard_url}")
|
|
738
|
-
print()
|
|
300
|
+
dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
|
|
301
|
+
print(f" {dashboard_url}\n")
|
|
739
302
|
except Exception as e:
|
|
740
303
|
print(f"Failed to upload {qualname}: {e}")
|
|
741
304
|
exit_code = 2
|