eval-protocol 0.2.93.dev2__py3-none-any.whl → 0.2.93.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,255 +1,24 @@
1
1
  import argparse
2
2
  import importlib.util
3
- import inspect
4
- import json
5
3
  import os
6
- import pkgutil
7
4
  import re
8
- import runpy
9
5
  import sys
10
- from dataclasses import dataclass
11
6
  from pathlib import Path
12
- from typing import Any, Dict, Iterable
13
-
14
- import pytest
15
- from eval_protocol.auth import (
16
- get_fireworks_account_id,
17
- get_fireworks_api_key,
18
- get_fireworks_api_base,
19
- verify_api_key_and_get_account_id,
20
- )
7
+ from typing import Any, Dict
8
+
9
+ from eval_protocol.auth import get_fireworks_api_key
21
10
  from eval_protocol.platform_api import create_or_update_fireworks_secret
22
11
 
23
12
  from eval_protocol.evaluation import create_evaluation
24
-
25
-
26
- @dataclass
27
- class DiscoveredTest:
28
- module_path: str
29
- module_name: str
30
- qualname: str
31
- file_path: str
32
- lineno: int | None
33
- has_parametrize: bool
34
- param_count: int
35
- nodeids: list[str]
36
-
37
-
38
- def _iter_python_files(root: str) -> Iterable[str]:
39
- # Don't follow symlinks to avoid infinite loops
40
- for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
41
- # Skip common virtualenv and node paths
42
- if any(
43
- skip in dirpath
44
- for skip in [
45
- "/.venv",
46
- "/venv",
47
- "/node_modules",
48
- "/.git",
49
- "/dist",
50
- "/build",
51
- "/__pycache__",
52
- ".egg-info",
53
- "/vendor",
54
- ]
55
- ):
56
- continue
57
- # Also skip specific directories by modifying dirnames in-place
58
- dirnames[:] = [
59
- d
60
- for d in dirnames
61
- if not d.startswith(".") and d not in ["venv", "node_modules", "__pycache__", "dist", "build", "vendor"]
62
- ]
63
-
64
- for name in filenames:
65
- # Skip setup files, test discovery scripts, __init__, and hidden files
66
- if (
67
- name.endswith(".py")
68
- and not name.startswith(".")
69
- and not name.startswith("test_discovery")
70
- and name not in ["setup.py", "versioneer.py", "conf.py", "__main__.py"]
71
- ):
72
- yield os.path.join(dirpath, name)
73
-
74
-
75
- def _is_eval_protocol_test(obj: Any) -> bool:
76
- # evaluation_test decorator returns a dual_mode_wrapper with _origin_func and pytest marks
77
- if not callable(obj):
78
- return False
79
- origin = getattr(obj, "_origin_func", None)
80
- if origin is None:
81
- return False
82
- # Must have pytest marks from evaluation_test
83
- marks = getattr(obj, "pytestmark", [])
84
- # Handle pytest proxy objects (APIRemovedInV1Proxy)
85
- if not isinstance(marks, (list, tuple)):
86
- try:
87
- marks = list(marks) if marks else []
88
- except (TypeError, AttributeError):
89
- return False
90
- return len(marks) > 0
91
-
92
-
93
- def _extract_param_info_from_marks(obj: Any) -> tuple[bool, int, list[str]]:
94
- """Extract parametrization info from pytest marks.
95
-
96
- Returns:
97
- (has_parametrize, param_count, param_ids)
98
- """
99
- marks = getattr(obj, "pytestmark", [])
100
-
101
- # Handle pytest proxy objects (APIRemovedInV1Proxy) - same as _is_eval_protocol_test
102
- if not isinstance(marks, (list, tuple)):
103
- try:
104
- marks = list(marks) if marks else []
105
- except (TypeError, AttributeError):
106
- marks = []
107
-
108
- has_parametrize = False
109
- total_combinations = 0
110
- all_param_ids: list[str] = []
111
-
112
- for m in marks:
113
- if getattr(m, "name", "") == "parametrize":
114
- has_parametrize = True
115
- # The data is in kwargs for eval_protocol's parametrization
116
- kwargs = getattr(m, "kwargs", {})
117
- argnames = kwargs.get("argnames", m.args[0] if m.args else "")
118
- argvalues = kwargs.get("argvalues", m.args[1] if len(m.args) > 1 else [])
119
- ids = kwargs.get("ids", [])
120
-
121
- # Count this dimension of parameters
122
- if isinstance(argvalues, (list, tuple)):
123
- count = len(argvalues)
124
- total_combinations = count # For now, just use the count from this mark
125
-
126
- # Use provided IDs
127
- if ids and isinstance(ids, (list, tuple)):
128
- all_param_ids = list(ids[:count])
129
- else:
130
- # Generate IDs based on argnames
131
- if isinstance(argnames, str) and "," not in argnames:
132
- # Single parameter
133
- all_param_ids = [f"{argnames}={i}" for i in range(count)]
134
- else:
135
- # Multiple parameters
136
- all_param_ids = [f"variant_{i}" for i in range(count)]
137
-
138
- return has_parametrize, total_combinations, all_param_ids
139
-
140
-
141
- def _discover_tests(root: str) -> list[DiscoveredTest]:
142
- abs_root = os.path.abspath(root)
143
- if abs_root not in sys.path:
144
- sys.path.insert(0, abs_root)
145
-
146
- discovered: list[DiscoveredTest] = []
147
-
148
- class CollectionPlugin:
149
- """Plugin to capture collected items without running code."""
150
-
151
- def __init__(self):
152
- self.items = []
153
-
154
- def pytest_ignore_collect(self, collection_path, config):
155
- """Ignore problematic files before pytest tries to import them."""
156
- # Ignore specific files
157
- ignored_files = ["setup.py", "versioneer.py", "conf.py", "__main__.py"]
158
- if collection_path.name in ignored_files:
159
- return True
160
-
161
- # Ignore hidden files (starting with .)
162
- if collection_path.name.startswith("."):
163
- return True
164
-
165
- # Ignore test_discovery files
166
- if collection_path.name.startswith("test_discovery"):
167
- return True
168
-
169
- return None
170
-
171
- def pytest_collection_modifyitems(self, items):
172
- """Hook called after collection is done."""
173
- self.items = items
174
-
175
- plugin = CollectionPlugin()
176
-
177
- # Run pytest collection only (--collect-only prevents code execution)
178
- # Override python_files to collect from ANY .py file
179
- args = [
180
- abs_root,
181
- "--collect-only",
182
- "-q",
183
- "--pythonwarnings=ignore",
184
- "-o",
185
- "python_files=*.py", # Override to collect all .py files
186
- ]
187
-
188
- try:
189
- # Suppress pytest output
190
- import io
191
- import contextlib
192
-
193
- with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
194
- pytest.main(args, plugins=[plugin])
195
- except Exception:
196
- # If pytest collection fails, fall back to empty list
197
- return []
198
-
199
- # Process collected items
200
- for item in plugin.items:
201
- if not hasattr(item, "obj"):
202
- continue
203
-
204
- obj = item.obj
205
- if not _is_eval_protocol_test(obj):
206
- continue
207
-
208
- origin = getattr(obj, "_origin_func", obj)
209
- try:
210
- src_file = inspect.getsourcefile(origin) or str(item.path)
211
- _, lineno = inspect.getsourcelines(origin)
212
- except Exception:
213
- src_file, lineno = str(item.path), None
214
-
215
- # Extract parametrization info from marks
216
- has_parametrize, param_count, param_ids = _extract_param_info_from_marks(obj)
217
-
218
- # Get module name and function name
219
- module_name = (
220
- item.module.__name__
221
- if hasattr(item, "module")
222
- else item.nodeid.split("::")[0].replace("/", ".").replace(".py", "")
223
- )
224
- func_name = item.name.split("[")[0] if "[" in item.name else item.name
225
-
226
- # Generate nodeids
227
- base_nodeid = f"{os.path.basename(src_file)}::{func_name}"
228
- if param_ids:
229
- nodeids = [f"{base_nodeid}[{pid}]" for pid in param_ids]
230
- else:
231
- nodeids = [base_nodeid]
232
-
233
- discovered.append(
234
- DiscoveredTest(
235
- module_path=module_name,
236
- module_name=module_name,
237
- qualname=f"{module_name}.{func_name}",
238
- file_path=os.path.abspath(src_file),
239
- lineno=lineno,
240
- has_parametrize=has_parametrize,
241
- param_count=param_count,
242
- nodeids=nodeids,
243
- )
244
- )
245
-
246
- # Deduplicate by qualname (in case same test appears multiple times)
247
- by_qual: dict[str, DiscoveredTest] = {}
248
- for t in discovered:
249
- existing = by_qual.get(t.qualname)
250
- if not existing or t.param_count > existing.param_count:
251
- by_qual[t.qualname] = t
252
- return sorted(by_qual.values(), key=lambda x: (x.file_path, x.lineno or 0))
13
+ from .utils import (
14
+ _build_entry_point,
15
+ _build_evaluator_dashboard_url,
16
+ _discover_and_select_tests,
17
+ _discover_tests,
18
+ _ensure_account_id,
19
+ _normalize_evaluator_id,
20
+ _prompt_select,
21
+ )
253
22
 
254
23
 
255
24
  def _to_pyargs_nodeid(file_path: str, func_name: str) -> str | None:
@@ -364,165 +133,6 @@ def _resolve_entry_to_qual_and_source(entry: str, cwd: str) -> tuple[str, str]:
364
133
  return qualname, os.path.abspath(source_file_path) if source_file_path else ""
365
134
 
366
135
 
367
- def _generate_ts_mode_code(test: DiscoveredTest) -> tuple[str, str]:
368
- # Deprecated: we no longer generate a shim; keep stub for import compatibility
369
- return ("", "main.py")
370
-
371
-
372
- def _normalize_evaluator_id(evaluator_id: str) -> str:
373
- """
374
- Normalize evaluator ID to meet Fireworks requirements:
375
- - Only lowercase a-z, 0-9, and hyphen (-)
376
- - Maximum 63 characters
377
- """
378
- # Convert to lowercase
379
- normalized = evaluator_id.lower()
380
-
381
- # Replace underscores with hyphens
382
- normalized = normalized.replace("_", "-")
383
-
384
- # Remove any characters that aren't alphanumeric or hyphen
385
- normalized = re.sub(r"[^a-z0-9-]", "", normalized)
386
-
387
- # Remove consecutive hyphens
388
- normalized = re.sub(r"-+", "-", normalized)
389
-
390
- # Remove leading/trailing hyphens
391
- normalized = normalized.strip("-")
392
-
393
- # Ensure it starts with a letter (Fireworks requirement)
394
- if normalized and not normalized[0].isalpha():
395
- normalized = "eval-" + normalized
396
-
397
- # Truncate to 63 characters
398
- if len(normalized) > 63:
399
- normalized = normalized[:63].rstrip("-")
400
-
401
- return normalized
402
-
403
-
404
- def _format_test_choice(test: DiscoveredTest, idx: int) -> str:
405
- """Format a test as a choice string for display."""
406
- # Shorten the qualname for display
407
- name = test.qualname.split(".")[-1]
408
- location = f"{Path(test.file_path).name}:{test.lineno}" if test.lineno else Path(test.file_path).name
409
-
410
- if test.has_parametrize and test.param_count > 1:
411
- return f"{name} ({test.param_count} variants) - {location}"
412
- else:
413
- return f"{name} - {location}"
414
-
415
-
416
- def _prompt_select_interactive(tests: list[DiscoveredTest]) -> list[DiscoveredTest]:
417
- """Interactive selection with arrow keys using questionary."""
418
- try:
419
- import questionary
420
- from questionary import Style
421
-
422
- # Custom style similar to Vercel CLI
423
- custom_style = Style(
424
- [
425
- ("qmark", "fg:#673ab7 bold"),
426
- ("question", "bold"),
427
- ("answer", "fg:#f44336 bold"),
428
- ("pointer", "fg:#673ab7 bold"),
429
- ("highlighted", "fg:#673ab7 bold"),
430
- ("selected", "fg:#cc5454"),
431
- ("separator", "fg:#cc5454"),
432
- ("instruction", ""),
433
- ("text", ""),
434
- ]
435
- )
436
-
437
- # Check if only one test - auto-select it
438
- if len(tests) == 1:
439
- print(f"\nFound 1 test: {_format_test_choice(tests[0], 1)}")
440
- confirm = questionary.confirm("Select this test?", default=True, style=custom_style).ask()
441
- if confirm:
442
- return tests
443
- else:
444
- return []
445
-
446
- # Single-select UX
447
- print("\n")
448
- print("Tip: Use ↑/↓ arrows to navigate and press ENTER to select.\n")
449
-
450
- choices = []
451
- for idx, t in enumerate(tests, 1):
452
- choice_text = _format_test_choice(t, idx)
453
- choices.append({"name": choice_text, "value": idx - 1})
454
-
455
- selected = questionary.select(
456
- "Select an evaluation test to upload:", choices=choices, style=custom_style
457
- ).ask()
458
-
459
- if selected is None: # Ctrl+C
460
- print("\nUpload cancelled.")
461
- return []
462
-
463
- print("\n✓ Selected 1 test")
464
- return [tests[selected]]
465
-
466
- except ImportError:
467
- # Fallback to simpler implementation
468
- return _prompt_select_fallback(tests)
469
- except KeyboardInterrupt:
470
- print("\n\nUpload cancelled.")
471
- return []
472
-
473
-
474
- def _prompt_select_fallback(tests: list[DiscoveredTest]) -> list[DiscoveredTest]:
475
- """Fallback prompt selection for when questionary is not available."""
476
- print("\n" + "=" * 80)
477
- print("Discovered evaluation tests:")
478
- print("=" * 80)
479
- print("\nTip: Install questionary for better UX: pip install questionary\n")
480
-
481
- for idx, t in enumerate(tests, 1):
482
- loc = f"{t.file_path}:{t.lineno}" if t.lineno else t.file_path
483
- print(f" [{idx}] {t.qualname}")
484
- print(f" Location: {loc}")
485
-
486
- if t.has_parametrize and t.nodeids:
487
- print(f" Parameterized: {t.param_count} variant(s)")
488
- # Show first few variants as examples
489
- example_nodeids = t.nodeids[:3]
490
- for nodeid in example_nodeids:
491
- # Extract just the parameter part for display
492
- if "[" in nodeid:
493
- param_part = nodeid.split("[", 1)[1].rstrip("]")
494
- print(f" - {param_part}")
495
- if len(t.nodeids) > 3:
496
- print(f" ... and {len(t.nodeids) - 3} more")
497
- else:
498
- print(" Type: Single test (no parametrization)")
499
- print()
500
-
501
- print("=" * 80)
502
- try:
503
- choice = input("Enter the number to select: ").strip()
504
- except KeyboardInterrupt:
505
- print("\n\nUpload cancelled.")
506
- return []
507
-
508
- if not choice.isdigit():
509
- print("\n⚠️ Invalid selection.")
510
- return []
511
- n = int(choice)
512
- if not (1 <= n <= len(tests)):
513
- print("\n⚠️ Selection out of range.")
514
- return []
515
- return [tests[n - 1]]
516
-
517
-
518
- def _prompt_select(tests: list[DiscoveredTest], non_interactive: bool) -> list[DiscoveredTest]:
519
- """Prompt user to select tests to upload."""
520
- if non_interactive:
521
- return tests
522
-
523
- return _prompt_select_interactive(tests)
524
-
525
-
526
136
  def _load_secrets_from_env_file(env_file_path: str) -> Dict[str, str]:
527
137
  """
528
138
  Load secrets from a .env file that should be uploaded to Fireworks.
@@ -572,6 +182,7 @@ def _mask_secret_value(value: str) -> str:
572
182
  def upload_command(args: argparse.Namespace) -> int:
573
183
  root = os.path.abspath(getattr(args, "path", "."))
574
184
  entries_arg = getattr(args, "entry", None)
185
+ non_interactive: bool = bool(getattr(args, "yes", False))
575
186
  if entries_arg:
576
187
  entries = [e.strip() for e in re.split(r"[,\s]+", entries_arg) if e.strip()]
577
188
  selected_specs: list[tuple[str, str]] = []
@@ -579,17 +190,9 @@ def upload_command(args: argparse.Namespace) -> int:
579
190
  qualname, resolved_path = _resolve_entry_to_qual_and_source(e, root)
580
191
  selected_specs.append((qualname, resolved_path))
581
192
  else:
582
- print("Scanning for evaluation tests...")
583
- tests = _discover_tests(root)
584
- if not tests:
585
- print("No evaluation tests found.")
586
- print("\nHint: Make sure your tests use the @evaluation_test decorator.")
587
- return 1
588
- selected_tests = _prompt_select(tests, non_interactive=bool(getattr(args, "yes", False)))
193
+ selected_tests = _discover_and_select_tests(root, non_interactive=non_interactive)
589
194
  if not selected_tests:
590
- print("No tests selected.")
591
195
  return 1
592
-
593
196
  # Warn about parameterized tests
594
197
  parameterized_tests = [t for t in selected_tests if t.has_parametrize]
595
198
  if parameterized_tests:
@@ -607,7 +210,7 @@ def upload_command(args: argparse.Namespace) -> int:
607
210
 
608
211
  # Load secrets from .env file and ensure they're available on Fireworks
609
212
  try:
610
- fw_account_id = get_fireworks_account_id()
213
+ fw_account_id = _ensure_account_id()
611
214
 
612
215
  # Determine .env file path
613
216
  if env_file:
@@ -624,15 +227,6 @@ def upload_command(args: argparse.Namespace) -> int:
624
227
  if fw_api_key_value and "FIREWORKS_API_KEY" not in secrets_from_file:
625
228
  secrets_from_file["FIREWORKS_API_KEY"] = fw_api_key_value
626
229
 
627
- if not fw_account_id and fw_api_key_value:
628
- # Attempt to verify and resolve account id from server headers
629
- resolved = verify_api_key_and_get_account_id(api_key=fw_api_key_value, api_base=get_fireworks_api_base())
630
- if resolved:
631
- fw_account_id = resolved
632
- # Propagate to environment so downstream calls use it if needed
633
- os.environ["FIREWORKS_ACCOUNT_ID"] = fw_account_id
634
- print(f"Resolved FIREWORKS_ACCOUNT_ID via API verification: {fw_account_id}")
635
-
636
230
  if fw_account_id and secrets_from_file:
637
231
  print(f"Found {len(secrets_from_file)} API keys to upload as Fireworks secrets...")
638
232
  if secrets_from_env_file and os.path.exists(env_file_path):
@@ -684,18 +278,7 @@ def upload_command(args: argparse.Namespace) -> int:
684
278
  # Compute entry point metadata for backend as a pytest nodeid usable with `pytest <entrypoint>`
685
279
  # Always prefer a path-based nodeid to work in plain pytest environments (server may not use --pyargs)
686
280
  func_name = qualname.split(".")[-1]
687
- entry_point = None
688
- if source_file_path:
689
- # Use path relative to current working directory if possible
690
- abs_path = os.path.abspath(source_file_path)
691
- try:
692
- rel = os.path.relpath(abs_path, root)
693
- except Exception:
694
- rel = abs_path
695
- entry_point = f"{rel}::{func_name}"
696
- else:
697
- # Fallback: use filename from qualname only (rare)
698
- entry_point = f"{func_name}.py::{func_name}"
281
+ entry_point = _build_entry_point(root, source_file_path, func_name)
699
282
 
700
283
  print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...")
701
284
  try:
@@ -714,28 +297,8 @@ def upload_command(args: argparse.Namespace) -> int:
714
297
  # Print success message with Fireworks dashboard link
715
298
  print(f"\n✅ Successfully uploaded evaluator: {evaluator_id}")
716
299
  print("📊 View in Fireworks Dashboard:")
717
- # Map API base to app host (e.g., dev.api.fireworks.ai -> dev.app.fireworks.ai)
718
- from urllib.parse import urlparse
719
-
720
- api_base = os.environ.get("FIREWORKS_API_BASE", "https://api.fireworks.ai")
721
- try:
722
- parsed = urlparse(api_base)
723
- host = parsed.netloc or parsed.path # handle cases where scheme may be missing
724
- # Mapping rules:
725
- # - dev.api.fireworks.ai → dev.fireworks.ai
726
- # - *.api.fireworks.ai → *.app.fireworks.ai (default)
727
- if host.startswith("dev.api.fireworks.ai"):
728
- app_host = "dev.fireworks.ai"
729
- elif host.startswith("api."):
730
- app_host = host.replace("api.", "app.", 1)
731
- else:
732
- app_host = host
733
- scheme = parsed.scheme or "https"
734
- dashboard_url = f"{scheme}://{app_host}/dashboard/evaluators/{evaluator_id}"
735
- except Exception:
736
- dashboard_url = f"https://app.fireworks.ai/dashboard/evaluators/{evaluator_id}"
737
- print(f" {dashboard_url}")
738
- print()
300
+ dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
301
+ print(f" {dashboard_url}\n")
739
302
  except Exception as e:
740
303
  print(f"Failed to upload {qualname}: {e}")
741
304
  exit_code = 2