PyPI - eval-protocol - Versions diffs - 0.2.93.dev2__py3-none-any.whl → 0.2.93.dev3__py3-none-any.whl - Mend

eval-protocol 0.2.93.dev2__py3-none-any.whl → 0.2.93.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

eval_protocol/cli_commands/upload.py CHANGED Viewed

@@ -1,255 +1,24 @@
 import argparse
 import importlib.util
-import inspect
-import json
 import os
-import pkgutil
 import re
-import runpy
 import sys
-from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, Iterable
-import pytest
-from eval_protocol.auth import (
-    get_fireworks_account_id,
-    get_fireworks_api_key,
-    get_fireworks_api_base,
-    verify_api_key_and_get_account_id,
-)
+from typing import Any, Dict
+from eval_protocol.auth import get_fireworks_api_key
 from eval_protocol.platform_api import create_or_update_fireworks_secret
 from eval_protocol.evaluation import create_evaluation
-@dataclass
-class DiscoveredTest:
-    module_path: str
-    module_name: str
-    qualname: str
-    file_path: str
-    lineno: int | None
-    has_parametrize: bool
-    param_count: int
-    nodeids: list[str]
-def _iter_python_files(root: str) -> Iterable[str]:
-    # Don't follow symlinks to avoid infinite loops
-    for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
-        # Skip common virtualenv and node paths
-        if any(
-            skip in dirpath
-            for skip in [
-                "/.venv",
-                "/venv",
-                "/node_modules",
-                "/.git",
-                "/dist",
-                "/build",
-                "/__pycache__",
-                ".egg-info",
-                "/vendor",
-            ]
-        ):
-            continue
-        # Also skip specific directories by modifying dirnames in-place
-        dirnames[:] = [
-            d
-            for d in dirnames
-            if not d.startswith(".") and d not in ["venv", "node_modules", "__pycache__", "dist", "build", "vendor"]
-        ]
-        for name in filenames:
-            # Skip setup files, test discovery scripts, __init__, and hidden files
-            if (
-                name.endswith(".py")
-                and not name.startswith(".")
-                and not name.startswith("test_discovery")
-                and name not in ["setup.py", "versioneer.py", "conf.py", "__main__.py"]
-            ):
-                yield os.path.join(dirpath, name)
-def _is_eval_protocol_test(obj: Any) -> bool:
-    # evaluation_test decorator returns a dual_mode_wrapper with _origin_func and pytest marks
-    if not callable(obj):
-        return False
-    origin = getattr(obj, "_origin_func", None)
-    if origin is None:
-        return False
-    # Must have pytest marks from evaluation_test
-    marks = getattr(obj, "pytestmark", [])
-    # Handle pytest proxy objects (APIRemovedInV1Proxy)
-    if not isinstance(marks, (list, tuple)):
-        try:
-            marks = list(marks) if marks else []
-        except (TypeError, AttributeError):
-            return False
-    return len(marks) > 0
-def _extract_param_info_from_marks(obj: Any) -> tuple[bool, int, list[str]]:
-    """Extract parametrization info from pytest marks.
-    Returns:
-        (has_parametrize, param_count, param_ids)
-    """
-    marks = getattr(obj, "pytestmark", [])
-    # Handle pytest proxy objects (APIRemovedInV1Proxy) - same as _is_eval_protocol_test
-    if not isinstance(marks, (list, tuple)):
-        try:
-            marks = list(marks) if marks else []
-        except (TypeError, AttributeError):
-            marks = []
-    has_parametrize = False
-    total_combinations = 0
-    all_param_ids: list[str] = []
-    for m in marks:
-        if getattr(m, "name", "") == "parametrize":
-            has_parametrize = True
-            # The data is in kwargs for eval_protocol's parametrization
-            kwargs = getattr(m, "kwargs", {})
-            argnames = kwargs.get("argnames", m.args[0] if m.args else "")
-            argvalues = kwargs.get("argvalues", m.args[1] if len(m.args) > 1 else [])
-            ids = kwargs.get("ids", [])
-            # Count this dimension of parameters
-            if isinstance(argvalues, (list, tuple)):
-                count = len(argvalues)
-                total_combinations = count  # For now, just use the count from this mark
-                # Use provided IDs
-                if ids and isinstance(ids, (list, tuple)):
-                    all_param_ids = list(ids[:count])
-                else:
-                    # Generate IDs based on argnames
-                    if isinstance(argnames, str) and "," not in argnames:
-                        # Single parameter
-                        all_param_ids = [f"{argnames}={i}" for i in range(count)]
-                    else:
-                        # Multiple parameters
-                        all_param_ids = [f"variant_{i}" for i in range(count)]
-    return has_parametrize, total_combinations, all_param_ids
-def _discover_tests(root: str) -> list[DiscoveredTest]:
-    abs_root = os.path.abspath(root)
-    if abs_root not in sys.path:
-        sys.path.insert(0, abs_root)
-    discovered: list[DiscoveredTest] = []
-    class CollectionPlugin:
-        """Plugin to capture collected items without running code."""
-        def __init__(self):
-            self.items = []
-        def pytest_ignore_collect(self, collection_path, config):
-            """Ignore problematic files before pytest tries to import them."""
-            # Ignore specific files
-            ignored_files = ["setup.py", "versioneer.py", "conf.py", "__main__.py"]
-            if collection_path.name in ignored_files:
-                return True
-            # Ignore hidden files (starting with .)
-            if collection_path.name.startswith("."):
-                return True
-            # Ignore test_discovery files
-            if collection_path.name.startswith("test_discovery"):
-                return True
-            return None
-        def pytest_collection_modifyitems(self, items):
-            """Hook called after collection is done."""
-            self.items = items
-    plugin = CollectionPlugin()
-    # Run pytest collection only (--collect-only prevents code execution)
-    # Override python_files to collect from ANY .py file
-    args = [
-        abs_root,
-        "--collect-only",
-        "-q",
-        "--pythonwarnings=ignore",
-        "-o",
-        "python_files=*.py",  # Override to collect all .py files
-    ]
-    try:
-        # Suppress pytest output
-        import io
-        import contextlib
-        with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
-            pytest.main(args, plugins=[plugin])
-    except Exception:
-        # If pytest collection fails, fall back to empty list
-        return []
-    # Process collected items
-    for item in plugin.items:
-        if not hasattr(item, "obj"):
-            continue
-        obj = item.obj
-        if not _is_eval_protocol_test(obj):
-            continue
-        origin = getattr(obj, "_origin_func", obj)
-        try:
-            src_file = inspect.getsourcefile(origin) or str(item.path)
-            _, lineno = inspect.getsourcelines(origin)
-        except Exception:
-            src_file, lineno = str(item.path), None
-        # Extract parametrization info from marks
-        has_parametrize, param_count, param_ids = _extract_param_info_from_marks(obj)
-        # Get module name and function name
-        module_name = (
-            item.module.__name__
-            if hasattr(item, "module")
-            else item.nodeid.split("::")[0].replace("/", ".").replace(".py", "")
-        )
-        func_name = item.name.split("[")[0] if "[" in item.name else item.name
-        # Generate nodeids
-        base_nodeid = f"{os.path.basename(src_file)}::{func_name}"
-        if param_ids:
-            nodeids = [f"{base_nodeid}[{pid}]" for pid in param_ids]
-        else:
-            nodeids = [base_nodeid]
-        discovered.append(
-            DiscoveredTest(
-                module_path=module_name,
-                module_name=module_name,
-                qualname=f"{module_name}.{func_name}",
-                file_path=os.path.abspath(src_file),
-                lineno=lineno,
-                has_parametrize=has_parametrize,
-                param_count=param_count,
-                nodeids=nodeids,
-            )
-        )
-    # Deduplicate by qualname (in case same test appears multiple times)
-    by_qual: dict[str, DiscoveredTest] = {}
-    for t in discovered:
-        existing = by_qual.get(t.qualname)
-        if not existing or t.param_count > existing.param_count:
-            by_qual[t.qualname] = t
-    return sorted(by_qual.values(), key=lambda x: (x.file_path, x.lineno or 0))
+from .utils import (
+    _build_entry_point,
+    _build_evaluator_dashboard_url,
+    _discover_and_select_tests,
+    _discover_tests,
+    _ensure_account_id,
+    _normalize_evaluator_id,
+    _prompt_select,
+)
 def _to_pyargs_nodeid(file_path: str, func_name: str) -> str | None:
@@ -364,165 +133,6 @@ def _resolve_entry_to_qual_and_source(entry: str, cwd: str) -> tuple[str, str]:
     return qualname, os.path.abspath(source_file_path) if source_file_path else ""
-def _generate_ts_mode_code(test: DiscoveredTest) -> tuple[str, str]:
-    # Deprecated: we no longer generate a shim; keep stub for import compatibility
-    return ("", "main.py")
-def _normalize_evaluator_id(evaluator_id: str) -> str:
-    """
-    Normalize evaluator ID to meet Fireworks requirements:
-    - Only lowercase a-z, 0-9, and hyphen (-)
-    - Maximum 63 characters
-    """
-    # Convert to lowercase
-    normalized = evaluator_id.lower()
-    # Replace underscores with hyphens
-    normalized = normalized.replace("_", "-")
-    # Remove any characters that aren't alphanumeric or hyphen
-    normalized = re.sub(r"[^a-z0-9-]", "", normalized)
-    # Remove consecutive hyphens
-    normalized = re.sub(r"-+", "-", normalized)
-    # Remove leading/trailing hyphens
-    normalized = normalized.strip("-")
-    # Ensure it starts with a letter (Fireworks requirement)
-    if normalized and not normalized[0].isalpha():
-        normalized = "eval-" + normalized
-    # Truncate to 63 characters
-    if len(normalized) > 63:
-        normalized = normalized[:63].rstrip("-")
-    return normalized
-def _format_test_choice(test: DiscoveredTest, idx: int) -> str:
-    """Format a test as a choice string for display."""
-    # Shorten the qualname for display
-    name = test.qualname.split(".")[-1]
-    location = f"{Path(test.file_path).name}:{test.lineno}" if test.lineno else Path(test.file_path).name
-    if test.has_parametrize and test.param_count > 1:
-        return f"{name} ({test.param_count} variants) - {location}"
-    else:
-        return f"{name} - {location}"
-def _prompt_select_interactive(tests: list[DiscoveredTest]) -> list[DiscoveredTest]:
-    """Interactive selection with arrow keys using questionary."""
-    try:
-        import questionary
-        from questionary import Style
-        # Custom style similar to Vercel CLI
-        custom_style = Style(
-            [
-                ("qmark", "fg:#673ab7 bold"),
-                ("question", "bold"),
-                ("answer", "fg:#f44336 bold"),
-                ("pointer", "fg:#673ab7 bold"),
-                ("highlighted", "fg:#673ab7 bold"),
-                ("selected", "fg:#cc5454"),
-                ("separator", "fg:#cc5454"),
-                ("instruction", ""),
-                ("text", ""),
-            ]
-        )
-        # Check if only one test - auto-select it
-        if len(tests) == 1:
-            print(f"\nFound 1 test: {_format_test_choice(tests[0], 1)}")
-            confirm = questionary.confirm("Select this test?", default=True, style=custom_style).ask()
-            if confirm:
-                return tests
-            else:
-                return []
-        # Single-select UX
-        print("\n")
-        print("Tip: Use ↑/↓ arrows to navigate and press ENTER to select.\n")
-        choices = []
-        for idx, t in enumerate(tests, 1):
-            choice_text = _format_test_choice(t, idx)
-            choices.append({"name": choice_text, "value": idx - 1})
-        selected = questionary.select(
-            "Select an evaluation test to upload:", choices=choices, style=custom_style
-        ).ask()
-        if selected is None:  # Ctrl+C
-            print("\nUpload cancelled.")
-            return []
-        print("\n✓ Selected 1 test")
-        return [tests[selected]]
-    except ImportError:
-        # Fallback to simpler implementation
-        return _prompt_select_fallback(tests)
-    except KeyboardInterrupt:
-        print("\n\nUpload cancelled.")
-        return []
-def _prompt_select_fallback(tests: list[DiscoveredTest]) -> list[DiscoveredTest]:
-    """Fallback prompt selection for when questionary is not available."""
-    print("\n" + "=" * 80)
-    print("Discovered evaluation tests:")
-    print("=" * 80)
-    print("\nTip: Install questionary for better UX: pip install questionary\n")
-    for idx, t in enumerate(tests, 1):
-        loc = f"{t.file_path}:{t.lineno}" if t.lineno else t.file_path
-        print(f"  [{idx}] {t.qualname}")
-        print(f"      Location: {loc}")
-        if t.has_parametrize and t.nodeids:
-            print(f"      Parameterized: {t.param_count} variant(s)")
-            # Show first few variants as examples
-            example_nodeids = t.nodeids[:3]
-            for nodeid in example_nodeids:
-                # Extract just the parameter part for display
-                if "[" in nodeid:
-                    param_part = nodeid.split("[", 1)[1].rstrip("]")
-                    print(f"        - {param_part}")
-            if len(t.nodeids) > 3:
-                print(f"        ... and {len(t.nodeids) - 3} more")
-        else:
-            print("      Type: Single test (no parametrization)")
-        print()
-    print("=" * 80)
-    try:
-        choice = input("Enter the number to select: ").strip()
-    except KeyboardInterrupt:
-        print("\n\nUpload cancelled.")
-        return []
-    if not choice.isdigit():
-        print("\n⚠️  Invalid selection.")
-        return []
-    n = int(choice)
-    if not (1 <= n <= len(tests)):
-        print("\n⚠️  Selection out of range.")
-        return []
-    return [tests[n - 1]]
-def _prompt_select(tests: list[DiscoveredTest], non_interactive: bool) -> list[DiscoveredTest]:
-    """Prompt user to select tests to upload."""
-    if non_interactive:
-        return tests
-    return _prompt_select_interactive(tests)
 def _load_secrets_from_env_file(env_file_path: str) -> Dict[str, str]:
     """
     Load secrets from a .env file that should be uploaded to Fireworks.
@@ -572,6 +182,7 @@ def _mask_secret_value(value: str) -> str:
 def upload_command(args: argparse.Namespace) -> int:
     root = os.path.abspath(getattr(args, "path", "."))
     entries_arg = getattr(args, "entry", None)
+    non_interactive: bool = bool(getattr(args, "yes", False))
     if entries_arg:
         entries = [e.strip() for e in re.split(r"[,\s]+", entries_arg) if e.strip()]
         selected_specs: list[tuple[str, str]] = []
@@ -579,17 +190,9 @@ def upload_command(args: argparse.Namespace) -> int:
             qualname, resolved_path = _resolve_entry_to_qual_and_source(e, root)
             selected_specs.append((qualname, resolved_path))
     else:
-        print("Scanning for evaluation tests...")
-        tests = _discover_tests(root)
-        if not tests:
-            print("No evaluation tests found.")
-            print("\nHint: Make sure your tests use the @evaluation_test decorator.")
-            return 1
-        selected_tests = _prompt_select(tests, non_interactive=bool(getattr(args, "yes", False)))
+        selected_tests = _discover_and_select_tests(root, non_interactive=non_interactive)
         if not selected_tests:
-            print("No tests selected.")
             return 1
         # Warn about parameterized tests
         parameterized_tests = [t for t in selected_tests if t.has_parametrize]
         if parameterized_tests:
@@ -607,7 +210,7 @@ def upload_command(args: argparse.Namespace) -> int:
     # Load secrets from .env file and ensure they're available on Fireworks
     try:
-        fw_account_id = get_fireworks_account_id()
+        fw_account_id = _ensure_account_id()
         # Determine .env file path
         if env_file:
@@ -624,15 +227,6 @@ def upload_command(args: argparse.Namespace) -> int:
         if fw_api_key_value and "FIREWORKS_API_KEY" not in secrets_from_file:
             secrets_from_file["FIREWORKS_API_KEY"] = fw_api_key_value
-        if not fw_account_id and fw_api_key_value:
-            # Attempt to verify and resolve account id from server headers
-            resolved = verify_api_key_and_get_account_id(api_key=fw_api_key_value, api_base=get_fireworks_api_base())
-            if resolved:
-                fw_account_id = resolved
-                # Propagate to environment so downstream calls use it if needed
-                os.environ["FIREWORKS_ACCOUNT_ID"] = fw_account_id
-                print(f"Resolved FIREWORKS_ACCOUNT_ID via API verification: {fw_account_id}")
         if fw_account_id and secrets_from_file:
             print(f"Found {len(secrets_from_file)} API keys to upload as Fireworks secrets...")
             if secrets_from_env_file and os.path.exists(env_file_path):
@@ -684,18 +278,7 @@ def upload_command(args: argparse.Namespace) -> int:
         # Compute entry point metadata for backend as a pytest nodeid usable with `pytest <entrypoint>`
         # Always prefer a path-based nodeid to work in plain pytest environments (server may not use --pyargs)
         func_name = qualname.split(".")[-1]
-        entry_point = None
-        if source_file_path:
-            # Use path relative to current working directory if possible
-            abs_path = os.path.abspath(source_file_path)
-            try:
-                rel = os.path.relpath(abs_path, root)
-            except Exception:
-                rel = abs_path
-            entry_point = f"{rel}::{func_name}"
-        else:
-            # Fallback: use filename from qualname only (rare)
-            entry_point = f"{func_name}.py::{func_name}"
+        entry_point = _build_entry_point(root, source_file_path, func_name)
         print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...")
         try:
@@ -714,28 +297,8 @@ def upload_command(args: argparse.Namespace) -> int:
             # Print success message with Fireworks dashboard link
             print(f"\n✅ Successfully uploaded evaluator: {evaluator_id}")
             print("📊 View in Fireworks Dashboard:")
-            # Map API base to app host (e.g., dev.api.fireworks.ai -> dev.app.fireworks.ai)
-            from urllib.parse import urlparse
-            api_base = os.environ.get("FIREWORKS_API_BASE", "https://api.fireworks.ai")
-            try:
-                parsed = urlparse(api_base)
-                host = parsed.netloc or parsed.path  # handle cases where scheme may be missing
-                # Mapping rules:
-                # - dev.api.fireworks.ai → dev.fireworks.ai
-                # - *.api.fireworks.ai → *.app.fireworks.ai (default)
-                if host.startswith("dev.api.fireworks.ai"):
-                    app_host = "dev.fireworks.ai"
-                elif host.startswith("api."):
-                    app_host = host.replace("api.", "app.", 1)
-                else:
-                    app_host = host
-                scheme = parsed.scheme or "https"
-                dashboard_url = f"{scheme}://{app_host}/dashboard/evaluators/{evaluator_id}"
-            except Exception:
-                dashboard_url = f"https://app.fireworks.ai/dashboard/evaluators/{evaluator_id}"
-            print(f"   {dashboard_url}")
-            print()
+            dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
+            print(f"   {dashboard_url}\n")
         except Exception as e:
             print(f"Failed to upload {qualname}: {e}")
             exit_code = 2

eval-protocol 0.2.93.dev2__py3-none-any.whl → 0.2.93.dev3__py3-none-any.whl

eval-protocol 0.2.93.dev2__py3-none-any.whl → 0.2.93.dev3__py3-none-any.whl