PyPI - evalgate-sdk - Versions diffs - 3.3.1__py3-none-any.whl - Mend

evalgate-sdk 3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80)

evalgate_sdk/__init__.py +707 -0
evalgate_sdk/_version.py +3 -0
evalgate_sdk/assertions.py +1362 -0
evalgate_sdk/auto.py +247 -0
evalgate_sdk/batch.py +174 -0
evalgate_sdk/cache.py +111 -0
evalgate_sdk/ci_context.py +123 -0
evalgate_sdk/cli/__init__.py +111 -0
evalgate_sdk/cli/api.py +261 -0
evalgate_sdk/cli/cli_constants.py +20 -0
evalgate_sdk/cli/commands.py +1041 -0
evalgate_sdk/cli/config.py +228 -0
evalgate_sdk/cli/env.py +43 -0
evalgate_sdk/cli/formatters/types.py +132 -0
evalgate_sdk/cli/golden_commands.py +322 -0
evalgate_sdk/cli/manifest.py +301 -0
evalgate_sdk/cli/new_commands.py +435 -0
evalgate_sdk/cli/policy_packs.py +103 -0
evalgate_sdk/cli/profiles.py +12 -0
evalgate_sdk/cli/regression_gate.py +312 -0
evalgate_sdk/cli/render/__init__.py +1 -0
evalgate_sdk/cli/render/snippet.py +18 -0
evalgate_sdk/cli/render/sort.py +29 -0
evalgate_sdk/cli/report/__init__.py +1 -0
evalgate_sdk/cli/report/build_check_report.py +209 -0
evalgate_sdk/cli/traces.py +186 -0
evalgate_sdk/cli/workspace.py +63 -0
evalgate_sdk/client.py +609 -0
evalgate_sdk/cluster.py +359 -0
evalgate_sdk/collector.py +161 -0
evalgate_sdk/constants.py +6 -0
evalgate_sdk/context.py +151 -0
evalgate_sdk/errors.py +236 -0
evalgate_sdk/export.py +238 -0
evalgate_sdk/formatters/__init__.py +11 -0
evalgate_sdk/formatters/github.py +51 -0
evalgate_sdk/formatters/human.py +68 -0
evalgate_sdk/formatters/json_fmt.py +11 -0
evalgate_sdk/formatters/pr_comment.py +80 -0
evalgate_sdk/golden.py +426 -0
evalgate_sdk/integrations/__init__.py +1 -0
evalgate_sdk/integrations/anthropic.py +99 -0
evalgate_sdk/integrations/autogen.py +62 -0
evalgate_sdk/integrations/crewai.py +61 -0
evalgate_sdk/integrations/langchain.py +100 -0
evalgate_sdk/integrations/openai.py +155 -0
evalgate_sdk/integrations/openai_eval.py +221 -0
evalgate_sdk/local.py +144 -0
evalgate_sdk/logger.py +123 -0
evalgate_sdk/matchers.py +62 -0
evalgate_sdk/otel.py +256 -0
evalgate_sdk/pagination.py +145 -0
evalgate_sdk/py.typed +0 -0
evalgate_sdk/pytest_plugin.py +96 -0
evalgate_sdk/reason_codes.py +103 -0
evalgate_sdk/regression.py +196 -0
evalgate_sdk/replay_decision.py +115 -0
evalgate_sdk/runtime/__init__.py +50 -0
evalgate_sdk/runtime/adapters/__init__.py +1 -0
evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
evalgate_sdk/runtime/context.py +68 -0
evalgate_sdk/runtime/eval.py +318 -0
evalgate_sdk/runtime/execution_mode.py +170 -0
evalgate_sdk/runtime/executor.py +92 -0
evalgate_sdk/runtime/registry.py +125 -0
evalgate_sdk/runtime/run_report.py +249 -0
evalgate_sdk/runtime/types.py +143 -0
evalgate_sdk/snapshot.py +219 -0
evalgate_sdk/streaming.py +124 -0
evalgate_sdk/synthesize.py +226 -0
evalgate_sdk/testing.py +128 -0
evalgate_sdk/types.py +666 -0
evalgate_sdk/utils/__init__.py +1 -0
evalgate_sdk/utils/input_hash.py +42 -0
evalgate_sdk/workflows.py +264 -0
evalgate_sdk-3.3.1.dist-info/METADATA +608 -0
evalgate_sdk-3.3.1.dist-info/RECORD +80 -0
evalgate_sdk-3.3.1.dist-info/WHEEL +4 -0
evalgate_sdk-3.3.1.dist-info/entry_points.txt +2 -0

evalgate_sdk/cli/__init__.py ADDED Viewed

@@ -0,0 +1,111 @@
+"""EvalGate CLI — command-line interface for the EvalGate.
+This module lazily initializes the CLI app only when typer is available.
+Submodules like config.py, api.py, etc. can be imported without typer.
+"""
+from __future__ import annotations
+# Lazy initialization - only set up CLI when typer is available
+app = None  # Will be initialized on first access via get_app()
+def _ensure_typer() -> None:
+    """Check that typer is installed, raise helpful error if not."""
+    try:
+        import typer  # noqa: F401
+    except ImportError as exc:
+        raise SystemExit("CLI requires typer. Install with: pip install 'evalgate-sdk[cli]'") from exc
+def get_app():
+    """Get the CLI app, initializing it if needed."""
+    global app
+    if app is not None:
+        return app
+    _ensure_typer()
+    import typer
+    app = typer.Typer(
+        name="evalgate",
+        help="EvalGate CLI — run evals, manage baselines, gate regressions.",
+        no_args_is_help=True,
+    )
+    from evalgate_sdk.cli.commands import (
+        baseline,
+        check,
+        ci,
+        configure,
+        diff,
+        discover,
+        doctor,
+        explain,
+        gate,
+        impact_analysis,
+        init,
+        migrate,
+        print_config,
+        run,
+        share,
+        upgrade,
+    )
+    app.command("init")(init)
+    app.command("run")(run)
+    app.command("gate")(gate)
+    app.command("check")(check)
+    app.command("ci")(ci)
+    app.command("doctor")(doctor)
+    app.command("discover")(discover)
+    app.command("diff")(diff)
+    app.command("explain")(explain)
+    app.command("baseline")(baseline)
+    app.command("print-config")(print_config)
+    app.command("share")(share)
+    app.command("configure")(configure)
+    app.command("upgrade")(upgrade)
+    app.command("impact-analysis")(impact_analysis)
+    app.command("migrate")(migrate)
+    from evalgate_sdk.cli.new_commands import (
+        compare,
+        promote,
+        replay,
+        start,
+        validate,
+        watch,
+    )
+    app.command("start")(start)
+    app.command("watch")(watch)
+    app.command("compare")(compare)
+    app.command("validate")(validate)
+    app.command("promote")(promote)
+    app.command("replay")(replay)
+    from evalgate_sdk.cli.golden_commands import analyze, auto_app, cluster, label, replay_decision, synthesize
+    app.command("cluster")(cluster)
+    app.command("analyze")(analyze)
+    app.command("label")(label)
+    app.command("synthesize")(synthesize)
+    app.command("replay-decision")(replay_decision)
+    app.add_typer(auto_app, name="auto")
+    return app
+def __getattr__(name: str):
+    """Lazy attribute access for 'app'."""
+    if name == "app":
+        return get_app()
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+def main() -> None:
+    """CLI entry point."""
+    cli_app = get_app()
+    cli_app()

evalgate_sdk/cli/api.py ADDED Viewed

@@ -0,0 +1,261 @@
+"""API fetch helpers for evalgate CLI commands.
+Captures x-request-id from response headers.
+Sends X-EvalGate-SDK-Version and X-EvalGate-Spec-Version on all requests.
+Port of ``cli/api.ts``.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any
+from urllib.parse import quote
+import httpx
+from evalgate_sdk._version import SDK_VERSION, SPEC_VERSION
+# Timeout handed to each ``httpx.AsyncClient`` created by the helpers below
+# (httpx interprets a bare float timeout as seconds).
+DEFAULT_TIMEOUT = 30.0
+# Version headers merged into the headers of every request built below.
+API_HEADERS = {
+    "X-EvalGate-SDK-Version": SDK_VERSION,
+    "X-EvalGate-Spec-Version": SPEC_VERSION,
+}
+@dataclass
+class QualityLatestData:
+    """Record of quality-score fields for an evaluation.
+    Every scalar field defaults to ``None``; ``breakdown`` and ``flags`` get a
+    fresh empty container per instance.
+    NOTE(review): presumably mirrors the ``data`` payload returned by
+    ``fetch_quality_latest``, which itself returns the raw dict rather than
+    this class - confirm where instances are constructed.
+    """
+    score: float | None = None
+    total: int | None = None
+    evidence_level: str | None = None
+    baseline_score: float | None = None
+    regression_delta: float | None = None
+    baseline_missing: bool | None = None
+    # default_factory avoids sharing one mutable dict/list between instances.
+    breakdown: dict[str, float] = field(default_factory=dict)
+    flags: list[str] = field(default_factory=list)
+    evaluation_run_id: int | None = None
+    evaluation_id: int | None = None
+    avg_latency_ms: float | None = None
+    cost_usd: float | None = None
+    baseline_cost_usd: float | None = None
+    baseline_run_id: int | None = None
+@dataclass
+class RunDetailsData:
+    """Container for a list of per-result dicts; defaults to a new empty list.
+    NOTE(review): presumably the shape of the run-details payload - confirm
+    against the API response.
+    """
+    results: list[dict[str, Any]] = field(default_factory=list)
+@dataclass
+class FetchOptions:
+    """Request options consumed by ``fetch_api``."""
+    # Sent as the Bearer token; ``fetch_api`` rejects a blank value.
+    api_key: str = ""
+    # Prefix for the request URL; trailing slashes are stripped by ``fetch_api``.
+    base_url: str = ""
+    # HTTP method passed to ``client.request``.
+    method: str = "GET"
+    # Sent as the JSON body when truthy; ``None`` (or an empty dict) sends no body.
+    body: dict[str, Any] | None = None
+@dataclass
+class ImportResult:
+    """Record describing the outcome of a single test case.
+    NOTE(review): presumably one entry of the ``results`` list sent by
+    ``import_run_on_fail`` - that helper accepts plain dicts, so confirm how
+    this class is serialized.
+    """
+    test_case_id: int = 0
+    status: str = "passed"
+    output: str = ""
+    latency_ms: float | None = None
+    cost_usd: float | None = None
+    assertions_json: dict[str, Any] | None = None
+@dataclass
+class PublishShareResult:
+    """Record of a published share: id, URL and scope, all defaulting to ``""``.
+    NOTE(review): presumably the parsed response of ``publish_share``, which
+    returns the raw dict - confirm where instances are constructed.
+    """
+    share_id: str = ""
+    share_url: str = ""
+    share_scope: str = ""
+def _require_api_key(api_key: str) -> str:
+    """Validate that the API key is present."""
+    if not api_key or not api_key.strip():
+        raise ValueError("API key is required but was empty. Set EVALGATE_API_KEY or pass --api-key.")
+    return api_key.strip()
+async def fetch_api(
+    path: str,
+    opts: FetchOptions,
+) -> dict[str, Any]:
+    """Generic authenticated fetch to any API endpoint."""
+    key = _require_api_key(opts.api_key)
+    headers = {
+        **API_HEADERS,
+        "Authorization": f"Bearer {key}",
+    }
+    url = f"{opts.base_url.rstrip('/')}{path}"
+    async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
+        if opts.body:
+            headers["Content-Type"] = "application/json"
+            resp = await client.request(opts.method, url, headers=headers, json=opts.body)
+        else:
+            resp = await client.request(opts.method, url, headers=headers)
+        if resp.status_code >= 400:
+            raise RuntimeError(f"API {resp.status_code}: {resp.text[:200]}")
+        return resp.json()
+async def fetch_quality_latest(
+    base_url: str,
+    api_key: str,
+    evaluation_id: str,
+    baseline: str,
+) -> dict[str, Any]:
+    """Fetch latest quality data for an evaluation.
+    Returns ``{"ok": True, "data": {...}, "request_id": ...}`` on success,
+    or ``{"ok": False, "status": ..., "body": ..., "request_id": ...}`` on failure.
+    """
+    key = _require_api_key(api_key)
+    headers = {**API_HEADERS, "Authorization": f"Bearer {key}"}
+    url = (
+        f"{base_url.rstrip('/')}/api/quality?evaluationId="
+        f"{quote(str(evaluation_id), safe='')}&action=latest&baseline="
+        f"{quote(str(baseline), safe='')}"
+    )
+    try:
+        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
+            resp = await client.get(url, headers=headers)
+            request_id = resp.headers.get("x-request-id")
+            body = resp.text
+            if resp.status_code >= 400:
+                return {"ok": False, "status": resp.status_code, "body": body, "request_id": request_id}
+            data = resp.json()
+            return {"ok": True, "data": data, "request_id": request_id}
+    except Exception as exc:
+        return {"ok": False, "status": 0, "body": str(exc), "request_id": None}
+async def fetch_run_details(
+    base_url: str,
+    api_key: str,
+    evaluation_id: str,
+    run_id: int,
+) -> dict[str, Any]:
+    """Fetch run details for an evaluation run."""
+    key = _require_api_key(api_key)
+    headers = {**API_HEADERS, "Authorization": f"Bearer {key}"}
+    url = f"{base_url.rstrip('/')}/api/evaluations/{quote(str(evaluation_id), safe='')}/runs/{run_id}"
+    try:
+        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
+            resp = await client.get(url, headers=headers)
+            if resp.status_code >= 400:
+                return {"ok": False}
+            return {"ok": True, "data": resp.json()}
+    except Exception:
+        return {"ok": False}
+async def fetch_run_export(
+    base_url: str,
+    api_key: str,
+    evaluation_id: str,
+    run_id: int,
+) -> dict[str, Any]:
+    """Fetch run export data."""
+    key = _require_api_key(api_key)
+    headers = {**API_HEADERS, "Authorization": f"Bearer {key}"}
+    url = f"{base_url.rstrip('/')}/api/evaluations/{quote(str(evaluation_id), safe='')}/runs/{run_id}/export"
+    try:
+        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
+            resp = await client.get(url, headers=headers)
+            body = resp.text
+            if resp.status_code >= 400:
+                return {"ok": False, "status": resp.status_code, "body": body}
+            return {"ok": True, "export_data": resp.json()}
+    except Exception as exc:
+        return {"ok": False, "status": 0, "body": str(exc)}
+async def publish_share(
+    base_url: str,
+    api_key: str,
+    evaluation_id: str,
+    export_data: dict[str, Any],
+    evaluation_run_id: int,
+    expires_in_days: int | None = None,
+) -> dict[str, Any]:
+    """Publish a shared report."""
+    key = _require_api_key(api_key)
+    headers = {
+        **API_HEADERS,
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    body: dict[str, Any] = {
+        "exportData": export_data,
+        "shareScope": "run",
+        "evaluationRunId": evaluation_run_id,
+    }
+    if expires_in_days is not None:
+        body["expiresInDays"] = expires_in_days
+    url = f"{base_url.rstrip('/')}/api/evaluations/{quote(str(evaluation_id), safe='')}/publish"
+    try:
+        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
+            resp = await client.post(url, headers=headers, json=body)
+            text = resp.text
+            if resp.status_code >= 400:
+                return {"ok": False, "status": resp.status_code, "body": text}
+            return {"ok": True, "data": resp.json()}
+    except Exception as exc:
+        return {"ok": False, "status": 0, "body": str(exc)}
+async def import_run_on_fail(
+    base_url: str,
+    api_key: str,
+    evaluation_id: str,
+    results: list[dict[str, Any]],
+    idempotency_key: str | None = None,
+    ci: dict[str, Any] | None = None,
+    import_client_version: str | None = None,
+    check_report: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Import run results on failure."""
+    key = _require_api_key(api_key)
+    headers: dict[str, str] = {
+        **API_HEADERS,
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    if idempotency_key:
+        headers["Idempotency-Key"] = idempotency_key
+    body: dict[str, Any] = {
+        "environment": "dev",
+        "results": results,
+        "importClientVersion": import_client_version or "evalgate-cli",
+    }
+    if ci:
+        body["ci"] = ci
+    if check_report:
+        body["checkReport"] = check_report
+    url = f"{base_url.rstrip('/')}/api/evaluations/{quote(str(evaluation_id), safe='')}/runs/import"
+    try:
+        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
+            resp = await client.post(url, headers=headers, json=body)
+            text = resp.text
+            if resp.status_code >= 400:
+                return {"ok": False, "status": resp.status_code, "body": text}
+            data = resp.json()
+            return {"ok": True, "run_id": data.get("runId")}
+    except Exception as exc:
+        return {"ok": False, "status": 0, "body": str(exc)}

evalgate_sdk/cli/cli_constants.py ADDED Viewed

@@ -0,0 +1,20 @@
+"""Standardized exit codes for evalgate check.
+Port of ``cli/constants.ts``.
+"""
+from __future__ import annotations
+class EXIT:
+    """Exit code constants for CLI commands."""
+    # Plain class attributes (not an Enum): each name is an ordinary int, and
+    # the nine values are distinct and contiguous from 0 to 8.
+    # NOTE(review): the per-code meanings are implied by the names only -
+    # confirm against the commands that return them.
+    PASS = 0
+    SCORE_BELOW = 1
+    REGRESSION = 2
+    POLICY_VIOLATION = 3
+    API_ERROR = 4
+    BAD_ARGS = 5
+    LOW_N = 6
+    WEAK_EVIDENCE = 7
+    WARN_REGRESSION = 8