PyPI - evalguard-python - Versions diffs - 1.1.0__py3-none-any.whl - Mend

evalguard-python 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

evalguard/__init__.py +42 -0
evalguard/anthropic.py +182 -0
evalguard/bedrock.py +280 -0
evalguard/client.py +516 -0
evalguard/crewai.py +189 -0
evalguard/fastapi.py +273 -0
evalguard/guardrails.py +160 -0
evalguard/langchain.py +218 -0
evalguard/nemoclaw.py +251 -0
evalguard/openai.py +194 -0
evalguard/types.py +142 -0
evalguard_python-1.1.0.dist-info/METADATA +362 -0
evalguard_python-1.1.0.dist-info/RECORD +15 -0
evalguard_python-1.1.0.dist-info/WHEEL +5 -0
evalguard_python-1.1.0.dist-info/top_level.txt +1 -0

evalguard/client.py ADDED Viewed

@@ -0,0 +1,516 @@
+"""EvalGuard Python SDK — HTTP client for the EvalGuard API."""
+from __future__ import annotations
+import time
+from typing import Any, Dict, List, Optional
+from urllib.parse import urlparse
+import requests
+from .types import (
+    BenchmarkResult,
+    ComplianceReport,
+    DriftReport,
+    EvalResult,
+    FirewallResult,
+    SecurityScanResult,
+)
+class EvalGuardError(Exception):
+    """Base exception for EvalGuard API errors."""
+    def __init__(self, message: str, status_code: int | None = None, body: Any = None):
+        super().__init__(message)
+        self.status_code = status_code
+        self.body = body
+class EvalGuardClient:
+    """Client for the EvalGuard REST API.
+    Example::
+        from evalguard import EvalGuardClient
+        client = EvalGuardClient(api_key="eg_live_...")
+        result = client.run_eval({
+            "model": "gpt-4o",
+            "prompt": "Answer: {{input}}",
+            "cases": [{"input": "hello", "expectedOutput": "hello"}],
+            "scorers": ["exact-match"],
+        })
+        print(result)
+    """
+    def __init__(
+        self,
+        api_key: str,
+        base_url: str = "https://api.evalguard.ai",
+        timeout: float = 120.0,
+    ) -> None:
+        self.api_key = api_key
+        self.base_url = base_url.rstrip("/")
+        self.timeout = timeout
+        # Enforce HTTPS for non-local URLs
+        parsed = urlparse(self.base_url)
+        is_local = parsed.hostname in ('localhost', '127.0.0.1')
+        if parsed.scheme != 'https' and not is_local:
+            raise ValueError(
+                "EvalGuard: base_url must use HTTPS. "
+                "Only localhost/127.0.0.1 may use HTTP."
+            )
+        self.session = requests.Session()
+        self.session.headers.update(
+            {
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+                "User-Agent": "evalguard-python/1.0.0",
+            }
+        )
+    # ── Helpers ──────────────────────────────────────────────────────────
+    def _url(self, path: str) -> str:
+        return f"{self.base_url}{path}"
+    def _request(self, method: str, path: str, **kwargs: Any) -> Any:
+        kwargs.setdefault("timeout", self.timeout)
+        max_retries = 3
+        last_error = None
+        for attempt in range(max_retries + 1):
+            try:
+                response = self.session.request(method, self._url(path), **kwargs)
+                if response.status_code == 429:
+                    retry_after = int(response.headers.get("Retry-After", 0))
+                    delay = retry_after if retry_after > 0 else min(2 ** attempt, 60)
+                    if attempt < max_retries:
+                        time.sleep(delay)
+                        continue
+                if response.status_code >= 500 and attempt < max_retries:
+                    time.sleep(2 ** attempt)
+                    continue
+                if not response.ok:
+                    raise EvalGuardError(
+                        f"API error {response.status_code}: {response.text[:500]}",
+                        status_code=response.status_code,
+                        body=response.text[:500],
+                    )
+                if response.status_code == 204:
+                    return None
+                return response.json()
+            except EvalGuardError:
+                raise
+            except Exception as e:
+                last_error = e
+                if attempt < max_retries:
+                    time.sleep(2 ** attempt)
+                    continue
+                raise EvalGuardError(f"Request failed: {e}")
+        raise EvalGuardError(f"Request failed after {max_retries} retries: {last_error}")
+    def _get(self, path: str, params: Dict[str, Any] | None = None) -> Any:
+        return self._request("GET", path, params=params)
+    def _post(self, path: str, json: Any = None) -> Any:
+        return self._request("POST", path, json=json)
+    # ── Eval endpoints ───────────────────────────────────────────────────
+    def run_eval(self, config: Dict[str, Any]) -> Dict[str, Any]:
+        """Run an evaluation with the given config and return results."""
+        return self._post("/v1/evals/run", json=config)
+    def get_eval(self, run_id: str) -> Dict[str, Any]:
+        """Fetch a specific eval run by ID."""
+        return self._get(f"/v1/evals/{run_id}")
+    def list_evals(self, project_id: Optional[str] = None) -> List[Dict[str, Any]]:
+        """List eval runs, optionally filtered by project."""
+        params = {}
+        if project_id:
+            params["projectId"] = project_id
+        return self._get("/v1/evals", params=params)
+    # ── Security scan endpoints ──────────────────────────────────────────
+    def run_scan(self, config: Dict[str, Any]) -> Dict[str, Any]:
+        """Run a security scan (red-team) against a model."""
+        return self._post("/v1/scans/run", json=config)
+    def get_scan(self, scan_id: str) -> Dict[str, Any]:
+        """Fetch a specific security scan by ID."""
+        return self._get(f"/v1/scans/{scan_id}")
+    # ── Scorers & plugins ────────────────────────────────────────────────
+    def list_scorers(self) -> List[Dict[str, Any]]:
+        """List all available evaluation scorers."""
+        return self._get("/v1/scorers")
+    def list_plugins(self) -> List[Dict[str, Any]]:
+        """List all available security plugins."""
+        return self._get("/v1/plugins")
+    # ── Firewall ─────────────────────────────────────────────────────────
+    def check_firewall(
+        self,
+        input_text: str,
+        rules: Optional[List[Dict[str, Any]]] = None,
+    ) -> Dict[str, Any]:
+        """Check input text against firewall rules."""
+        payload: Dict[str, Any] = {"input": input_text}
+        if rules is not None:
+            payload["rules"] = rules
+        return self._post("/v1/firewall/check", json=payload)
+    # ── Benchmarks ───────────────────────────────────────────────────────
+    def run_benchmarks(
+        self,
+        suites: List[str],
+        model: str,
+    ) -> Dict[str, Any]:
+        """Run benchmark suites against a model."""
+        return self._post("/v1/benchmarks/run", json={"suites": suites, "model": model})
+    # ── Export ────────────────────────────────────────────────────────────
+    def export_dpo(self, run_id: str) -> str:
+        """Export eval results as DPO training data (JSONL)."""
+        resp = self.session.get(
+            self._url(f"/v1/evals/{run_id}/export/dpo"),
+            timeout=self.timeout,
+        )
+        if not resp.ok:
+            raise EvalGuardError(
+                f"Export error {resp.status_code}", status_code=resp.status_code
+            )
+        return resp.text
+    def export_burp(self, scan_id: str) -> str:
+        """Export security scan results as Burp Suite XML."""
+        resp = self.session.get(
+            self._url(f"/v1/scans/{scan_id}/export/burp"),
+            timeout=self.timeout,
+        )
+        if not resp.ok:
+            raise EvalGuardError(
+                f"Export error {resp.status_code}", status_code=resp.status_code
+            )
+        return resp.text
+    # ── Compliance ───────────────────────────────────────────────────────
+    def get_compliance_report(
+        self, scan_id: str, framework: str
+    ) -> Dict[str, Any]:
+        """Map scan results to a compliance framework report."""
+        return self._get(
+            f"/v1/scans/{scan_id}/compliance",
+            params={"framework": framework},
+        )
+    # ── Drift detection ──────────────────────────────────────────────────
+    def detect_drift(self, config: Dict[str, Any]) -> Dict[str, Any]:
+        """Detect performance drift between baseline and current results."""
+        return self._post("/v1/drift/detect", json=config)
+    # ── Guardrails ───────────────────────────────────────────────────────
+    def generate_guardrails(self, config: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """Auto-generate firewall rules from scan findings."""
+        return self._post("/v1/guardrails/generate", json=config)
+    # ── Smart Routing ───────────────────────────────────────────────────
+    def smart_route(self, test_cases: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Route test cases to optimal model tiers by complexity."""
+        return self._post("/v1/smart-routing/test-cases", json={"testCases": test_cases})
+    # ── Autopilot ───────────────────────────────────────────────────────
+    def autopilot(self, description: str, depth: str, project_id: str, compliance_frameworks: Optional[List[str]] = None) -> Dict[str, Any]:
+        """Launch automated audit pipeline."""
+        return self._post("/v1/autopilot", json={"description": description, "depth": depth, "projectId": project_id, "complianceFrameworks": compliance_frameworks})
+    def get_autopilot_config(self) -> Dict[str, Any]:
+        """Get autopilot depth configurations."""
+        return self._get("/v1/autopilot")
+    # ── Pipelines ───────────────────────────────────────────────────────
+    def create_pipeline(self, template_id: str, project_id: str) -> Dict[str, Any]:
+        """Create eval pipeline from template."""
+        return self._post("/v1/pipelines", json={"templateId": template_id, "projectId": project_id})
+    def list_pipelines(self) -> List[Dict[str, Any]]:
+        """List pipeline templates."""
+        return self._get("/v1/pipelines")
+    # ── Leaderboard ─────────────────────────────────────────────────────
+    def get_leaderboard(self, category: str = "overall") -> Dict[str, Any]:
+        """Get model safety/performance leaderboard."""
+        return self._get("/v1/leaderboard", params={"category": category})
+    # ── Cost / FinOps ───────────────────────────────────────────────────
+    def get_cost(self, project_id: str, period: str = "30d") -> Dict[str, Any]:
+        """Get cost analytics."""
+        return self._get("/v1/cost", params={"projectId": project_id, "period": period})
+    def get_cost_savings(self, project_id: str, period: str = "30d") -> Dict[str, Any]:
+        """Get ROI / cost savings report."""
+        return self._get("/v1/cost/savings", params={"projectId": project_id, "period": period})
+    def get_cost_forecast(self, project_id: str) -> Dict[str, Any]:
+        """Get cost forecast."""
+        return self._get("/v1/cost/forecast", params={"projectId": project_id})
+    # ── Security (extended) ─────────────────────────────────────────────
+    def get_security_effectiveness(self, project_id: str) -> Dict[str, Any]:
+        """Get attack effectiveness analytics."""
+        return self._get("/v1/security/effectiveness", params={"projectId": project_id})
+    def get_security_report(self, scan_id: str) -> Dict[str, Any]:
+        """Get full security assessment report."""
+        return self._get("/v1/security/report", params={"scanId": scan_id})
+    # ── Support ─────────────────────────────────────────────────────────
+    def submit_ticket(self, type: str, subject: str, description: str, priority: str = "medium", metadata: Optional[Dict] = None) -> Dict[str, Any]:
+        """Submit a support ticket."""
+        return self._post("/v1/support", json={"type": type, "subject": subject, "description": description, "priority": priority, "metadata": metadata or {}})
+    def list_tickets(self, status: Optional[str] = None) -> Dict[str, Any]:
+        """List user support tickets."""
+        params = {"status": status} if status else {}
+        return self._get("/v1/support", params=params)
+    # ── Traces ──────────────────────────────────────────────────────────
+    def list_traces(self, project_id: str) -> List[Dict[str, Any]]:
+        """List traces for a project."""
+        return self._get("/v1/traces", params={"projectId": project_id})
+    def search_traces(self, project_id: str, query: str) -> List[Dict[str, Any]]:
+        """Search traces."""
+        return self._get("/v1/traces/search", params={"projectId": project_id, "q": query})
+    def ingest_otlp(self, resource_spans: List[Dict]) -> Dict[str, Any]:
+        """Ingest OTLP traces."""
+        return self._post("/v1/ingest/otlp/traces", json={"resourceSpans": resource_spans})
+    # ── Monitoring ──────────────────────────────────────────────────────
+    def get_monitoring_analytics(self, project_id: str) -> Dict[str, Any]:
+        """Get monitoring analytics."""
+        return self._get("/v1/monitoring/analytics", params={"projectId": project_id})
+    def get_monitoring_alerts(self, project_id: str) -> Dict[str, Any]:
+        """Get monitoring alerts."""
+        return self._get("/v1/monitoring/alerts", params={"projectId": project_id})
+    # ── Compliance (extended) ───────────────────────────────────────────
+    def check_compliance(self, project_id: str, framework: Optional[str] = None) -> Dict[str, Any]:
+        """Run compliance check."""
+        params = {"projectId": project_id}
+        if framework: params["framework"] = framework
+        return self._get("/v1/compliance/check", params=params)
+    def get_compliance_gaps(self, project_id: str) -> Dict[str, Any]:
+        """Get compliance gaps."""
+        return self._get("/v1/compliance/gaps", params={"projectId": project_id})
+    # ── Prompts ─────────────────────────────────────────────────────────
+    def create_prompt(self, project_id: str, name: str, content: str, model: str = "gpt-4o", tags: Optional[List[str]] = None) -> Dict[str, Any]:
+        """Create a prompt template."""
+        return self._post("/v1/prompts", json={"projectId": project_id, "name": name, "content": content, "model": model, "tags": tags or []})
+    def list_prompts(self, project_id: str) -> List[Dict[str, Any]]:
+        """List prompts."""
+        return self._get("/v1/prompts", params={"projectId": project_id})
+    # ── Datasets ────────────────────────────────────────────────────────
+    def create_dataset(self, project_id: str, name: str, cases: List[Dict] = None, description: str = "") -> Dict[str, Any]:
+        """Create a dataset."""
+        return self._post("/v1/datasets", json={"projectId": project_id, "name": name, "cases": cases or [], "description": description})
+    def list_datasets(self, project_id: str) -> List[Dict[str, Any]]:
+        """List datasets."""
+        return self._get("/v1/datasets", params={"projectId": project_id})
+    # ── NL Pipeline ─────────────────────────────────────────────────────
+    def ask(self, question: str, project_id: Optional[str] = None) -> Dict[str, Any]:
+        """Ask the AI copilot."""
+        return self._post("/v1/ask", json={"question": question, "projectId": project_id})
+    def generate_eval_suite(self, description: str, project_id: Optional[str] = None) -> Dict[str, Any]:
+        """Generate eval test suite from description."""
+        return self._post("/v1/generate-eval-suite", json={"description": description, "projectId": project_id})
+    # ── AI SBOM ─────────────────────────────────────────────────────────
+    def get_ai_sbom(self, project_id: str) -> Dict[str, Any]:
+        """Get AI System Bill of Materials."""
+        return self._get("/v1/ai-sbom", params={"projectId": project_id})
+    # ── Threat Intelligence ─────────────────────────────────────────────
+    def get_threat_intelligence(self, project_id: str) -> Dict[str, Any]:
+        """Get threat intelligence data."""
+        return self._get("/v1/threat-intelligence", params={"projectId": project_id})
+    # ── Audit Logs ──────────────────────────────────────────────────────
+    def get_audit_logs(self, org_id: str) -> List[Dict[str, Any]]:
+        """Get audit logs."""
+        return self._get("/v1/audit-logs", params={"orgId": org_id})
+    # ── Notifications ───────────────────────────────────────────────────
+    def list_notifications(self) -> List[Dict[str, Any]]:
+        """List notifications."""
+        return self._get("/v1/notifications")
+    # ── Templates ───────────────────────────────────────────────────────
+    def list_templates(self) -> List[Dict[str, Any]]:
+        """List eval templates."""
+        return self._get("/v1/templates")
+    # ── Marketplace ─────────────────────────────────────────────────────
+    def get_marketplace(self) -> Dict[str, Any]:
+        """Get marketplace."""
+        return self._get("/v1/marketplace")
+    # ── Missing methods (parity with JS SDK) ───────────────────────────
+    def get_eval_run(self, run_id: str) -> Dict[str, Any]:
+        """Get a specific eval run by ID."""
+        return self._get(f"/v1/evals/{run_id}")
+    def get_trace(self, trace_id: str) -> Dict[str, Any]:
+        """Get a specific trace by ID."""
+        return self._get(f"/v1/traces/{trace_id}")
+    def trace(self, project_id: str, session_id: str, steps: List[Dict] = None) -> Dict[str, Any]:
+        """Create a trace."""
+        return self._post("/v1/traces", json={"projectId": project_id, "sessionId": session_id, "steps": steps or []})
+    def security_scan(self, config: Dict[str, Any]) -> Dict[str, Any]:
+        """Start a security scan (alias for run_scan)."""
+        return self._post("/v1/security", json=config)
+    def create_annotation(self, project_id: str, log_id: str, label: str, score: float = None, notes: str = None) -> Dict[str, Any]:
+        """Create an annotation on a log entry."""
+        body: Dict[str, Any] = {"projectId": project_id, "logId": log_id, "label": label}
+        if score is not None:
+            body["score"] = score
+        if notes:
+            body["notes"] = notes
+        return self._post("/v1/annotations", json=body)
+    def list_annotations(self, project_id: str) -> List[Dict[str, Any]]:
+        """List annotations for a project."""
+        return self._get(f"/v1/annotations?projectId={project_id}")
+    def list_eval_schedules(self, project_id: str) -> List[Dict[str, Any]]:
+        """List eval schedules for a project."""
+        return self._get(f"/v1/eval-schedules?projectId={project_id}")
+    def list_incidents(self, project_id: str) -> List[Dict[str, Any]]:
+        """List incidents for a project."""
+        return self._get(f"/v1/incidents?projectId={project_id}")
+    def list_feature_flags(self, project_id: str) -> List[Dict[str, Any]]:
+        """List feature flags for a project."""
+        return self._get(f"/v1/feature-flags?projectId={project_id}")
+    def list_guardrails(self, project_id: str) -> Dict[str, Any]:
+        """List guardrails for a project."""
+        return self._get(f"/v1/guardrails?projectId={project_id}")
+    def list_team(self, org_id: str) -> List[Dict[str, Any]]:
+        """List team members for an organization."""
+        return self._get(f"/v1/team?orgId={org_id}")
+    def list_webhooks(self, org_id: str) -> List[Dict[str, Any]]:
+        """List webhooks for an organization."""
+        return self._get(f"/v1/webhooks?orgId={org_id}")
+    def get_gateway_health(self) -> Dict[str, Any]:
+        """Get gateway health status."""
+        return self._get("/v1/gateway/health")
+    def get_gateway_stats(self, project_id: str) -> Dict[str, Any]:
+        """Get gateway usage statistics."""
+        return self._get(f"/v1/gateway/stats?projectId={project_id}")
+    def get_gateway_config(self, project_id: str) -> Dict[str, Any]:
+        """Get gateway configuration."""
+        return self._get(f"/v1/gateway?projectId={project_id}")
+    def get_monitoring_drift(self, project_id: str) -> Dict[str, Any]:
+        """Get drift detection status."""
+        return self._get(f"/v1/monitoring/drift?projectId={project_id}")
+    def get_monitoring_sla(self, project_id: str) -> Dict[str, Any]:
+        """Get SLA monitoring data."""
+        return self._get(f"/v1/monitoring/sla?projectId={project_id}")
+    def get_cost_budget(self, project_id: str) -> Dict[str, Any]:
+        """Get cost budget for a project."""
+        return self._get(f"/v1/cost/budget?projectId={project_id}")
+    def get_siem_connectors(self, project_id: str) -> Dict[str, Any]:
+        """Get SIEM connector configuration."""
+        return self._get(f"/v1/siem?projectId={project_id}")
+    def get_settings(self, project_id: str) -> Dict[str, Any]:
+        """Get project settings."""
+        return self._get(f"/v1/settings?projectId={project_id}")
+    def get_model_cards(self, project_id: str) -> Dict[str, Any]:
+        """Get model cards for compliance."""
+        return self._get(f"/v1/compliance/model-cards?projectId={project_id}")
+    def export_compliance(self, project_id: str, format: str = "json") -> Dict[str, Any]:
+        """Export compliance report."""
+        return self._get(f"/v1/compliance/export?projectId={project_id}&format={format}")
+    def export_results(self, run_id: str, format: str, project_id: str) -> Dict[str, Any]:
+        """Export eval results in specified format."""
+        return self._get(f"/v1/exports?runId={run_id}&format={format}&projectId={project_id}")
+    def generate_ai_sbom(self, project_id: str) -> Dict[str, Any]:
+        """Generate AI Software Bill of Materials."""
+        return self._post("/v1/ai-sbom/generate", json={"projectId": project_id})
+    def ingest_otlp_traces(self, resource_spans: List[Dict]) -> Dict[str, Any]:
+        """Ingest OpenTelemetry traces."""
+        return self._post("/v1/ingest/otlp/traces", json={"resourceSpans": resource_spans})
+    def ingest_otlp_logs(self, resource_logs: List[Dict]) -> Dict[str, Any]:
+        """Ingest OpenTelemetry logs."""
+        return self._post("/v1/ingest/otlp/logs", json={"resourceLogs": resource_logs})
+    def ingest_otlp_metrics(self, resource_metrics: List[Dict]) -> Dict[str, Any]:
+        """Ingest OpenTelemetry metrics."""
+        return self._post("/v1/ingest/otlp/metrics", json={"resourceMetrics": resource_metrics})