aicert-0.1.0-py3-none-any.whl
This diff shows the content of publicly available package versions as released to their respective public registries, and is provided for informational purposes only.
- aicert/__init__.py +3 -0
- aicert/__main__.py +6 -0
- aicert/artifacts.py +104 -0
- aicert/cli.py +1423 -0
- aicert/config.py +193 -0
- aicert/doctor.py +366 -0
- aicert/hashing.py +28 -0
- aicert/metrics.py +305 -0
- aicert/providers/__init__.py +13 -0
- aicert/providers/anthropic.py +182 -0
- aicert/providers/base.py +36 -0
- aicert/providers/openai.py +153 -0
- aicert/providers/openai_compatible.py +152 -0
- aicert/runner.py +620 -0
- aicert/templating.py +83 -0
- aicert/validation.py +322 -0
- aicert-0.1.0.dist-info/METADATA +306 -0
- aicert-0.1.0.dist-info/RECORD +22 -0
- aicert-0.1.0.dist-info/WHEEL +5 -0
- aicert-0.1.0.dist-info/entry_points.txt +2 -0
- aicert-0.1.0.dist-info/licenses/LICENSE +21 -0
- aicert-0.1.0.dist-info/top_level.txt +1 -0
aicert/config.py
ADDED
@@ -0,0 +1,193 @@
+"""Configuration models for aicert."""
+
+from pathlib import Path
+from typing import Literal, Optional
+
+from pydantic import BaseModel, Field, field_validator
+
+
+class ChaosConfig(BaseModel):
+    """Configuration for FakeAdapter chaos mode.
+
+    All probability values should be between 0 and 1.
+    """
+
+    seed: int = Field(default=1337, description="Random seed for reproducible chaos")
+    p_invalid_json: float = Field(default=0.0, ge=0, le=1, description="Probability of returning invalid JSON")
+    p_wrong_schema: float = Field(default=0.0, ge=0, le=1, description="Probability of JSON with wrong schema")
+    p_extra_keys: float = Field(default=0.0, ge=0, le=1, description="Probability of JSON with extra keys")
+    p_wrapped_json: float = Field(default=0.0, ge=0, le=1, description="Probability of JSON wrapped in markdown fence")
+    p_non_json: float = Field(default=0.0, ge=0, le=1, description="Probability of non-JSON response")
+    p_timeout: float = Field(default=0.0, ge=0, le=1, description="Probability of timeout")
+    p_http_429: float = Field(default=0.0, ge=0, le=1, description="Probability of HTTP 429 error")
+    p_http_500: float = Field(default=0.0, ge=0, le=1, description="Probability of HTTP 500 error")
+
+
+class ProviderConfig(BaseModel):
+    """Configuration for an LLM provider."""
+
+    id: str = Field(..., description="Provider identifier")
+    provider: Literal["openai", "anthropic", "openai_compatible", "fake"] = Field(
+        ..., description="Provider type"
+    )
+    model: str = Field(..., description="Model identifier")
+    temperature: float = Field(..., description="Temperature for sampling")
+    base_url: Optional[str] = Field(
+        None, description="Base URL for openai_compatible provider"
+    )
+    chaos: Optional[ChaosConfig] = Field(
+        None, description="Chaos mode configuration (fake provider only)"
+    )
+
+
+class ValidationConfig(BaseModel):
+    """Configuration for output validation."""
+
+    extract_json: bool = Field(default=True, description="Extract JSON from response")
+    allow_extra_keys: bool = Field(
+        default=False, description="Allow extra keys in JSON output"
+    )
+
+
+class ThresholdsConfig(BaseModel):
+    """Configuration for pass/fail thresholds."""
+
+    min_stability: int = Field(default=85, description="Minimum stability percentage")
+    min_compliance: int = Field(default=95, description="Minimum compliance percentage")
+    max_cost_usd: Optional[float] = Field(None, description="Maximum cost in USD")
+    p95_latency_ms: Optional[int] = Field(None, description="P95 latency in milliseconds")
+
+
+class CIConfig(BaseModel):
+    """Configuration for CI mode."""
+
+    runs: int = Field(default=10, description="Number of runs in CI mode")
+    save_on_fail: bool = Field(
+        default=True, description="Save results on test failure"
+    )
+
+
+class Config(BaseModel):
+    """Main configuration for aicert."""
+
+    project: str = Field(..., description="Project name")
+
+    providers: list[ProviderConfig] = Field(
+        ..., description="List of LLM provider configurations"
+    )
+
+    prompt_file: str = Field(..., description="Path to prompt file")
+    cases_file: str = Field(..., description="Path to test cases file (JSONL)")
+    schema_file: str = Field(..., description="Path to JSON schema file")
+
+    runs: int = Field(default=50, description="Number of test runs")
+    concurrency: int = Field(default=10, description="Number of concurrent requests")
+    timeout_s: int = Field(default=30, description="Timeout for requests in seconds")
+
+    validation: ValidationConfig = Field(
+        default_factory=ValidationConfig, description="Validation settings"
+    )
+
+    thresholds: ThresholdsConfig = Field(
+        default_factory=ThresholdsConfig, description="Pass/fail thresholds"
+    )
+
+    ci: CIConfig = Field(default_factory=CIConfig, description="CI mode settings")
+
+    @field_validator("providers", mode="before")
+    @classmethod
+    def ensure_providers_list(cls, v):
+        """Ensure providers is a list."""
+        if isinstance(v, dict):
+            return [v]
+        return v
+
+    @property
+    def primary_provider(self) -> ProviderConfig:
+        """Get the primary provider (first one)."""
+        return self.providers[0]
+
+
+class ConfigLoadError(Exception):
+    """Error raised when configuration loading fails."""
+
+    def __init__(self, message: str, config_path: Optional[str] = None, hint: Optional[str] = None):
+        self.message = message
+        self.config_path = config_path
+        self.hint = hint
+        super().__init__(self._format_message())
+
+    def _format_message(self) -> str:
+        """Format the error message with context."""
+        parts = []
+        if self.config_path:
+            parts.append(f"[bold red]Config file: {self.config_path}[/bold red]")
+        parts.append(f"[bold red]Error:[/bold red] {self.message}")
+        if self.hint:
+            parts.append(f"[bold yellow]Hint:[/bold yellow] {self.hint}")
+        return "\n".join(parts)
+
+
+def load_config(path: str) -> Config:
+    """Load configuration from YAML file.
+
+    Args:
+        path: Path to the YAML config file.
+
+    Returns:
+        Config object with validated settings.
+
+    Raises:
+        ConfigLoadError: If the file cannot be loaded, validation fails,
+            or a referenced file doesn't exist.
+    """
+    import yaml
+
+    config_path = Path(path)
+    config_dir = config_path.parent
+
+    try:
+        with open(config_path, "r") as f:
+            config_data = yaml.safe_load(f)
+    except FileNotFoundError:
+        raise ConfigLoadError(
+            message=f"Config file not found: {path}",
+            config_path=str(config_path.resolve()),
+            hint="Make sure the path is correct and the file exists."
+        )
+    except yaml.YAMLError as e:
+        raise ConfigLoadError(
+            message=f"Invalid YAML in config file: {e}",
+            config_path=str(config_path.resolve()),
+            hint="Check for syntax errors like incorrect indentation or missing colons."
+        )
+
+    try:
+        config = Config(**config_data)
+    except Exception as e:
+        raise ConfigLoadError(
+            message=f"Configuration validation failed: {e}",
+            config_path=str(config_path.resolve()),
+            hint="Check that all required fields are present and have the correct types."
+        )
+
+    # Validate referenced files exist, resolving relative to config directory
+    errors: list[str] = []
+
+    for field_name in ["prompt_file", "cases_file", "schema_file"]:
+        file_path = getattr(config, field_name)
+        resolved_path = config_dir / file_path
+        if not resolved_path.exists():
+            errors.append(
+                f"{field_name}: '{file_path}' not found "
+                f"(resolved to: {resolved_path})"
+            )
+
+    if errors:
+        raise ConfigLoadError(
+            message="Referenced files not found:\n  - " + "\n  - ".join(errors),
+            config_path=str(config_path.resolve()),
+            hint="Make sure all file paths are correct and files exist. Paths are resolved relative to the config file directory."
+        )
+
+    return config
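For orientation, a minimal end-to-end sketch of load_config follows. It is illustrative, not part of the package: the demo directory, the file contents, and the {{ text }} placeholder syntax are assumptions made for this example (the id/prompt/variables case keys mirror what doctor.py reads below); only the required Config fields come from the model above.

    # Illustrative sketch, not shipped with the package: write a minimal config
    # plus the three files it references, then load and inspect it.
    from pathlib import Path

    from aicert.config import load_config

    workdir = Path("demo")  # hypothetical directory for this example
    workdir.mkdir(exist_ok=True)
    (workdir / "prompt.txt").write_text("Summarize: {{ text }}")  # placeholder syntax assumed
    (workdir / "cases.jsonl").write_text(
        '{"id": "case-1", "prompt": "Summarize: {{ text }}", "variables": {"text": "hello"}}\n'
    )
    (workdir / "schema.json").write_text(
        '{"type": "object", "properties": {"summary": {"type": "string"}}}'
    )
    (workdir / "aicert.yaml").write_text(
        "project: demo\n"
        "providers:\n"
        "  - id: local-fake\n"
        "    provider: fake\n"
        "    model: fake-1\n"
        "    temperature: 0.0\n"
        "prompt_file: prompt.txt\n"
        "cases_file: cases.jsonl\n"
        "schema_file: schema.json\n"
    )

    config = load_config(str(workdir / "aicert.yaml"))
    print(config.primary_provider.id)                    # local-fake
    print(config.runs, config.thresholds.min_stability)  # defaults: 50 85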
aicert/doctor.py
ADDED
@@ -0,0 +1,366 @@
+"""Doctor command for validating aicert installation and configuration."""
+
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+import httpx
+from rich.console import Console
+
+from aicert.config import Config, ConfigLoadError, ProviderConfig, load_config
+from aicert.templating import build_schema_hint, render_prompt
+from aicert.validation import load_json_schema, validate_output
+
+console = Console()
+
+
+class DoctorCheck:
+    """Represents a single doctor check with result."""
+
+    def __init__(self, name: str):
+        self.name = name
+        self.passed = False
+        self.error: Optional[str] = None
+        self.details: List[str] = []
+
+    def pass_check(self, details: Optional[List[str]] = None) -> None:
+        """Mark check as passed."""
+        self.passed = True
+        if details:
+            self.details = details
+
+    def fail_check(self, error: str) -> None:
+        """Mark check as failed."""
+        self.passed = False
+        self.error = error
+
+    def add_detail(self, detail: str) -> None:
+        """Add a detail message."""
+        self.details.append(detail)
+
+
+def load_cases(cases_file: str) -> Tuple[List[Dict[str, Any]], List[str]]:
+    """Load test cases from JSONL file.
+
+    Returns:
+        Tuple of (cases, errors)
+    """
+    cases = []
+    errors = []
+
+    with open(cases_file, "r") as f:
+        for line_num, line in enumerate(f, 1):
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                case = json.loads(line)
+                cases.append(case)
+                # Check for required 'id' field
+                if "id" not in case and "name" not in case:
+                    errors.append(f"Line {line_num}: Missing 'id' or 'name' field")
+            except json.JSONDecodeError as e:
+                errors.append(f"Line {line_num}: Invalid JSON - {e}")
+
+    return cases, errors
+
+
+def check_provider_env(provider: ProviderConfig) -> Tuple[str, Optional[str]]:
+    """Check provider environment readiness.
+
+    Returns:
+        Tuple of (status, message)
+        Status: "OK" | "MISSING_ENV" | "MISCONFIG"
+    """
+    if provider.provider == "fake":
+        return "OK", "No environment variables required for fake provider"
+
+    if provider.provider == "openai":
+        api_key = os.environ.get("OPENAI_API_KEY")
+        if not api_key:
+            return "MISSING_ENV", "OPENAI_API_KEY not set"
+        return "OK", "OPENAI_API_KEY is set"
+
+    if provider.provider == "anthropic":
+        api_key = os.environ.get("ANTHROPIC_API_KEY")
+        if not api_key:
+            return "MISSING_ENV", "ANTHROPIC_API_KEY not set"
+        return "OK", "ANTHROPIC_API_KEY is set"
+
+    if provider.provider == "openai_compatible":
+        base_url = provider.base_url
+        if not base_url:
+            return "MISCONFIG", "base_url not configured"
+
+        api_key_env = os.environ.get("OPENAI_COMPAT_API_KEY")
+        if api_key_env:
+            return "OK", f"base_url={base_url}, OPENAI_COMPAT_API_KEY is set"
+        else:
+            return "OK", f"base_url={base_url}, OPENAI_COMPAT_API_KEY not set (optional)"
+
+    return "MISCONFIG", f"Unknown provider type: {provider.provider}"
+
+
+async def check_connectivity(provider: ProviderConfig) -> Tuple[bool, str]:
+    """Check connectivity to openai_compatible provider.
+
+    Returns:
+        Tuple of (success, message)
+    """
+    base_url = provider.base_url
+    if not base_url:
+        return False, "No base_url configured"
+
+    # Try /models endpoint first, then fallback to base URL
+    test_urls = [
+        f"{base_url.rstrip('/')}/models",
+        base_url.rstrip('/'),
+    ]
+
+    for url in test_urls:
+        try:
+            async with httpx.AsyncClient(timeout=3.0) as client:
+                response = await client.get(url, follow_redirects=True)
+                if response.status_code < 500:
+                    return True, f"Connected to {url} (status {response.status_code})"
+        except httpx.TimeoutException:
+            continue
+        except Exception as e:
+            continue
+
+    return False, f"Could not connect to {base_url} (tried {len(test_urls)} endpoints)"
+
+
+def run_doctor(
+    config_path: str,
+    check_connectivity_flag: bool = False,
+) -> Tuple[int, int]:
+    """Run all doctor checks.
+
+    Args:
+        config_path: Path to configuration file.
+        check_connectivity_flag: Whether to check connectivity for openai_compatible providers.
+
+    Returns:
+        Tuple of (exit_code, failed_checks_count)
+    """
+    checks: List[DoctorCheck] = []
+    failed_count = 0
+
+    # === A. Load Config ===
+    check = DoctorCheck("Config")
+    checks.append(check)
+    try:
+        config = load_config(config_path)
+        check.pass_check([f"Config path: {config_path}", f"Project: {config.project}"])
+    except ConfigLoadError:
+        # Re-raise for CLI to handle exit code
+        raise
+    except Exception as e:
+        check.fail_check(f"Failed to load config: {e}")
+        failed_count += 1
+        # Can't continue without config
+        return 1, failed_count
+
+    # === B. Validate Files ===
+    config_dir = Path(config_path).parent
+
+    # B1. Prompt file
+    check = DoctorCheck("Files")
+    checks.append(check)
+    prompt_errors = []
+
+    prompt_file = config_dir / config.prompt_file
+    try:
+        with open(prompt_file, "r") as f:
+            prompt_content = f.read()
+        check.add_detail(f"prompt_file: {config.prompt_file} (readable, {len(prompt_content)} chars)")
+    except Exception as e:
+        prompt_errors.append(f"prompt_file: {e}")
+
+    # B2. Cases file
+    cases_file = config_dir / config.cases_file
+    cases: List[Dict[str, Any]] = []
+    try:
+        cases, case_errors = load_cases(str(cases_file))
+        if case_errors:
+            prompt_errors.extend(case_errors)
+        else:
+            check.add_detail(f"cases_file: {config.cases_file} ({len(cases)} cases)")
+    except Exception as e:
+        prompt_errors.append(f"cases_file: {e}")
+
+    # B3. Schema file
+    schema_file = config_dir / config.schema_file
+    schema: Dict[str, Any] = {}
+    try:
+        schema = load_json_schema(str(schema_file))
+        check.add_detail(f"schema_file: {config.schema_file} (valid JSON schema)")
+    except Exception as e:
+        prompt_errors.append(f"schema_file: {e}")
+
+    if prompt_errors:
+        check.fail_check("\n  ".join(prompt_errors))
+        failed_count += 1
+    else:
+        check.pass_check()
+
+    # === C. Template Render Validation ===
+    check = DoctorCheck("Template")
+    checks.append(check)
+    template_errors = []
+
+    if cases and schema:
+        schema_hint = build_schema_hint(schema)
+        # Test first 1-3 cases
+        test_cases = cases[:3]
+        for case in test_cases:
+            case_id = case.get("id") or case.get("name", "unknown")
+            prompt_template = case.get("prompt", "")
+            variables = case.get("variables", {})
+            try:
+                rendered = render_prompt(prompt_template, variables, schema_hint, case_id)
+                check.add_detail(f"Case '{case_id}': rendered successfully")
+            except ValueError as e:
+                template_errors.append(f"Case '{case_id}': {e}")
+
+    if template_errors:
+        check.fail_check("\n  ".join(template_errors))
+        failed_count += 1
+    else:
+        if cases:
+            check.pass_check([f"Rendered {min(3, len(cases))} case(s) successfully"])
+        else:
+            check.pass_check(["No cases to test"])
+
+    # === D. Validation Pipeline Sanity ===
+    check = DoctorCheck("Validation")
+    checks.append(check)
+
+    # Use FakeAdapter with deterministic output to test validation
+    try:
+        from aicert.runner import FakeAdapter
+        import asyncio
+
+        async def test_validation():
+            adapter = FakeAdapter(latency_ms=1)
+            result = await adapter.generate("Test prompt")
+            content = result["choices"][0]["message"]["content"]
+            return content
+
+        sample_output = asyncio.run(test_validation())
+
+        # Test validation with schema
+        validation_result = validate_output(
+            text=sample_output,
+            schema=schema,
+            extract_json=config.validation.extract_json,
+            allow_extra_keys=config.validation.allow_extra_keys,
+        )
+
+        if validation_result.ok_json and validation_result.ok_schema:
+            check.pass_check(["Validation pipeline working correctly"])
+        elif validation_result.ok_json:
+            check.fail_check(f"Schema validation failed: {validation_result.error}")
+            failed_count += 1
+        else:
+            check.fail_check(f"JSON parsing failed: {validation_result.error}")
+            failed_count += 1
+    except Exception as e:
+        check.fail_check(f"Validation pipeline error: {e}")
+        failed_count += 1
+
+    # === E. Provider Readiness ===
+    check = DoctorCheck("Providers")
+    checks.append(check)
+    provider_status = []
+    provider_issues = []
+
+    for provider in config.providers:
+        status, message = check_provider_env(provider)
+        provider_status.append(f"{provider.id} ({provider.provider}): {status}")
+        if status != "OK":
+            provider_issues.append(f"{provider.id}: {message}")
+
+    if provider_issues:
+        check.fail_check("\n  ".join(provider_issues))
+        failed_count += 1
+    else:
+        check.pass_check(provider_status)
+
+    # === E2. Connectivity Check (optional) ===
+    if check_connectivity_flag:
+        check = DoctorCheck("Connectivity")
+        checks.append(check)
+        connectivity_results = []
+        connectivity_issues = []
+
+        for provider in config.providers:
+            if provider.provider == "openai_compatible":
+                success, message = asyncio.run(check_connectivity(provider))
+                if success:
+                    connectivity_results.append(f"{provider.id}: {message}")
+                else:
+                    connectivity_issues.append(f"{provider.id}: {message}")
+
+        if connectivity_issues:
+            # Don't fail doctor for connectivity issues, just warn
+            connectivity_results.extend([f"[warn] {x}" for x in connectivity_issues])
+            check.pass_check(connectivity_results)
+        else:
+            check.pass_check(connectivity_results if connectivity_results else ["No openai_compatible providers"])
+
+    # === Print Summary ===
+    console.print("\n[bold]Doctor Summary[/bold]")
+    console.print("-" * 50)
+
+    for check in checks:
+        if check.passed:
+            icon = "✅"
+            console.print(f"  {icon} {check.name}")
+            for detail in check.details:
+                console.print(f"      {detail}")
+        else:
+            icon = "❌"
+            console.print(f"  {icon} {check.name}")
+            error = check.error or "Unknown error"
+            for line in error.split("\n"):
+                console.print(f"      {line}")
+
+    console.print("-" * 50)
+
+    # Final verdict
+    total_failed = len([c for c in checks if not c.passed])
+    if total_failed == 0:
+        console.print("[bold green]Doctor: OK[/bold green]")
+        return 0, 0
+    else:
+        console.print(f"[bold red]Doctor: Issues found ({total_failed})[/bold red]")
+        return 1, total_failed


+def print_dry_run_plan(config: Config, cases: List[Dict[str, Any]]) -> None:
+    """Print the dry-run execution plan."""
+    console.print("\n[bold]Dry Run Plan[/bold]")
+    console.print("-" * 50)
+
+    providers_count = len(config.providers)
+    cases_count = len(cases)
+    runs = config.runs
+    total_requests = providers_count * cases_count * runs
+
+    console.print(f"  Providers: {providers_count}")
+    for p in config.providers:
+        console.print(f"    - {p.id}: {p.provider}/{p.model}")
+    console.print(f"  Cases: {cases_count}")
+    console.print(f"  Runs per case: {runs}")
+    console.print(f"  [bold]Total requests: {total_requests}[/bold]")
+    console.print(f"  Concurrency: {config.concurrency}")
+    console.print(f"  Timeout: {config.timeout_s}s")
+    console.print(f"  Validation:")
+    console.print(f"    - extract_json: {config.validation.extract_json}")
+    console.print(f"    - allow_extra_keys: {config.validation.allow_extra_keys}")
+
+    console.print("-" * 50)
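Since run_doctor returns (exit_code, failed_checks_count) and, per the comment in section A, re-raises ConfigLoadError for the caller to handle, a thin wrapper is enough to drive it programmatically. A hedged sketch follows, reusing the hypothetical demo/aicert.yaml from the earlier example; exit code 2 for config errors is this example's choice, not documented package behavior.

    # Illustrative sketch, not shipped with the package.
    import sys

    from aicert.config import ConfigLoadError
    from aicert.doctor import run_doctor

    try:
        exit_code, failed = run_doctor("demo/aicert.yaml", check_connectivity_flag=True)
    except ConfigLoadError as err:
        print(err, file=sys.stderr)  # config load errors are re-raised to the caller
        sys.exit(2)                  # exit code chosen for this example only

    sys.exit(exit_code)  # 0 when every check passed, 1 otherwise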
aicert/hashing.py
ADDED
@@ -0,0 +1,28 @@
+"""Hashing helpers for aicert."""
+
+import hashlib
+from pathlib import Path
+
+
+def sha256_bytes(b: bytes) -> str:
+    """Compute SHA-256 hash of bytes and return as 'sha256:<hex>' format.
+
+    Args:
+        b: Bytes to hash.
+
+    Returns:
+        String in format "sha256:<hex>" where <hex> is the lowercase SHA-256 hex digest.
+    """
+    return f"sha256:{hashlib.sha256(b).hexdigest()}"
+
+
+def sha256_file(path: Path) -> str:
+    """Compute SHA-256 hash of a file and return as 'sha256:<hex>' format.
+
+    Args:
+        path: Path to the file to hash.
+
+    Returns:
+        String in format "sha256:<hex>" where <hex> is the lowercase SHA-256 hex digest.
+    """
+    return sha256_bytes(path.read_bytes())
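Both helpers return the digest in the same "sha256:<hex>" form, so hashing bytes and hashing a file with identical content agree. A small illustrative example (the file name is invented for the demo):

    # Illustrative usage, not shipped with the package.
    from pathlib import Path

    from aicert.hashing import sha256_bytes, sha256_file

    data = b"hello"
    print(sha256_bytes(data))
    # sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824

    tmp = Path("hello.bin")  # hypothetical file for the demo
    tmp.write_bytes(data)
    assert sha256_file(tmp) == sha256_bytes(data)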