PyPI - scroot - Versions diffs - 0.2.0__py3-none-any.whl - Mend

scroot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

scroot/__init__.py +109 -0
scroot/agents.py +345 -0
scroot/audit.py +131 -0
scroot/cli/__init__.py +167 -0
scroot/cli/download.py +49 -0
scroot/cli/eval.py +230 -0
scroot/cli/model_info.py +28 -0
scroot/composite.py +170 -0
scroot/config/__init__.py +0 -0
scroot/config/corrector.py +92 -0
scroot/connectors/__init__.py +5 -0
scroot/connectors/database.py +357 -0
scroot/context/__init__.py +9 -0
scroot/context/adapters.py +86 -0
scroot/context/builder.py +514 -0
scroot/context/dedup.py +99 -0
scroot/context/payload.py +66 -0
scroot/context/pii.py +101 -0
scroot/context/tokenizer.py +42 -0
scroot/core.py +349 -0
scroot/corrector/__init__.py +38 -0
scroot/corrector/api.py +145 -0
scroot/corrector/base.py +20 -0
scroot/corrector/disabled.py +13 -0
scroot/corrector/local.py +112 -0
scroot/corrector/models.py +69 -0
scroot/dashboard/__init__.py +0 -0
scroot/dashboard/__main__.py +37 -0
scroot/dashboard/routers/__init__.py +0 -0
scroot/dashboard/routers/analytics.py +236 -0
scroot/dashboard/routers/corrector.py +230 -0
scroot/dashboard/routers/export.py +150 -0
scroot/dashboard/routers/guardrails.py +41 -0
scroot/dashboard/routers/pipeline.py +218 -0
scroot/dashboard/routers/queue.py +188 -0
scroot/dashboard/routers/records.py +252 -0
scroot/dashboard/routers/settings.py +291 -0
scroot/dashboard/security.py +135 -0
scroot/dashboard/server.py +181 -0
scroot/evidence.py +228 -0
scroot/exceptions.py +62 -0
scroot/feedback/__init__.py +6 -0
scroot/feedback/injector.py +160 -0
scroot/feedback/sanitizer.py +56 -0
scroot/feedback/store.py +650 -0
scroot/flags.py +42 -0
scroot/metrics/__init__.py +15 -0
scroot/metrics/_utils.py +9 -0
scroot/metrics/completeness.py +139 -0
scroot/metrics/confidence.py +83 -0
scroot/metrics/consistency.py +125 -0
scroot/metrics/groundedness.py +193 -0
scroot/metrics/relevance.py +73 -0
scroot/models.py +214 -0
scroot/result.py +276 -0
scroot/sampling.py +306 -0
scroot/text_utils.py +136 -0
scroot/ui/dist/assets/index-DW1dLzDl.js +101 -0
scroot/ui/dist/assets/index-WOhrVVSM.css +2 -0
scroot/ui/dist/favicon.svg +27 -0
scroot/ui/dist/index.html +20 -0
scroot-0.2.0.dist-info/METADATA +832 -0
scroot-0.2.0.dist-info/RECORD +67 -0
scroot-0.2.0.dist-info/WHEEL +5 -0
scroot-0.2.0.dist-info/entry_points.txt +2 -0
scroot-0.2.0.dist-info/licenses/LICENSE +201 -0
scroot-0.2.0.dist-info/top_level.txt +1 -0

scroot/dashboard/routers/queue.py ADDED Viewed

@@ -0,0 +1,188 @@
+"""Queue router - /api/queue endpoints."""
+from __future__ import annotations
+import threading
+from datetime import datetime, timezone
+from typing import Literal, Optional
+from fastapi import APIRouter, HTTPException, Query
+from pydantic import BaseModel
+class QueueItem(BaseModel):  # One review-queue row, as built by list_queue() in this module.
+    id: str
+    agent_id: str  # list_queue() fills this from record.corrected_by, or "unknown" when that is empty
+    query: str  # truncated to the first 120 characters by list_queue()
+    response: str  # truncated to the first 200 characters by list_queue()
+    iqs: float  # read from the record's scores["iqs"]; 0.0 when absent
+    flags: list[str]
+    status: Literal["pending", "claimed", "reviewed", "rejected", "applied"]  # records without a status attribute are reported as "pending"
+    created_at: str  # the record's timestamp, passed through unchanged
+    claimed_at: Optional[str] = None  # "claimed_at" value from the in-memory claim registry, if the record is claimed
+    iqs_metric_count: int = 5  # list_queue() falls back to 4 when no groundedness score is stored
+    session_id: Optional[str] = None
+    context_checksum: Optional[str] = None
+class QueueResponse(BaseModel):  # Response body of the queue listing endpoint.
+    records: list[QueueItem]  # only the items of the requested page
+    total: int  # number of records that matched the filters, before pagination
+    page: int  # page number this response corresponds to
+class StatsResponse(BaseModel):  # Response body of the queue router's /stats endpoint.
+    pending: int  # number of records whose status is "pending" (or that have no status attribute)
+    reviewed_today: int  # "reviewed" records whose timestamp starts with today's UTC date
+    avg_iqs: float  # mean of scores["iqs"] over records with dict scores, rounded to 3 places
+    oldest_pending_hours: float  # age in hours of the smallest pending timestamp, rounded to 1 place; 0.0 when nothing is pending
+# Atomic claim registry - single-session open-source tier
+_claims: dict[str, dict] = {}  # record id -> {"claimed_at": ISO-8601 UTC string}; module-level, so it lives in process memory only
+_claims_lock = threading.Lock()  # held around every insert into and removal from _claims
+def queue_router(store):
+    router = APIRouter()
+    @router.get("", response_model=QueueResponse)
+    def list_queue(
+        status: str = Query("all"),
+        flag: Optional[str] = Query(None),
+        agent: Optional[str] = Query(None),
+        min_iqs: Optional[float] = Query(None),
+        max_iqs: Optional[float] = Query(None),
+        threshold: float = Query(0.70),
+        sort: str = Query("created_desc"),
+        page: int = Query(1),
+        limit: int = Query(50),
+        search: Optional[str] = Query(None),
+    ):
+        records = store.get_all()
+        # IQS status filter (pass / warn / fail) - quality-based
+        if status in ("pass", "warn", "fail"):
+            warn_floor = threshold * 0.7
+            def iqs_status(r):
+                iqs = r.scores.get("iqs", 0) if isinstance(r.scores, dict) else 0
+                if iqs >= threshold:
+                    return "pass"
+                if iqs >= warn_floor:
+                    return "warn"
+                return "fail"
+            records = [r for r in records if iqs_status(r) == status]
+        elif status != "all":
+            # Workflow status filter (pending / reviewed / rejected)
+            records = [r for r in records if getattr(r, "status", "pending") == status]
+        # Text search
+        if search:
+            q = search.lower()
+            records = [r for r in records if q in r.query.lower()]
+        # Filter by flag
+        if flag:
+            records = [r for r in records if flag in (r.flags or [])]
+        # Filter by IQS range
+        if min_iqs is not None:
+            records = [r for r in records if r.scores.get("iqs", 0) >= min_iqs]
+        if max_iqs is not None:
+            records = [r for r in records if r.scores.get("iqs", 1) <= max_iqs]
+        # Sort
+        reverse = sort.endswith("_desc")
+        key_map = {
+            "iqs_asc":      lambda r: r.scores.get("iqs", 0),
+            "iqs_desc":     lambda r: r.scores.get("iqs", 0),
+            "created_asc":  lambda r: r.timestamp,
+            "created_desc": lambda r: r.timestamp,
+            "newest":       lambda r: r.timestamp,
+            "oldest":       lambda r: r.timestamp,
+        }
+        reverse = sort in ("iqs_desc", "created_desc", "newest")
+        sort_key = key_map.get(sort, lambda r: r.timestamp)
+        records = sorted(records, key=sort_key, reverse=reverse)
+        total = len(records)
+        start = (page - 1) * limit
+        page_records = records[start: start + limit]
+        items = []
+        for r in page_records:
+            sc = r.scores if isinstance(r.scores, dict) else {}
+            iqs = sc.get("iqs", 0.0)
+            metric_count = sc.get(
+                "iqs_metric_count", 5 if sc.get("groundedness") is not None else 4
+            )
+            claim = _claims.get(r.id)
+            items.append(QueueItem(
+                id=r.id,
+                agent_id=r.corrected_by or "unknown",
+                query=r.query[:120],
+                response=r.response[:200],
+                iqs=iqs,
+                flags=r.flags or [],
+                status=getattr(r, "status", "pending"),
+                created_at=r.timestamp,
+                claimed_at=claim.get("claimed_at") if claim else None,
+                session_id=getattr(r, "session_id", None),
+                context_checksum=getattr(r, "context_checksum", None),
+                iqs_metric_count=metric_count,
+            ))
+        return QueueResponse(records=items, total=total, page=page)
+    @router.post("/claim/{record_id}")
+    def claim_record(record_id: str):
+        """Atomic claim - 409 if already claimed by another session."""
+        with _claims_lock:
+            if record_id in _claims:
+                raise HTTPException(
+                    status_code=409,
+                    detail=f"Record {record_id} is already claimed",
+                )
+            now = datetime.now(timezone.utc).isoformat()
+            _claims[record_id] = {"claimed_at": now}
+            return {"record_id": record_id, "claimed_at": now, "status": "claimed"}
+    @router.delete("/claim/{record_id}")
+    def unclaim_record(record_id: str):
+        """Release a claim when reviewer navigates away."""
+        with _claims_lock:
+            _claims.pop(record_id, None)
+        return {"record_id": record_id, "status": "released"}
+    @router.get("/stats", response_model=StatsResponse)
+    def queue_stats():
+        all_records = store.get_all()
+        pending = [r for r in all_records if getattr(r, "status", "pending") == "pending"]
+        today = datetime.now(timezone.utc).date().isoformat()
+        reviewed_today = sum(
+            1 for r in all_records
+            if getattr(r, "status", "pending") == "reviewed"
+            and r.timestamp[:10] == today
+        )
+        iqs_vals = [r.scores.get("iqs", 0) for r in all_records if isinstance(r.scores, dict)]
+        avg_iqs = sum(iqs_vals) / len(iqs_vals) if iqs_vals else 0.0
+        oldest_hours = 0.0
+        if pending:
+            oldest_ts = min(r.timestamp for r in pending)
+            try:
+                dt = datetime.fromisoformat(oldest_ts.replace("Z", "+00:00"))
+                delta = datetime.now(timezone.utc) - dt
+                oldest_hours = delta.total_seconds() / 3600
+            except (ValueError, AttributeError):
+                pass
+        return StatsResponse(
+            pending=len(pending),
+            reviewed_today=reviewed_today,
+            avg_iqs=round(avg_iqs, 3),
+            oldest_pending_hours=round(oldest_hours, 1),
+        )
+    return router

scroot/dashboard/routers/records.py ADDED Viewed

@@ -0,0 +1,252 @@
+"""Records router - /api/records/:id endpoints."""
+from __future__ import annotations
+from typing import Optional
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+class ReviewBody(BaseModel):  # JSON body of POST /{record_id}/review.
+    correction: str  # corrected response text; the endpoint answers 422 when it is blank or whitespace-only
+    category: str = "manual"  # NOTE(review): not read by any endpoint visible in this file
+    notes: Optional[str] = None  # NOTE(review): not read by any endpoint visible in this file
+class RejectBody(BaseModel):  # JSON body of POST /{record_id}/reject.
+    reason: str  # passed to store.mark_reviewed(reason=...) by the reject endpoint
+# The 5 IQS metrics - used to whitelist `metrics` so derived to_dict() fields
+# (weakest_metric, score_variance, etc.) don't leak in as fake metric scores.
+_METRIC_KEYS = {"groundedness", "completeness", "relevance", "consistency", "confidence"}
+def _record_to_dict(r) -> dict:
+    """Serialize a CorrectionRecord to the shape the frontend expects."""
+    scores = r.scores if isinstance(r.scores, dict) else {}
+    iqs = scores.get("iqs", 0.0)
+    metrics = {k: v for k, v in scores.items() if k in _METRIC_KEYS}
+    return {
+        "id": r.id,
+        "timestamp": r.timestamp,
+        "created_at": r.timestamp,
+        "query": r.query,
+        "response": r.response,
+        "context": "\n".join(r.context_used or []),
+        "correction": r.correction,
+        "rejection_reason": r.reason,
+        "corrected_by": r.corrected_by,
+        "status": getattr(r, "status", "pending"),
+        "iqs": iqs,
+        "metrics": metrics,
+        "flags": r.flags or [],
+        "corrected_response_iqs": getattr(r, "corrected_response_iqs", None),
+        "agent_id": r.corrected_by or None,
+        "model": None,
+        "weakest_metric": scores.get("weakest_metric"),
+        "score_variance": scores.get("score_variance"),
+        "iqs_explanation": scores.get("iqs_explanation"),
+        "metric_explanations": scores.get("metric_explanations") or {},
+        "guardrail_applied_count": getattr(r, "guardrail_applied_count", 0),
+        "evidence_map": scores.get("evidence_map"),
+        # IQS transparency: whether groundedness was scored and how many metrics
+        # contributed (defaults derived for older records without these keys).
+        "context_used": scores.get("context_used", scores.get("groundedness") is not None),
+        "iqs_metric_count": scores.get(
+            "iqs_metric_count", 5 if scores.get("groundedness") is not None else 4
+        ),
+        "effective_weights": scores.get("effective_weights"),
+    }
+def records_router(store):
+    router = APIRouter()
+    @router.get("/{record_id}")
+    def get_record(record_id: str):
+        records = store.get_all()
+        match = next((r for r in records if r.id == record_id), None)
+        if not match:
+            raise HTTPException(status_code=404, detail=f"Record {record_id} not found")
+        return _record_to_dict(match)
+    @router.post("/{record_id}/review")
+    def submit_review(record_id: str, body: ReviewBody):
+        if not body.correction.strip():
+            raise HTTPException(status_code=422, detail="Correction cannot be empty")
+        ok = store.mark_reviewed(
+            record_id=record_id,
+            correction=body.correction,
+            corrected_by="reviewer",
+            status="reviewed",
+        )
+        if not ok:
+            raise HTTPException(status_code=404, detail=f"Record {record_id} not found")
+        # Release claim
+        from .queue import _claims, _claims_lock
+        with _claims_lock:
+            _claims.pop(record_id, None)
+        # Return updated record so frontend can setRecord() directly
+        records = store.get_all()
+        updated = next((r for r in records if r.id == record_id), None)
+        if updated:
+            return _record_to_dict(updated)
+        return {"record_id": record_id, "status": "reviewed"}
+    @router.post("/{record_id}/reject")
+    def reject_record(record_id: str, body: RejectBody):
+        ok = store.mark_reviewed(
+            record_id=record_id,
+            correction="",
+            reason=body.reason,
+            corrected_by="reviewer",
+            status="rejected",
+        )
+        if not ok:
+            raise HTTPException(status_code=404, detail=f"Record {record_id} not found")
+        from .queue import _claims, _claims_lock
+        with _claims_lock:
+            _claims.pop(record_id, None)
+        records = store.get_all()
+        updated = next((r for r in records if r.id == record_id), None)
+        if updated:
+            return _record_to_dict(updated)
+        return {"record_id": record_id, "status": "rejected"}
+    @router.delete("/{record_id}/correction")
+    def delete_correction(record_id: str):
+        """Reset a record to pending - undoes a correction or rejection."""
+        ok = store.mark_reviewed(
+            record_id=record_id,
+            correction="",
+            reason="",
+            corrected_by=None,
+            status="pending",
+        )
+        if not ok:
+            raise HTTPException(status_code=404, detail=f"Record {record_id} not found")
+        records = store.get_all()
+        updated = next((r for r in records if r.id == record_id), None)
+        if updated:
+            return _record_to_dict(updated)
+        return {"record_id": record_id, "status": "pending"}
+    @router.post("/{record_id}/generate-correction")
+    async def generate_correction(record_id: str):
+        """
+        Call the configured LLM and return a draft correction as JSON.
+        NEVER auto-populates the frontend - user must click Generate.
+        """
+        records = store.get_all()
+        match = next((r for r in records if r.id == record_id), None)
+        if not match:
+            raise HTTPException(status_code=404, detail="Record not found")
+        settings = _load_settings()
+        provider = settings.get("provider", "none")
+        if provider == "none":
+            raise HTTPException(status_code=400, detail="No LLM corrector configured. Set one in Settings.")
+        try:
+            draft = _call_llm(match, settings)
+            return {"draft": draft}
+        except Exception as e:
+            raise HTTPException(status_code=502, detail=str(e))
+    return router
+def _load_settings() -> dict:
+    """Load persisted LLM judge settings."""
+    import json
+    import os
+    settings_path = os.path.join(os.getcwd(), ".scroot_settings.json")
+    if os.path.exists(settings_path):
+        with open(settings_path) as f:
+            return json.load(f)
+    return {"provider": "none"}
+def _detect_provider(settings: dict) -> str:
+    """Infer the actual API provider from base_url and model name."""
+    base_url = (settings.get("base_url") or "").lower()
+    model = (settings.get("model") or "").lower()
+    if "localhost:11434" in base_url or "ollama" in base_url:
+        return "ollama"
+    if "anthropic" in base_url or model.startswith("claude"):
+        return "anthropic"
+    if "groq" in base_url:
+        return "groq"
+    if "openrouter" in base_url:
+        return "openrouter"
+    return "openai"
+def _call_llm(record, settings: dict) -> str:
+    """Call the configured LLM provider and return a correction draft."""
+    provider = settings.get("provider", "none")
+    if provider == "llm":
+        provider = _detect_provider(settings)
+    model = settings.get("model", "")
+    api_key = settings.get("api_key", "")
+    base_url = settings.get("base_url", "") or None
+    # Fall back to env var if direct key not stored
+    api_key_env = settings.get("api_key_env_var", "")
+    if not api_key and api_key_env:
+        import os
+        api_key = os.environ.get(api_key_env, "")
+    # M-2: refuse to send the API key to an unvetted/internal endpoint.
+    from scroot.dashboard.security import validate_base_url
+    validate_base_url(base_url)
+    context_text = "\n".join(record.context_used or [])
+    prompt = (
+        f"Query: {record.query}\n"
+        f"Context: {context_text}\n"
+        f"Problematic response: {record.response}\n"
+        f"Flags: {', '.join(record.flags or [])}\n\n"
+        f"Write a corrected, grounded response:"
+    )
+    if provider == "anthropic":
+        import anthropic
+        client = anthropic.Anthropic(api_key=api_key, base_url=base_url)
+        msg = client.messages.create(
+            model=model or "claude-haiku-4-5-20251001",
+            max_tokens=512,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        return msg.content[0].text
+    elif provider in ("openai", "groq", "openrouter"):
+        import openai
+        client = openai.OpenAI(api_key=api_key, base_url=base_url)
+        resp = client.chat.completions.create(
+            model=model or "gpt-4o-mini",
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=512,
+        )
+        return resp.choices[0].message.content
+    elif provider == "ollama":
+        import requests
+        url = (base_url or "http://localhost:11434") + "/api/generate"
+        resp = requests.post(
+            url,
+            json={"model": model or "llama3.2", "prompt": prompt, "stream": False},
+            timeout=60,
+        )
+        return resp.json().get("response", "")
+    return "No LLM provider configured."