PyPI - scroot - Versions diffs - 0.2.0__py3-none-any.whl - Mend

scroot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

scroot/__init__.py +109 -0
scroot/agents.py +345 -0
scroot/audit.py +131 -0
scroot/cli/__init__.py +167 -0
scroot/cli/download.py +49 -0
scroot/cli/eval.py +230 -0
scroot/cli/model_info.py +28 -0
scroot/composite.py +170 -0
scroot/config/__init__.py +0 -0
scroot/config/corrector.py +92 -0
scroot/connectors/__init__.py +5 -0
scroot/connectors/database.py +357 -0
scroot/context/__init__.py +9 -0
scroot/context/adapters.py +86 -0
scroot/context/builder.py +514 -0
scroot/context/dedup.py +99 -0
scroot/context/payload.py +66 -0
scroot/context/pii.py +101 -0
scroot/context/tokenizer.py +42 -0
scroot/core.py +349 -0
scroot/corrector/__init__.py +38 -0
scroot/corrector/api.py +145 -0
scroot/corrector/base.py +20 -0
scroot/corrector/disabled.py +13 -0
scroot/corrector/local.py +112 -0
scroot/corrector/models.py +69 -0
scroot/dashboard/__init__.py +0 -0
scroot/dashboard/__main__.py +37 -0
scroot/dashboard/routers/__init__.py +0 -0
scroot/dashboard/routers/analytics.py +236 -0
scroot/dashboard/routers/corrector.py +230 -0
scroot/dashboard/routers/export.py +150 -0
scroot/dashboard/routers/guardrails.py +41 -0
scroot/dashboard/routers/pipeline.py +218 -0
scroot/dashboard/routers/queue.py +188 -0
scroot/dashboard/routers/records.py +252 -0
scroot/dashboard/routers/settings.py +291 -0
scroot/dashboard/security.py +135 -0
scroot/dashboard/server.py +181 -0
scroot/evidence.py +228 -0
scroot/exceptions.py +62 -0
scroot/feedback/__init__.py +6 -0
scroot/feedback/injector.py +160 -0
scroot/feedback/sanitizer.py +56 -0
scroot/feedback/store.py +650 -0
scroot/flags.py +42 -0
scroot/metrics/__init__.py +15 -0
scroot/metrics/_utils.py +9 -0
scroot/metrics/completeness.py +139 -0
scroot/metrics/confidence.py +83 -0
scroot/metrics/consistency.py +125 -0
scroot/metrics/groundedness.py +193 -0
scroot/metrics/relevance.py +73 -0
scroot/models.py +214 -0
scroot/result.py +276 -0
scroot/sampling.py +306 -0
scroot/text_utils.py +136 -0
scroot/ui/dist/assets/index-DW1dLzDl.js +101 -0
scroot/ui/dist/assets/index-WOhrVVSM.css +2 -0
scroot/ui/dist/favicon.svg +27 -0
scroot/ui/dist/index.html +20 -0
scroot-0.2.0.dist-info/METADATA +832 -0
scroot-0.2.0.dist-info/RECORD +67 -0
scroot-0.2.0.dist-info/WHEEL +5 -0
scroot-0.2.0.dist-info/entry_points.txt +2 -0
scroot-0.2.0.dist-info/licenses/LICENSE +201 -0
scroot-0.2.0.dist-info/top_level.txt +1 -0

scroot/__init__.py ADDED Viewed

@@ -0,0 +1,109 @@
+"""scroot - LLM-free response quality scoring."""
+from __future__ import annotations
+from .core import Auditor
+from .result import EntailmentResult
+from .agents import AgentRegistry, AgentConfig
+from .sampling import SamplingResult, SamplingStrategy, sample_and_score
+from .composite import RAG_WEIGHTS
+from .context import ContextBuilder, ContextEntry, ContextPayload
+from .evidence import EvidenceEntry, EvidenceMap, build_evidence_map
+from .audit import configure_audit_log
+from .exceptions import (
+    GroundednessComputationError,
+    GroundednessUnavailableWarning,
+    NoContextWarning,
+)
+# Package version string. scroot.audit.emit() also stamps it onto every
+# audit event as "scroot_version".
+__version__ = "0.2.0"
+# Names exported by ``from scroot import *``. Everything listed here is either
+# imported above or defined further down in this module (setup_nltk, score,
+# verify).
+__all__ = [
+    "Auditor",
+    "ContextBuilder",
+    "ContextPayload",
+    "ContextEntry",
+    "EntailmentResult",
+    "EvidenceEntry",
+    "EvidenceMap",
+    "build_evidence_map",
+    "AgentRegistry",
+    "AgentConfig",
+    "SamplingResult",
+    "SamplingStrategy",
+    "sample_and_score",
+    "RAG_WEIGHTS",
+    "configure_audit_log",
+    "setup_nltk",
+    "score",
+    "verify",
+    "NoContextWarning",
+    "GroundednessUnavailableWarning",
+    "GroundednessComputationError",
+]
+def setup_nltk() -> None:
+    """Download NLTK punkt_tab tokenizer data for improved sentence splitting.
+    Call this once after installation to enable NLTK-backed sentence
+    splitting (more accurate than the built-in regex fallback).
+    This is a one-time deployment step - not called at runtime.
+    Example:
+        python -c "import scroot; scroot.setup_nltk()"
+    """
+    import nltk
+    nltk.download("punkt_tab", quiet=False)
+def score(
+    query: str,
+    response: str,
+    context: "ContextPayload | str | list[str] | None" = None,
+    **kwargs,
+) -> EntailmentResult:
+    """Score a single LLM response using default settings.
+    Convenience wrapper around Auditor().score(). Creates a fresh Auditor
+    instance on each call. For repeated scoring, instantiate Auditor once
+    and reuse it to avoid reloading models.
+    Args:
+        query: The user's query/question.
+        response: The LLM-generated response.
+        context: Grounding context - a ContextPayload from
+            ContextBuilder.build(), a plain string, a list of source
+            context strings, or None.
+        **kwargs: Passed through to Auditor().
+    Returns:
+        EntailmentResult with all metric scores and flags.
+    """
+    auditor = Auditor(**kwargs)
+    return auditor.score(query=query, response=response, context=context)
+def verify(
+    query: str,
+    response: str,
+    context: "ContextPayload | str | list[str] | None" = None,
+    threshold: float = 0.7,
+    **kwargs,
+) -> bool:
+    """Check whether a response meets a minimum quality threshold.
+    Convenience wrapper that returns True if the IQS score meets or
+    exceeds the threshold.
+    Args:
+        query: The user's query/question.
+        response: The LLM-generated response.
+        context: Grounding context - ContextPayload, str, list[str], or None.
+        threshold: Minimum IQS score to pass. Default 0.7.
+        **kwargs: Passed through to Auditor().
+    Returns:
+        True if IQS >= threshold, False otherwise.
+    """
+    result = score(query=query, response=response, context=context, **kwargs)
+    return result.iqs >= threshold

scroot/agents.py ADDED Viewed

@@ -0,0 +1,345 @@
+"""AgentRegistry: per-agent configuration and statistics tracking.
+Routes scoring calls through agent-specific configs (custom weights,
+thresholds, context requirements) while sharing a single Auditor
+instance and its loaded models. One process, one model load, per-agent
+metrics.
+"""
+from __future__ import annotations
+import threading
+import warnings
+from dataclasses import dataclass, field
+from .composite import DEFAULT_WEIGHTS, compute_iqs
+from .result import EntailmentResult
+@dataclass
+class AgentConfig:
+    """Scoring configuration for a single named agent.
+    Instances are created by AgentRegistry.register() for registered agents
+    and on the fly by AgentRegistry.score() for unregistered ones (non-strict
+    mode).
+    Args:
+        name: Unique agent identifier.
+        weights: Custom IQS weights. May be partial: AgentRegistry.score()
+            overlays them on a copy of DEFAULT_WEIGHTS, so missing keys
+            keep their default value. None means "defaults only".
+        iqs_threshold: A result whose IQS is below this value is counted
+            in the agent's below_threshold statistics.
+        context_required: If True, AgentRegistry.score() emits a warning
+            when it is called with context=None.
+        metadata: Optional free-form dict (model name, team, description).
+    """
+    # Positional order below is part of the generated __init__ signature.
+    name: str
+    weights: dict | None = None
+    iqs_threshold: float = 0.7
+    context_required: bool = False
+    # default_factory gives every instance its own dict instead of a shared one.
+    metadata: dict = field(default_factory=dict)
+@dataclass
+class AgentStats:
+    """Accumulated scoring statistics for a single agent."""
+    count: int = 0
+    iqs_sum: float = 0.0
+    iqs_min: float = 1.0
+    iqs_max: float = 0.0
+    flagged_count: int = 0
+    below_threshold_count: int = 0
+    flag_counts: dict = field(default_factory=dict)
+    def record(self, result: EntailmentResult, threshold: float) -> None:
+        """Update stats with a new scoring result.
+        Args:
+            result: EntailmentResult from a scoring call.
+            threshold: IQS threshold for below_threshold tracking.
+        """
+        self.count += 1
+        self.iqs_sum += result.iqs
+        self.iqs_min = min(self.iqs_min, result.iqs)
+        self.iqs_max = max(self.iqs_max, result.iqs)
+        if result.flags:
+            self.flagged_count += 1
+            for f in result.flags:
+                self.flag_counts[f] = self.flag_counts.get(f, 0) + 1
+        if result.iqs < threshold:
+            self.below_threshold_count += 1
+    def to_dict(self) -> dict:
+        """Serialize stats for API or logging."""
+        count = max(self.count, 1)
+        return {
+            "count": self.count,
+            "mean_iqs": round(self.iqs_sum / count, 4),
+            "min_iqs": self.iqs_min if self.count > 0 else None,
+            "max_iqs": self.iqs_max if self.count > 0 else None,
+            "flagged_count": self.flagged_count,
+            "flag_rate": round(self.flagged_count / count, 4),
+            "below_threshold_count": self.below_threshold_count,
+            "below_threshold_rate": round(self.below_threshold_count / count, 4),
+            "flag_counts": dict(self.flag_counts),
+        }
+class AgentRegistry:
+    """Per-agent configuration and statistics routing layer.
+    Wraps an Auditor instance. Each registered agent can have custom
+    IQS weights, thresholds, and metadata. Unregistered agents use
+    default config unless strict=True.
+    The registry is duck-type compatible with Auditor: ``score()`` can be
+    called with only ``query``, ``response``, and ``context`` kwargs (the
+    ``agent`` parameter defaults to ``"_default"``), so it works as a
+    drop-in for ``sample_and_score`` and ``DatabaseConnector``.
+    Args:
+        auditor: Auditor instance (shared across all agents).
+        strict: If True, scoring an unregistered agent raises ValueError.
+        default_iqs_threshold: Threshold for unregistered / default agents.
+    """
+    def __init__(
+        self,
+        auditor,
+        strict: bool = False,
+        default_iqs_threshold: float = 0.7,
+    ):
+        self._auditor = auditor
+        self._strict = strict
+        self._default_threshold = default_iqs_threshold
+        self._configs: dict[str, AgentConfig] = {}
+        self._stats: dict[str, AgentStats] = {}
+        self._lock = threading.Lock()
+    def register(
+        self,
+        name: str,
+        weights: dict | None = None,
+        iqs_threshold: float | None = None,
+        context_required: bool = False,
+        metadata: dict | None = None,
+    ) -> None:
+        """Register an agent with optional custom configuration.
+        Args:
+            name: Unique agent identifier string.
+            weights: Custom IQS weights dict. Partial dicts OK -
+                missing keys filled from DEFAULT_WEIGHTS.
+            iqs_threshold: Custom IQS threshold. Defaults to registry default.
+            context_required: If True, warn when score() is called without context.
+            metadata: Optional dict (model, team, description, etc).
+        Raises:
+            ValueError: If an agent with this name is already registered.
+        """
+        with self._lock:
+            if name in self._configs:
+                raise ValueError(
+                    f"Agent {name!r} already registered. "
+                    "Use update() to modify or unregister() first."
+                )
+            self._configs[name] = AgentConfig(
+                name=name,
+                weights=weights,
+                iqs_threshold=iqs_threshold if iqs_threshold is not None else self._default_threshold,
+                context_required=context_required,
+                metadata=metadata or {},
+            )
+            self._stats[name] = AgentStats()
+    def update(self, name: str, **kwargs) -> None:
+        """Update a registered agent's configuration.
+        Args:
+            name: Agent identifier.
+            **kwargs: AgentConfig fields to update (weights, iqs_threshold,
+                context_required, metadata).
+        Raises:
+            ValueError: If agent is not registered or field name is invalid.
+        """
+        with self._lock:
+            if name not in self._configs:
+                raise ValueError(f"Agent {name!r} not registered.")
+            config = self._configs[name]
+            for key, value in kwargs.items():
+                if hasattr(config, key):
+                    setattr(config, key, value)
+                else:
+                    raise ValueError(f"Unknown config field: {key!r}")
+    def unregister(self, name: str) -> None:
+        """Remove an agent and its accumulated stats.
+        Args:
+            name: Agent identifier to remove.
+        Raises:
+            ValueError: If agent is not registered.
+        """
+        with self._lock:
+            if name not in self._configs:
+                raise ValueError(f"Agent {name!r} not registered.")
+            del self._configs[name]
+            self._stats.pop(name, None)
+    def list_agents(self) -> list[str]:
+        """Return names of all registered agents."""
+        with self._lock:
+            return list(self._configs.keys())
+    def get_config(self, name: str) -> AgentConfig:
+        """Get an agent's configuration.
+        Args:
+            name: Agent identifier.
+        Returns:
+            AgentConfig for the named agent.
+        Raises:
+            ValueError: If agent is not registered.
+        """
+        with self._lock:
+            if name not in self._configs:
+                raise ValueError(f"Agent {name!r} not registered.")
+            return self._configs[name]
+    def score(
+        self,
+        agent: str = "_default",
+        *,
+        query: str,
+        response: str,
+        context: list[str] | None = None,
+    ) -> EntailmentResult:
+        """Score a response using agent-specific configuration.
+        The ``agent`` parameter defaults to ``"_default"``, making this
+        method duck-type compatible with ``Auditor.score()`` so that the
+        registry can be passed to ``sample_and_score()`` or
+        ``DatabaseConnector`` directly.
+        IQS is recomputed from the raw metric scores using the agent's
+        custom weights. The auditor's own weights attribute is never
+        mutated, so concurrent calls for different agents are safe.
+        Args:
+            agent: Agent identifier. Defaults to "_default".
+            query: User query.
+            response: LLM-generated response.
+            context: Optional source context list.
+        Returns:
+            EntailmentResult with agent-specific IQS and details["agent"] set.
+        Raises:
+            ValueError: If strict=True and agent is not registered.
+        """
+        with self._lock:
+            config = self._configs.get(agent)
+        if config is None:
+            if self._strict:
+                raise ValueError(
+                    f"Agent {agent!r} not registered. "
+                    "Call registry.register() first."
+                )
+            config = AgentConfig(name=agent, iqs_threshold=self._default_threshold)
+        if config.context_required and context is None:
+            warnings.warn(
+                f"Agent {agent!r} requires context but none was provided. "
+                "Groundedness will be skipped.",
+                stacklevel=2,
+            )
+        raw = self._auditor.score(query=query, response=response, context=context)
+        # Recompute IQS with agent-specific weights (no mutation of auditor state).
+        effective_weights = dict(DEFAULT_WEIGHTS)
+        if config.weights:
+            effective_weights.update(config.weights)
+        iqs = compute_iqs(
+            raw.groundedness, raw.completeness, raw.relevance,
+            raw.consistency, raw.confidence,
+            weights=effective_weights,
+            mode=self._auditor.iqs_mode,
+        )
+        result = EntailmentResult(
+            groundedness=raw.groundedness,
+            completeness=raw.completeness,
+            relevance=raw.relevance,
+            consistency=raw.consistency,
+            confidence=raw.confidence,
+            iqs=iqs,
+            flags=list(raw.flags),
+            details={
+                **raw.details,
+                "agent": agent,
+                "agent_config": {
+                    "weights": effective_weights,
+                    "iqs_threshold": config.iqs_threshold,
+                },
+            },
+        )
+        with self._lock:
+            if agent not in self._stats:
+                self._stats[agent] = AgentStats()
+            self._stats[agent].record(result, config.iqs_threshold)
+        return result
+    def score_batch(self, items: list[dict]) -> list[EntailmentResult]:
+        """Score a batch of responses, each routed to its agent config.
+        Items without an "agent" key are scored under "_default".
+        Args:
+            items: List of dicts with "agent", "query", "response",
+                and optionally "context".
+        Returns:
+            List of EntailmentResult, one per item, in order.
+        """
+        return [
+            self.score(
+                agent=item.get("agent", "_default"),
+                query=item["query"],
+                response=item["response"],
+                context=item.get("context"),
+            )
+            for item in items
+        ]
+    def get_stats(self, agent: str | None = None) -> dict:
+        """Get accumulated scoring statistics.
+        Args:
+            agent: If provided, return stats for this agent only.
+                Returns ``{}`` if agent has no stats yet.
+                If None, return stats for all agents.
+        """
+        with self._lock:
+            if agent is not None:
+                stats = self._stats.get(agent)
+                return stats.to_dict() if stats is not None else {}
+            return {name: s.to_dict() for name, s in self._stats.items()}
+    def reset_stats(self, agent: str | None = None) -> None:
+        """Reset accumulated statistics.
+        Args:
+            agent: If provided, reset only this agent. If None, reset all.
+        """
+        with self._lock:
+            if agent is not None:
+                if agent in self._stats:
+                    self._stats[agent] = AgentStats()
+            else:
+                for name in self._stats:
+                    self._stats[name] = AgentStats()

scroot/audit.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""Structured, content-free audit logging (SOC II CC7).
+Every ContextBuilder operation that touches content emits a structured
+audit event - metadata only (entity-type counts, token counts, sources,
+checksums), never the content itself.
+Default destination is structured stderr (no file write in the OSS tier).
+Enterprise deployments can route to a JSONL file with retention-based
+rotation via :func:`configure_audit_log`.
+"""
+from __future__ import annotations
+import json
+import os
+import sys
+import threading
+from datetime import datetime, timezone, timedelta
+# Guards reads and writes of ``_config`` and serialises writes to the audit file.
+_lock = threading.Lock()
+# Process-wide audit settings; replaced through configure_audit_log().
+_config: dict = {
+    "destination": "stderr",   # "stderr" | "file" | "disabled"
+    "path": None,              # JSONL file path, only used when destination is "file"
+    "retention_days": 90,      # age limit applied when the file log is rotated
+}
+def configure_audit_log(
+    destination: str = "stderr",
+    path: str | None = None,
+    retention_days: int = 90,
+) -> None:
+    """Configure where scroot audit events are written.
+    Args:
+        destination: "stderr" (default - structured JSON lines on stderr),
+            "file" (append to a JSONL file), or "disabled".
+        path: JSONL file path, required when destination="file".
+            ``~`` is expanded. Example: ``~/.scroot/audit.jsonl``.
+        retention_days: For file destination, events older than this are
+            pruned when the log is reconfigured or reopened. Default 90.
+    Raises:
+        ValueError: If destination is unknown, or destination="file"
+            without a path.
+    """
+    if destination not in ("stderr", "file", "disabled"):
+        raise ValueError(
+            f"Unknown audit destination {destination!r}. "
+            "Use 'stderr', 'file', or 'disabled'."
+        )
+    if destination == "file" and not path:
+        raise ValueError("destination='file' requires a path.")
+    resolved = os.path.expanduser(path) if path else None
+    with _lock:
+        _config["destination"] = destination
+        _config["path"] = resolved
+        _config["retention_days"] = retention_days
+    if destination == "file":
+        _rotate(resolved, retention_days)
+def _rotate(path: str, retention_days: int) -> None:
+    """Drop events older than retention_days. Never raises."""
+    try:
+        if not path or not os.path.exists(path):
+            return
+        cutoff = (
+            datetime.now(timezone.utc) - timedelta(days=retention_days)
+        ).isoformat()
+        kept = []
+        with open(path, "r", encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    event = json.loads(line)
+                    if event.get("timestamp", "") >= cutoff:
+                        kept.append(line)
+                except json.JSONDecodeError:
+                    continue
+        with _lock:
+            with open(path, "w", encoding="utf-8") as f:
+                for line in kept:
+                    f.write(line + "\n")
+    except OSError:
+        pass
+def emit(event: str, **fields) -> None:
+    """Write one audit event. Metadata only - callers must never pass content.
+    Failures are swallowed: audit logging must never crash the client's
+    pipeline.
+    Args:
+        event: Event name, e.g. "context_entry_added", "context_built".
+        **fields: JSON-serialisable metadata (counts, flags, checksums).
+    """
+    with _lock:
+        destination = _config["destination"]
+        path = _config["path"]
+    if destination == "disabled":
+        return
+    from . import __version__
+    record = {
+        "event": event,
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        **fields,
+        "scroot_version": __version__,
+    }
+    try:
+        line = json.dumps(record, ensure_ascii=False, default=str)
+    except (TypeError, ValueError):
+        return
+    try:
+        if destination == "file" and path:
+            dir_name = os.path.dirname(os.path.abspath(path)) or "."
+            os.makedirs(dir_name, exist_ok=True)
+            with _lock:
+                with open(path, "a", encoding="utf-8") as f:
+                    f.write(line + "\n")
+        else:
+            print(line, file=sys.stderr)
+    except OSError:
+        pass