scroot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. scroot/__init__.py +109 -0
  2. scroot/agents.py +345 -0
  3. scroot/audit.py +131 -0
  4. scroot/cli/__init__.py +167 -0
  5. scroot/cli/download.py +49 -0
  6. scroot/cli/eval.py +230 -0
  7. scroot/cli/model_info.py +28 -0
  8. scroot/composite.py +170 -0
  9. scroot/config/__init__.py +0 -0
  10. scroot/config/corrector.py +92 -0
  11. scroot/connectors/__init__.py +5 -0
  12. scroot/connectors/database.py +357 -0
  13. scroot/context/__init__.py +9 -0
  14. scroot/context/adapters.py +86 -0
  15. scroot/context/builder.py +514 -0
  16. scroot/context/dedup.py +99 -0
  17. scroot/context/payload.py +66 -0
  18. scroot/context/pii.py +101 -0
  19. scroot/context/tokenizer.py +42 -0
  20. scroot/core.py +349 -0
  21. scroot/corrector/__init__.py +38 -0
  22. scroot/corrector/api.py +145 -0
  23. scroot/corrector/base.py +20 -0
  24. scroot/corrector/disabled.py +13 -0
  25. scroot/corrector/local.py +112 -0
  26. scroot/corrector/models.py +69 -0
  27. scroot/dashboard/__init__.py +0 -0
  28. scroot/dashboard/__main__.py +37 -0
  29. scroot/dashboard/routers/__init__.py +0 -0
  30. scroot/dashboard/routers/analytics.py +236 -0
  31. scroot/dashboard/routers/corrector.py +230 -0
  32. scroot/dashboard/routers/export.py +150 -0
  33. scroot/dashboard/routers/guardrails.py +41 -0
  34. scroot/dashboard/routers/pipeline.py +218 -0
  35. scroot/dashboard/routers/queue.py +188 -0
  36. scroot/dashboard/routers/records.py +252 -0
  37. scroot/dashboard/routers/settings.py +291 -0
  38. scroot/dashboard/security.py +135 -0
  39. scroot/dashboard/server.py +181 -0
  40. scroot/evidence.py +228 -0
  41. scroot/exceptions.py +62 -0
  42. scroot/feedback/__init__.py +6 -0
  43. scroot/feedback/injector.py +160 -0
  44. scroot/feedback/sanitizer.py +56 -0
  45. scroot/feedback/store.py +650 -0
  46. scroot/flags.py +42 -0
  47. scroot/metrics/__init__.py +15 -0
  48. scroot/metrics/_utils.py +9 -0
  49. scroot/metrics/completeness.py +139 -0
  50. scroot/metrics/confidence.py +83 -0
  51. scroot/metrics/consistency.py +125 -0
  52. scroot/metrics/groundedness.py +193 -0
  53. scroot/metrics/relevance.py +73 -0
  54. scroot/models.py +214 -0
  55. scroot/result.py +276 -0
  56. scroot/sampling.py +306 -0
  57. scroot/text_utils.py +136 -0
  58. scroot/ui/dist/assets/index-DW1dLzDl.js +101 -0
  59. scroot/ui/dist/assets/index-WOhrVVSM.css +2 -0
  60. scroot/ui/dist/favicon.svg +27 -0
  61. scroot/ui/dist/index.html +20 -0
  62. scroot-0.2.0.dist-info/METADATA +832 -0
  63. scroot-0.2.0.dist-info/RECORD +67 -0
  64. scroot-0.2.0.dist-info/WHEEL +5 -0
  65. scroot-0.2.0.dist-info/entry_points.txt +2 -0
  66. scroot-0.2.0.dist-info/licenses/LICENSE +201 -0
  67. scroot-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,20 @@
1
+ """Base corrector ABC."""
2
+ from __future__ import annotations
3
+
4
+ from abc import ABC, abstractmethod
5
+
6
+
7
class BaseCorrector(ABC):
    """Interface every corrector backend implements."""

    @property
    def is_available(self) -> bool:
        """Whether this corrector can produce drafts right now.

        The base implementation always reports ``True``; subclasses
        override it when availability depends on something else.
        """
        return True

    @abstractmethod
    def draft_correction(
        self, query: str, response: str, context: str | None
    ) -> str | None:
        """Produce a correction draft for *response*.

        Args:
            query: The query the response was written for.
            response: The response text to correct.
            context: Optional context text, or ``None``.

        Returns:
            The correction draft, or ``None`` if disabled.
        """
@@ -0,0 +1,13 @@
1
+ """NullCorrector - no LLM call, returns None."""
2
+ from __future__ import annotations
3
+
4
+ from scroot.corrector.base import BaseCorrector
5
+
6
+
7
class NullCorrector(BaseCorrector):
    """Corrector that never produces a draft."""

    def draft_correction(
        self, query: str, response: str, context: str | None
    ) -> None:
        """Ignore all arguments and return ``None``."""
        return None

    @property
    def is_available(self) -> bool:
        """Always ``False``: this corrector cannot generate drafts."""
        return False
@@ -0,0 +1,112 @@
1
+ """LocalLLMCorrector - llama-cpp-python inference, thread-safe, lazy-loaded."""
2
+ from __future__ import annotations
3
+
4
+ import threading
5
+
6
+ from scroot.corrector.base import BaseCorrector
7
+ from scroot.corrector.models import MODEL_REGISTRY, get_model_path, is_model_downloaded
8
+
9
+
10
class LocalLLMCorrector(BaseCorrector):
    """
    Wraps llama-cpp-python for in-process CPU (or GPU) inference.
    Thread-safe via lock. Lazy-loaded: model is not loaded until first call.
    """

    # Class-level lock: it is shared by every instance, so model loading,
    # inference and unloading are serialized across all correctors.
    _lock = threading.Lock()

    def __init__(self, config) -> None:
        """Store *config*; the model itself is loaded on first use.

        The config object is read for ``model_id``, ``n_threads``,
        ``context_window`` and ``n_gpu_layers``.
        """
        self._config = config
        self._llm = None

    def _ensure_loaded(self) -> None:
        """Load the model if it is not loaded yet.

        ``draft_correction`` calls this while holding ``_lock``.

        Raises:
            RuntimeError: if llama-cpp-python is not installed, the
                configured model id is not in the registry, or the model
                file has not been downloaded.
        """
        if self._llm is not None:
            return

        import os

        try:
            from llama_cpp import Llama
        except ImportError as exc:
            raise RuntimeError(
                "llama-cpp-python is not installed. "
                "Run: pip install 'scroot[local]'"
            ) from exc

        model_id = self._config.model_id
        # Look the spec up first: is_model_downloaded() indexes the registry
        # and would otherwise surface an unknown id as a bare KeyError.
        spec = MODEL_REGISTRY.get(model_id)
        if spec is None:
            known = ", ".join(sorted(MODEL_REGISTRY))
            raise RuntimeError(
                f"Unknown model '{model_id}'. Available models: {known}"
            )
        if not is_model_downloaded(model_id):
            raise RuntimeError(
                f"Model '{spec.name}' is not downloaded. "
                f"Run: scroot download-model --model {model_id}"
            )

        n_threads = self._config.n_threads
        if n_threads == -1:
            # -1 means "use every core"; fall back to 4 if the count is unknown.
            n_threads = os.cpu_count() or 4

        self._llm = Llama(
            model_path=str(get_model_path(model_id)),
            n_ctx=self._config.context_window,
            n_threads=n_threads,
            n_gpu_layers=self._config.n_gpu_layers,
            verbose=False,
        )

    @property
    def is_available(self) -> bool:
        """True if llama-cpp-python imports and the model file exists.

        An unregistered model id reports ``False`` instead of raising.
        """
        model_id = self._config.model_id
        if model_id not in MODEL_REGISTRY:
            return False
        try:
            import llama_cpp  # noqa: F401
        except ImportError:
            return False
        return is_model_downloaded(model_id)

    def draft_correction(
        self,
        query: str,
        response: str,
        context: str | None,
    ) -> str:
        """Ask the local model to rewrite *response*.

        Returns:
            The stripped completion text; an empty string when the model
            returns no content.

        Raises:
            RuntimeError: if the model cannot be loaded (see
                ``_ensure_loaded``).
        """
        with self._lock:
            self._ensure_loaded()
            result = self._llm.create_chat_completion(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are a correction assistant. "
                            "Rewrite the LLM response to be more accurate, "
                            "complete, and grounded in the provided context. "
                            "Return only the corrected response text. "
                            "Do not explain your changes. Do not add preamble."
                        ),
                    },
                    {"role": "user", "content": self._build_prompt(query, response, context)},
                ],
                max_tokens=512,
                temperature=0.3,
                top_p=0.9,
                repeat_penalty=1.1,
            )
        # The message content may be None; avoid calling .strip() on it.
        content = result["choices"][0]["message"]["content"]
        return (content or "").strip()

    def _build_prompt(self, query: str, response: str, context: str | None) -> str:
        """Assemble the user message; the context section is added only if non-empty."""
        parts = [f"Query:\n{query}", f"\nOriginal response:\n{response}"]
        if context:
            parts.append(f"\nContext / grounding documents:\n{context}")
        parts.append("\nRewrite the response to be more accurate and complete.")
        return "\n".join(parts)

    def unload(self) -> None:
        """Free the model from RAM. Call before deleting the GGUF file."""
        with self._lock:
            self._llm = None

    @property
    def model_spec(self):
        """Registry entry for the configured model (KeyError if unregistered)."""
        return MODEL_REGISTRY[self._config.model_id]

    def tok_per_sec(self) -> float | None:
        """Rough throughput estimate based on model family.

        Returns a hard-coded figure for the known model ids, else ``None``.
        """
        model_id = self._config.model_id
        return {"phi4-mini": 16.0, "smollm3": 22.0}.get(model_id)
@@ -0,0 +1,69 @@
1
+ """Model registry - GGUF model specs and local storage helpers."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+
9
@dataclass
class ModelSpec:
    """Describes one downloadable GGUF model."""

    # Registry key for the model.
    id: str
    # Human-readable model name.
    name: str
    # Repository and file name of the GGUF artifact
    # (presumably on Hugging Face, going by the ``hf_`` prefix).
    hf_repo: str
    hf_filename: str
    # Size in GB.
    size_gb: float
    # Minimum and recommended RAM, in GB.
    min_ram_gb: int
    rec_ram_gb: int
    # Context window (presumably in tokens).
    context_window: int
    # License name.
    license: str
    # Short free-text description.
    description: str
21
+
22
+
23
# NOTE(review): the registry ids ("phi4-mini", "smollm3") do not match the
# Qwen models they point at; presumably the ids are kept for compatibility.
# Confirm before renaming anything.
_MODEL_SPECS = (
    ModelSpec(
        id="phi4-mini",
        name="Qwen2.5-3B-Instruct",
        hf_repo="Qwen/Qwen2.5-3B-Instruct-GGUF",
        hf_filename="qwen2.5-3b-instruct-q4_k_m.gguf",
        size_gb=2.0,
        min_ram_gb=4,
        rec_ram_gb=6,
        context_window=32_768,
        license="Apache 2.0",
        description="Alibaba's efficient 3B instruction model. Strong "
                    "reasoning and instruction following. Default choice.",
    ),
    ModelSpec(
        id="smollm3",
        name="Qwen2.5-1.5B-Instruct",
        hf_repo="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
        hf_filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
        size_gb=1.0,
        min_ram_gb=2,
        rec_ram_gb=4,
        context_window=32_768,
        license="Apache 2.0",
        description="Compact 1.5B model. Fastest on CPU, lower RAM "
                    "requirement. Good for resource-constrained machines.",
    ),
)

# Every known model, keyed by its id.
MODEL_REGISTRY: dict[str, ModelSpec] = {spec.id: spec for spec in _MODEL_SPECS}

# Id of the model used by default.
DEFAULT_MODEL_ID = "phi4-mini"
53
+
54
+
55
def get_model_dir() -> Path:
    """Return the directory where model files are stored.

    A non-empty ``SCROOT_MODELS_DIR`` environment variable overrides the
    default location, ``~/.scroot/models``.
    """
    override = os.environ.get("SCROOT_MODELS_DIR")
    return Path(override) if override else Path.home() / ".scroot" / "models"
61
+
62
+
63
def get_model_path(model_id: str) -> Path:
    """Return the path of the GGUF file for *model_id*.

    The file lives at ``<model dir>/<model_id>/<hf_filename>``.

    Raises:
        KeyError: if *model_id* is not in ``MODEL_REGISTRY``.
    """
    filename = MODEL_REGISTRY[model_id].hf_filename
    return get_model_dir().joinpath(model_id, filename)
66
+
67
+
68
def is_model_downloaded(model_id: str) -> bool:
    """Return True if the model's GGUF path exists on disk."""
    gguf_path = get_model_path(model_id)
    return gguf_path.exists()
File without changes
@@ -0,0 +1,37 @@
1
+ """python -m scroot.dashboard - launch the review console."""
2
+ import sys
3
+
4
+
5
def main():
    """Parse command-line options and serve the review console with uvicorn.

    Exits with status 1 and a message on stderr when uvicorn is not
    installed.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Scroot Review Console",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="Open http://localhost:7432 in your browser after starting.",
    )
    parser.add_argument("--port", type=int, default=7432)
    parser.add_argument("--store", default="./scroot_store.jsonl",
                        help="Path to JSONL feedback store")
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--hosted", action="store_true",
                        help=argparse.SUPPRESS)  # Enterprise only
    args = parser.parse_args()

    try:
        import uvicorn
    except ImportError:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # so the error no longer lands on stdout.
        sys.exit("ERROR: uvicorn is required. Install with: pip install 'scroot[dashboard]'")

    # Imported only after the uvicorn check, as in the original ordering.
    from .server import create_app
    app = create_app(store_path=args.store, hosted=args.hosted)

    print("\n  ◆ SCROOT Review Console")
    print(f"  Store: {args.store}")
    print(f"  URL:   http://{args.host}:{args.port}\n")

    uvicorn.run(app, host=args.host, port=args.port, log_level="warning")


if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,236 @@
1
+ """Analytics router - /api/analytics endpoints."""
2
+ from __future__ import annotations
3
+
4
+ from collections import Counter, defaultdict
5
+ from datetime import datetime, timedelta, timezone
6
+ from typing import Optional
7
+
8
+ from fastapi import APIRouter, Query
9
+
10
+
11
# Lookback window, in hours, for each supported ``range`` query value.
_RANGE_HOURS = {"24h": 24, "7d": 168, "30d": 720}


def _parse_utc(timestamp):
    """Parse an ISO-8601 timestamp string into an aware UTC datetime.

    Returns None when the value is not a string or is not valid ISO-8601,
    so callers can skip the record. A trailing "Z" is accepted, and a
    timestamp without an offset is assumed to be UTC, which keeps it
    comparable with the aware cutoffs used by the endpoints.
    """
    try:
        parsed = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
    except (ValueError, AttributeError, TypeError):
        return None
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def _iqs(record):
    """Return the record's ``iqs`` score, or 0 when it is missing or null."""
    scores = record.scores
    if not isinstance(scores, dict):
        return 0
    value = scores.get("iqs", 0)
    return 0 if value is None else value


def analytics_router(store):
    """Build the analytics ``APIRouter`` backed by *store*.

    *store* must provide ``get_all()``. The handlers read these attributes
    from each returned record: ``scores``, ``timestamp``, ``flags``,
    ``corrected_by``, and (via ``getattr``) ``status`` and
    ``corrected_response_iqs``.

    Returns:
        The router with all analytics endpoints registered.
    """
    router = APIRouter()

    @router.get("")
    def summary(time_range: str = Query("30d", alias="range")):
        """Unified analytics endpoint - returns all charts in one call."""
        records = store.get_all()
        now = datetime.now(timezone.utc)
        cutoff = now - timedelta(hours=_RANGE_HOURS.get(time_range, 720))
        today = now.date()

        total = len(records)
        # Only records with a scores dict contribute to the overall figures.
        iqs_vals = [_iqs(r) for r in records if isinstance(r.scores, dict)]
        avg_iqs = round(sum(iqs_vals) / len(iqs_vals), 3) if iqs_vals else 0.0

        pending_review = sum(1 for r in records if getattr(r, "status", "pending") == "pending")

        # IQS trend (daily buckets) and today's scores, from one parse per record.
        daily: dict[str, list[float]] = defaultdict(list)
        today_vals = []
        for r in records:
            dt = _parse_utc(getattr(r, "timestamp", None))
            if dt is None:
                continue
            if dt >= cutoff:
                daily[dt.strftime("%Y-%m-%d")].append(_iqs(r))
            if dt.date() == today and isinstance(r.scores, dict):
                today_vals.append(_iqs(r))
        iqs_trend = [
            {"date": d, "avg_iqs": round(sum(v) / len(v), 3)}
            for d, v in sorted(daily.items()) if v
        ]

        # Flag frequency - object keyed by metric name
        flag_counter: Counter = Counter()
        for r in records:
            for f in (r.flags or []):
                flag_counter[f] += 1
        flag_metrics = ["groundedness", "completeness", "relevance", "consistency", "confidence"]
        # A flag may be stored under the bare metric name or as "low_<metric>".
        flag_frequency = {m: flag_counter.get(m, flag_counter.get(f"low_{m}", 0)) for m in flag_metrics}

        # IQS distribution - 5 buckets; out-of-range scores are clamped into
        # the first or last bucket instead of being dropped.
        buckets = ["0.0–0.2", "0.2–0.4", "0.4–0.6", "0.6–0.8", "0.8–1.0"]
        dist = Counter()
        for v in iqs_vals:
            dist[max(0, min(4, int(v * 5)))] += 1
        iqs_distribution = [{"bucket": buckets[i], "count": dist.get(i, 0)} for i in range(5)]

        # Per-agent breakdown, lowest average first. The agent id is taken
        # from ``corrected_by``.
        agent_map: dict[str, list[float]] = defaultdict(list)
        for r in records:
            agent_map[r.corrected_by or "unknown"].append(_iqs(r))
        per_agent = sorted(
            [
                {"agent_id": aid, "avg_iqs": round(sum(v) / len(v), 3), "count": len(v)}
                for aid, v in agent_map.items()
            ],
            key=lambda x: x["avg_iqs"],
        )

        # Avg IQS today; falls back to the overall average when nothing was
        # scored today.
        avg_iqs_today = round(sum(today_vals) / len(today_vals), 3) if today_vals else avg_iqs

        # pass: >= threshold; warn: within 70% of it; fail: below that.
        threshold = 0.70
        pass_count = sum(1 for v in iqs_vals if v >= threshold)
        warn_count = sum(1 for v in iqs_vals if threshold * 0.7 <= v < threshold)
        fail_count = sum(1 for v in iqs_vals if v < threshold * 0.7)

        return {
            "total_scored": total,
            "avg_iqs": avg_iqs,
            "avg_iqs_today": avg_iqs_today,
            "avg_iqs_delta": 0.0,
            "pending_review": pending_review,
            "pass_count": pass_count,
            "warn_count": warn_count,
            "fail_count": fail_count,
            "iqs_trend": iqs_trend,
            "flag_frequency": flag_frequency,
            "iqs_distribution": iqs_distribution,
            "per_agent": per_agent,
        }

    @router.get("/iqs-trend")
    def iqs_trend(time_range: str = Query("7d", alias="range"), agent: Optional[str] = Query(None)):
        """Return mean/p10/p90 IQS and flagged counts per time bucket.

        Buckets are hourly for the "24h" and "7d" ranges and daily otherwise.
        """
        # NOTE(review): ``agent`` is accepted but not applied as a filter;
        # confirm which record field identifies the agent before wiring it in.
        records = store.get_all()
        if not records:
            return {"points": []}

        start = datetime.now(timezone.utc) - timedelta(hours=_RANGE_HOURS.get(time_range, 168))
        if time_range in ("24h", "7d"):
            bucket_format = "%Y-%m-%dT%H:00:00Z"
        else:
            bucket_format = "%Y-%m-%dT00:00:00Z"

        # Collect scores and the flagged-record count per bucket in one pass,
        # so flagged_count describes exactly the records inside the bucket.
        bucket_vals: dict[str, list[float]] = defaultdict(list)
        bucket_flagged: Counter = Counter()
        for r in records:
            dt = _parse_utc(getattr(r, "timestamp", None))
            if dt is None or dt < start:
                continue
            key = dt.strftime(bucket_format)
            bucket_vals[key].append(_iqs(r))
            if r.flags:
                bucket_flagged[key] += 1

        points = []
        for ts in sorted(bucket_vals):
            vals_sorted = sorted(bucket_vals[ts])
            n = len(vals_sorted)
            p10 = vals_sorted[max(0, int(n * 0.1) - 1)]
            p90 = vals_sorted[min(n - 1, int(n * 0.9))]
            points.append({
                "timestamp": ts,
                "mean_iqs": round(sum(vals_sorted) / n, 4),
                "p10": round(p10, 4),
                "p90": round(p90, 4),
                "flagged_count": bucket_flagged[ts],
            })

        return {"points": points}

    @router.get("/flag-distribution")
    def flag_distribution(time_range: str = Query("7d", alias="range")):
        """Return count and percentage of each known flag type within the range."""
        records = store.get_all()
        cutoff = datetime.now(timezone.utc) - timedelta(hours=_RANGE_HOURS.get(time_range, 168))

        flag_counter: Counter = Counter()
        total = 0
        for r in records:
            dt = _parse_utc(getattr(r, "timestamp", None))
            if dt is None or dt < cutoff:
                continue
            total += 1
            for f in (r.flags or []):
                flag_counter[f] += 1

        if total == 0:
            return {"flags": []}

        flag_types = ["hallucination_risk", "incomplete", "off_topic", "self_contradictory", "ungrounded"]
        return {
            "flags": [
                {
                    "type": ft,
                    "count": flag_counter.get(ft, 0),
                    # Percentage of in-range records carrying this flag.
                    "pct": round(flag_counter.get(ft, 0) / total * 100, 1),
                    "trend_pct": 0.0,  # TODO: compare to previous period
                }
                for ft in flag_types
            ]
        }

    @router.get("/before-after")
    def before_after(correction_id: Optional[str] = Query(None)):
        """Compare the IQS distribution before and after correction."""
        # NOTE(review): ``correction_id`` is accepted but unused, and "before"
        # covers all records while "after" covers only reviewed/applied ones;
        # confirm that this comparison is intended.
        records = store.get_all()
        reviewed = [r for r in records if getattr(r, "status", "pending") in ("reviewed", "applied")]

        def histogram(vals, bins=10):
            """Count *vals* into *bins* equal-width buckets over [0, 1]."""
            if not vals:
                return []
            step = 1.0 / bins
            counts = [0] * bins
            for v in vals:
                # Clamp at both ends: a negative index would otherwise wrap
                # around and be counted in the top buckets.
                counts[max(0, min(int(v * bins), bins - 1))] += 1
            return [{"x": round(i * step, 1), "y": c} for i, c in enumerate(counts)]

        before_iqs = [_iqs(r) for r in records if isinstance(r.scores, dict)]
        after_iqs = [r.corrected_response_iqs for r in reviewed
                     if getattr(r, "corrected_response_iqs", None) is not None]

        before_mean = sum(before_iqs) / len(before_iqs) if before_iqs else 0
        after_mean = sum(after_iqs) / len(after_iqs) if after_iqs else 0

        return {
            "before": {"histogram": histogram(before_iqs), "mean": round(before_mean, 3)},
            "after": {"histogram": histogram(after_iqs), "mean": round(after_mean, 3)},
            "delta": round(after_mean - before_mean, 3),
        }

    @router.get("/reviewer-throughput")
    def reviewer_throughput():
        """Return per-day counts of reviewed records for the last 7 days."""
        records = store.get_all()
        reviewed = [r for r in records if getattr(r, "status", "pending") in ("reviewed", "applied", "rejected")]

        # Count by day for last 7 days (oldest first, today last). Records
        # are bucketed by the date of their ``timestamp`` field.
        today = datetime.now(timezone.utc).date()
        per_day: Counter = Counter()
        for r in reviewed:
            dt = _parse_utc(getattr(r, "timestamp", None))
            if dt is not None:
                per_day[dt.date()] += 1
        by_day = [per_day[today - timedelta(days=i)] for i in range(6, -1, -1)]

        today_count = by_day[-1]
        week_total = sum(by_day)
        avg_per_day = round(week_total / 7, 1)

        return {
            "reviews_today": today_count,
            "avg_time_per_review_s": 0.0,  # requires session tracking
            "reviews_this_week": by_day,
            "week_total": week_total,
            "avg_per_day": avg_per_day,
        }

    return router