scroot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. scroot/__init__.py +109 -0
  2. scroot/agents.py +345 -0
  3. scroot/audit.py +131 -0
  4. scroot/cli/__init__.py +167 -0
  5. scroot/cli/download.py +49 -0
  6. scroot/cli/eval.py +230 -0
  7. scroot/cli/model_info.py +28 -0
  8. scroot/composite.py +170 -0
  9. scroot/config/__init__.py +0 -0
  10. scroot/config/corrector.py +92 -0
  11. scroot/connectors/__init__.py +5 -0
  12. scroot/connectors/database.py +357 -0
  13. scroot/context/__init__.py +9 -0
  14. scroot/context/adapters.py +86 -0
  15. scroot/context/builder.py +514 -0
  16. scroot/context/dedup.py +99 -0
  17. scroot/context/payload.py +66 -0
  18. scroot/context/pii.py +101 -0
  19. scroot/context/tokenizer.py +42 -0
  20. scroot/core.py +349 -0
  21. scroot/corrector/__init__.py +38 -0
  22. scroot/corrector/api.py +145 -0
  23. scroot/corrector/base.py +20 -0
  24. scroot/corrector/disabled.py +13 -0
  25. scroot/corrector/local.py +112 -0
  26. scroot/corrector/models.py +69 -0
  27. scroot/dashboard/__init__.py +0 -0
  28. scroot/dashboard/__main__.py +37 -0
  29. scroot/dashboard/routers/__init__.py +0 -0
  30. scroot/dashboard/routers/analytics.py +236 -0
  31. scroot/dashboard/routers/corrector.py +230 -0
  32. scroot/dashboard/routers/export.py +150 -0
  33. scroot/dashboard/routers/guardrails.py +41 -0
  34. scroot/dashboard/routers/pipeline.py +218 -0
  35. scroot/dashboard/routers/queue.py +188 -0
  36. scroot/dashboard/routers/records.py +252 -0
  37. scroot/dashboard/routers/settings.py +291 -0
  38. scroot/dashboard/security.py +135 -0
  39. scroot/dashboard/server.py +181 -0
  40. scroot/evidence.py +228 -0
  41. scroot/exceptions.py +62 -0
  42. scroot/feedback/__init__.py +6 -0
  43. scroot/feedback/injector.py +160 -0
  44. scroot/feedback/sanitizer.py +56 -0
  45. scroot/feedback/store.py +650 -0
  46. scroot/flags.py +42 -0
  47. scroot/metrics/__init__.py +15 -0
  48. scroot/metrics/_utils.py +9 -0
  49. scroot/metrics/completeness.py +139 -0
  50. scroot/metrics/confidence.py +83 -0
  51. scroot/metrics/consistency.py +125 -0
  52. scroot/metrics/groundedness.py +193 -0
  53. scroot/metrics/relevance.py +73 -0
  54. scroot/models.py +214 -0
  55. scroot/result.py +276 -0
  56. scroot/sampling.py +306 -0
  57. scroot/text_utils.py +136 -0
  58. scroot/ui/dist/assets/index-DW1dLzDl.js +101 -0
  59. scroot/ui/dist/assets/index-WOhrVVSM.css +2 -0
  60. scroot/ui/dist/favicon.svg +27 -0
  61. scroot/ui/dist/index.html +20 -0
  62. scroot-0.2.0.dist-info/METADATA +832 -0
  63. scroot-0.2.0.dist-info/RECORD +67 -0
  64. scroot-0.2.0.dist-info/WHEEL +5 -0
  65. scroot-0.2.0.dist-info/entry_points.txt +2 -0
  66. scroot-0.2.0.dist-info/licenses/LICENSE +201 -0
  67. scroot-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,188 @@
1
+ """Queue router - /api/queue endpoints."""
2
+ from __future__ import annotations
3
+
4
+ import threading
5
+ from datetime import datetime, timezone
6
+ from typing import Literal, Optional
7
+
8
+ from fastapi import APIRouter, HTTPException, Query
9
+ from pydantic import BaseModel
10
+
11
+
12
class QueueItem(BaseModel):
    """One row of the review queue as returned by the queue list endpoint."""

    # Identifier of the underlying stored record.
    id: str
    # Taken from the record's `corrected_by`; "unknown" when that is empty.
    agent_id: str
    # Query text (the list endpoint sends only the first 120 characters).
    query: str
    # Response text (the list endpoint sends only the first 200 characters).
    response: str
    # The record's "iqs" score.
    iqs: float
    # Flags attached to the record; empty list when it has none.
    flags: list[str]
    # Workflow state of the record.
    status: Literal["pending", "claimed", "reviewed", "rejected", "applied"]
    # The record's timestamp string.
    created_at: str
    # Time the record was claimed, when a claim is currently registered.
    claimed_at: Optional[str] = None
    # How many metrics contributed to the IQS.
    iqs_metric_count: int = 5
    session_id: Optional[str] = None
    context_checksum: Optional[str] = None
25
+
26
+
27
class QueueResponse(BaseModel):
    """Paginated payload of the queue list endpoint."""

    # Items on the requested page.
    records: list[QueueItem]
    # Number of records matching the filters, before pagination.
    total: int
    # Page number the items belong to.
    page: int
31
+
32
+
33
class StatsResponse(BaseModel):
    """Summary figures returned by the queue stats endpoint."""

    # Number of records whose status is "pending".
    pending: int
    # Number of "reviewed" records whose timestamp falls on today's UTC date.
    reviewed_today: int
    # Mean IQS over the stored records, rounded to three decimals.
    avg_iqs: float
    # Age in hours of the oldest pending record, rounded to one decimal.
    oldest_pending_hours: float
38
+
39
+
40
# Claim registry for the single-session open-source tier: maps a record id to
# a dict holding its "claimed_at" timestamp. It lives in process memory only;
# writers take `_claims_lock` so that check-and-insert is atomic.
_claims: dict[str, dict] = {}
_claims_lock = threading.Lock()
43
+
44
+
45
def _iqs_of(record, default=0):
    """Return the record's "iqs" score, or *default* when it is unavailable.

    "Unavailable" covers a `scores` attribute that is not a dict, a missing
    "iqs" key, and an explicit None value, so callers can always compare or
    sort on the result.
    """
    scores = record.scores
    if not isinstance(scores, dict):
        return default
    value = scores.get("iqs")
    return default if value is None else value


def _age_hours(timestamp, now=None):
    """Return the hours elapsed since an ISO-8601 *timestamp* string.

    Returns 0.0 when the value cannot be parsed. A timestamp without an
    offset is treated as UTC; subtracting a naive datetime from an aware one
    would otherwise raise TypeError. Timestamps in the future yield 0.0
    rather than a negative age. *now* defaults to the current UTC time and
    exists so the computation can be exercised deterministically.
    """
    try:
        moment = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
    except (ValueError, AttributeError, TypeError):
        return 0.0
    if moment.tzinfo is None:
        moment = moment.replace(tzinfo=timezone.utc)
    if now is None:
        now = datetime.now(timezone.utc)
    return max((now - moment).total_seconds() / 3600, 0.0)


def queue_router(store):
    """Build the router serving the /api/queue endpoints.

    Args:
        store: Object exposing `get_all()`, which returns the stored records.

    Returns:
        An APIRouter with the list, claim, unclaim and stats routes attached.
    """
    router = APIRouter()

    @router.get("", response_model=QueueResponse)
    def list_queue(
        status: str = Query("all"),
        flag: Optional[str] = Query(None),
        agent: Optional[str] = Query(None),
        min_iqs: Optional[float] = Query(None),
        max_iqs: Optional[float] = Query(None),
        threshold: float = Query(0.70),
        sort: str = Query("created_desc"),
        page: int = Query(1),
        limit: int = Query(50),
        search: Optional[str] = Query(None),
    ):
        """Return one page of records after filtering and sorting.

        NOTE(review): `agent` is accepted but no filter is applied for it.
        """
        records = store.get_all()

        if status in ("pass", "warn", "fail"):
            # Quality-based filter: classify each record against `threshold`.
            warn_floor = threshold * 0.7

            def iqs_status(r):
                iqs = _iqs_of(r)
                if iqs >= threshold:
                    return "pass"
                if iqs >= warn_floor:
                    return "warn"
                return "fail"

            records = [r for r in records if iqs_status(r) == status]
        elif status != "all":
            # Workflow status filter (pending / reviewed / rejected).
            records = [r for r in records if getattr(r, "status", "pending") == status]

        if search:
            # Case-insensitive substring match on the query text.
            needle = search.lower()
            records = [r for r in records if needle in (r.query or "").lower()]

        if flag:
            records = [r for r in records if flag in (r.flags or [])]

        # A record without an IQS counts as 0 for the lower bound and as 1
        # for the upper bound.
        if min_iqs is not None:
            records = [r for r in records if _iqs_of(r, 0) >= min_iqs]
        if max_iqs is not None:
            records = [r for r in records if _iqs_of(r, 1) <= max_iqs]

        # Unknown sort values fall back to ascending timestamp order.
        descending = sort in ("iqs_desc", "created_desc", "newest")
        if sort in ("iqs_asc", "iqs_desc"):
            sort_key = _iqs_of
        else:
            def sort_key(r):
                return r.timestamp
        records = sorted(records, key=sort_key, reverse=descending)

        total = len(records)
        # Clamp pagination inputs: a non-positive page or negative limit would
        # turn into negative slice bounds and return unrelated records.
        page = max(page, 1)
        limit = max(limit, 0)
        start = (page - 1) * limit
        page_records = records[start: start + limit]

        items = []
        for r in page_records:
            sc = r.scores if isinstance(r.scores, dict) else {}
            # Records without an explicit count: 5 metrics when groundedness
            # was scored, otherwise 4.
            metric_count = sc.get(
                "iqs_metric_count", 5 if sc.get("groundedness") is not None else 4
            )
            claim = _claims.get(r.id)
            items.append(QueueItem(
                id=r.id,
                agent_id=r.corrected_by or "unknown",
                query=(r.query or "")[:120],
                response=(r.response or "")[:200],
                iqs=_iqs_of(r, 0.0),
                flags=r.flags or [],
                status=getattr(r, "status", "pending"),
                created_at=r.timestamp,
                claimed_at=claim.get("claimed_at") if claim else None,
                session_id=getattr(r, "session_id", None),
                context_checksum=getattr(r, "context_checksum", None),
                iqs_metric_count=metric_count,
            ))

        return QueueResponse(records=items, total=total, page=page)

    @router.post("/claim/{record_id}")
    def claim_record(record_id: str):
        """Atomic claim - 409 if already claimed by another session."""
        with _claims_lock:
            if record_id in _claims:
                raise HTTPException(
                    status_code=409,
                    detail=f"Record {record_id} is already claimed",
                )
            now = datetime.now(timezone.utc).isoformat()
            _claims[record_id] = {"claimed_at": now}
        return {"record_id": record_id, "claimed_at": now, "status": "claimed"}

    @router.delete("/claim/{record_id}")
    def unclaim_record(record_id: str):
        """Release a claim when reviewer navigates away."""
        with _claims_lock:
            _claims.pop(record_id, None)
        return {"record_id": record_id, "status": "released"}

    @router.get("/stats", response_model=StatsResponse)
    def queue_stats():
        """Return pending count, today's reviews, mean IQS and oldest pending age."""
        all_records = store.get_all()
        pending = [r for r in all_records if getattr(r, "status", "pending") == "pending"]

        # Timestamps are compared by their leading YYYY-MM-DD characters.
        today = datetime.now(timezone.utc).date().isoformat()
        reviewed_today = sum(
            1 for r in all_records
            if getattr(r, "status", "pending") == "reviewed"
            and r.timestamp[:10] == today
        )

        # Only records whose scores are a dict take part in the average.
        iqs_vals = [_iqs_of(r) for r in all_records if isinstance(r.scores, dict)]
        avg_iqs = sum(iqs_vals) / len(iqs_vals) if iqs_vals else 0.0

        oldest_hours = 0.0
        if pending:
            oldest_hours = _age_hours(min(r.timestamp for r in pending))

        return StatsResponse(
            pending=len(pending),
            reviewed_today=reviewed_today,
            avg_iqs=round(avg_iqs, 3),
            oldest_pending_hours=round(oldest_hours, 1),
        )

    return router
@@ -0,0 +1,252 @@
1
+ """Records router - /api/records/:id endpoints."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Optional
5
+
6
+ from fastapi import APIRouter, HTTPException
7
+ from pydantic import BaseModel
8
+
9
+
10
class ReviewBody(BaseModel):
    """Request body of the review endpoint."""

    # Corrected response text; the review endpoint rejects blank values.
    correction: str
    # NOTE(review): `category` and `notes` are accepted here but the review
    # handler in this module does not read them.
    category: str = "manual"
    notes: Optional[str] = None
14
+
15
+
16
class RejectBody(BaseModel):
    """Request body of the reject endpoint."""

    # Passed to the store as the rejection reason.
    reason: str
18
+
19
+
20
# Names of the five IQS metrics. `metrics` in the serialized record is limited
# to these keys so that derived score fields (weakest_metric, score_variance,
# etc.) are not presented as metric scores.
_METRIC_KEYS = {"groundedness", "completeness", "relevance", "consistency", "confidence"}


def _record_to_dict(r) -> dict:
    """Convert a stored record into the dict shape the frontend consumes.

    A `scores` attribute that is not a dict is treated as empty. `status` and
    the optional attributes fall back to defaults when the record lacks them.
    """
    score_map = r.scores if isinstance(r.scores, dict) else {}
    metric_scores = {
        name: value for name, value in score_map.items() if name in _METRIC_KEYS
    }
    # Older records may lack the transparency keys; derive them from whether
    # a groundedness score is present.
    has_groundedness = score_map.get("groundedness") is not None
    default_metric_count = 5 if has_groundedness else 4
    context_lines = r.context_used or []

    payload = {
        "id": r.id,
        "timestamp": r.timestamp,
        "created_at": r.timestamp,
        "query": r.query,
        "response": r.response,
        "context": "\n".join(context_lines),
        "correction": r.correction,
        "rejection_reason": r.reason,
        "corrected_by": r.corrected_by,
        "status": getattr(r, "status", "pending"),
        "iqs": score_map.get("iqs", 0.0),
        "metrics": metric_scores,
        "flags": r.flags or [],
        "corrected_response_iqs": getattr(r, "corrected_response_iqs", None),
        "agent_id": r.corrected_by or None,
        "model": None,
        "weakest_metric": score_map.get("weakest_metric"),
        "score_variance": score_map.get("score_variance"),
        "iqs_explanation": score_map.get("iqs_explanation"),
        "metric_explanations": score_map.get("metric_explanations") or {},
        "guardrail_applied_count": getattr(r, "guardrail_applied_count", 0),
        "evidence_map": score_map.get("evidence_map"),
        "context_used": score_map.get("context_used", has_groundedness),
        "iqs_metric_count": score_map.get("iqs_metric_count", default_metric_count),
        "effective_weights": score_map.get("effective_weights"),
    }
    return payload
61
+
62
+
63
def _find_record(store, record_id):
    """Return the stored record whose id equals *record_id*, or None."""
    return next((r for r in store.get_all() if r.id == record_id), None)


def _release_claim(record_id):
    """Drop any queue claim registered for *record_id*."""
    # Imported at call time rather than at module import time.
    from .queue import _claims, _claims_lock

    with _claims_lock:
        _claims.pop(record_id, None)


def _updated_or_status(store, record_id, status):
    """Return the re-read record as a dict, or a minimal status payload.

    Returning the full record lets the frontend replace its copy directly;
    the minimal payload is used only if the record can no longer be found.
    """
    updated = _find_record(store, record_id)
    if updated is not None:
        return _record_to_dict(updated)
    return {"record_id": record_id, "status": status}


def records_router(store):
    """Build the router serving the /api/records/:id endpoints.

    Args:
        store: Object exposing `get_all()` and `mark_reviewed(...)`; the
            latter's return value is treated as "record was found".

    Returns:
        An APIRouter with the get, review, reject, reset and
        generate-correction routes attached.
    """
    router = APIRouter()

    @router.get("/{record_id}")
    def get_record(record_id: str):
        """Return one record, or 404 when the id is unknown."""
        match = _find_record(store, record_id)
        if match is None:
            raise HTTPException(status_code=404, detail=f"Record {record_id} not found")
        return _record_to_dict(match)

    @router.post("/{record_id}/review")
    def submit_review(record_id: str, body: ReviewBody):
        """Store a reviewer correction and mark the record reviewed."""
        if not body.correction.strip():
            raise HTTPException(status_code=422, detail="Correction cannot be empty")

        ok = store.mark_reviewed(
            record_id=record_id,
            correction=body.correction,
            corrected_by="reviewer",
            status="reviewed",
        )
        if not ok:
            raise HTTPException(status_code=404, detail=f"Record {record_id} not found")

        _release_claim(record_id)
        return _updated_or_status(store, record_id, "reviewed")

    @router.post("/{record_id}/reject")
    def reject_record(record_id: str, body: RejectBody):
        """Mark the record rejected with the given reason."""
        ok = store.mark_reviewed(
            record_id=record_id,
            correction="",
            reason=body.reason,
            corrected_by="reviewer",
            status="rejected",
        )
        if not ok:
            raise HTTPException(status_code=404, detail=f"Record {record_id} not found")

        _release_claim(record_id)
        return _updated_or_status(store, record_id, "rejected")

    @router.delete("/{record_id}/correction")
    def delete_correction(record_id: str):
        """Reset a record to pending - undoes a correction or rejection."""
        ok = store.mark_reviewed(
            record_id=record_id,
            correction="",
            reason="",
            corrected_by=None,
            status="pending",
        )
        if not ok:
            raise HTTPException(status_code=404, detail=f"Record {record_id} not found")

        return _updated_or_status(store, record_id, "pending")

    # Declared with plain `def`: the LLM call below is blocking network I/O,
    # and an `async def` handler would run it on the event loop and stall
    # every other request until the provider answers.
    @router.post("/{record_id}/generate-correction")
    def generate_correction(record_id: str):
        """
        Call the configured LLM and return a draft correction as JSON.
        NEVER auto-populates the frontend - user must click Generate.
        """
        match = _find_record(store, record_id)
        if match is None:
            raise HTTPException(status_code=404, detail="Record not found")

        settings = _load_settings()
        provider = settings.get("provider", "none")

        if provider == "none":
            raise HTTPException(status_code=400, detail="No LLM corrector configured. Set one in Settings.")

        try:
            draft = _call_llm(match, settings)
        except Exception as e:
            # Any provider failure is reported as a bad-gateway error; the
            # original exception is chained for server-side tracebacks.
            raise HTTPException(status_code=502, detail=str(e)) from e
        return {"draft": draft}

    return router
165
+
166
+
167
def _load_settings() -> dict:
    """Load persisted LLM judge settings.

    Reads `.scroot_settings.json` from the current working directory.

    Returns:
        The parsed settings dict. Falls back to `{"provider": "none"}` when
        the file is missing, cannot be read, is not valid JSON, or does not
        contain a JSON object (callers use `.get` on the result).
    """
    import json
    import logging
    import os

    settings_path = os.path.join(os.getcwd(), ".scroot_settings.json")
    try:
        # Open directly instead of checking existence first, so a file that
        # disappears between the check and the read cannot raise.
        with open(settings_path, encoding="utf-8") as f:
            settings = json.load(f)
    except FileNotFoundError:
        return {"provider": "none"}
    except (OSError, ValueError) as exc:
        # ValueError covers both malformed JSON and undecodable bytes.
        logging.getLogger(__name__).warning(
            "Ignoring unreadable settings file %s: %s", settings_path, exc
        )
        return {"provider": "none"}

    if not isinstance(settings, dict):
        logging.getLogger(__name__).warning(
            "Ignoring settings file %s: expected a JSON object", settings_path
        )
        return {"provider": "none"}
    return settings
176
+
177
+
178
def _detect_provider(settings: dict) -> str:
    """Infer the actual API provider from base_url and model name.

    An explicit endpoint always wins over the model-name heuristic: the
    base_url is checked against every known provider marker first, and only
    when none matches is a model name starting with "claude" taken to mean
    Anthropic. Otherwise a "claude..." model configured against a Groq or
    OpenRouter endpoint would be routed to the Anthropic client.

    Args:
        settings: Settings dict; `base_url` and `model` may be missing or None.

    Returns:
        One of "ollama", "anthropic", "groq", "openrouter" or "openai"
        (the default when nothing matches).
    """
    base_url = (settings.get("base_url") or "").lower()
    model = (settings.get("model") or "").lower()

    if "localhost:11434" in base_url or "ollama" in base_url:
        return "ollama"
    for marker in ("anthropic", "groq", "openrouter"):
        if marker in base_url:
            return marker
    if model.startswith("claude"):
        return "anthropic"
    return "openai"
191
+
192
+
193
def _call_llm(record, settings: dict) -> str:
    """Call the configured LLM provider and return a correction draft.

    Args:
        record: Record providing `query`, `response`, `context_used` and
            `flags`, which are combined into the prompt.
        settings: Settings dict. Reads `provider`, `model`, `api_key`,
            `base_url` and `api_key_env_var`. A provider of "llm" is resolved
            to a concrete one via `_detect_provider`.

    Returns:
        The draft text produced by the provider. For a provider this function
        does not handle, the literal string "No LLM provider configured." is
        returned.

    Raises:
        Whatever `validate_base_url` or the provider client raises; for
        Ollama this includes an HTTP error status.
    """
    provider = settings.get("provider", "none")
    if provider == "llm":
        provider = _detect_provider(settings)

    model = settings.get("model", "")
    api_key = settings.get("api_key", "")
    base_url = settings.get("base_url", "") or None

    # Fall back to env var if direct key not stored
    api_key_env = settings.get("api_key_env_var", "")
    if not api_key and api_key_env:
        import os
        api_key = os.environ.get(api_key_env, "")

    # M-2: refuse to send the API key to an unvetted/internal endpoint.
    from scroot.dashboard.security import validate_base_url
    validate_base_url(base_url)

    context_text = "\n".join(record.context_used or [])
    prompt = (
        f"Query: {record.query}\n"
        f"Context: {context_text}\n"
        f"Problematic response: {record.response}\n"
        f"Flags: {', '.join(record.flags or [])}\n\n"
        f"Write a corrected, grounded response:"
    )

    if provider == "anthropic":
        import anthropic
        client = anthropic.Anthropic(api_key=api_key, base_url=base_url)
        msg = client.messages.create(
            model=model or "claude-haiku-4-5-20251001",
            max_tokens=512,
            messages=[{"role": "user", "content": prompt}],
        )
        return msg.content[0].text

    if provider in ("openai", "groq", "openrouter"):
        import openai
        client = openai.OpenAI(api_key=api_key, base_url=base_url)
        resp = client.chat.completions.create(
            model=model or "gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=512,
        )
        # The message content may be None; keep the declared str return type.
        return resp.choices[0].message.content or ""

    if provider == "ollama":
        import requests
        # Strip a trailing slash so the joined URL has no "//api/generate".
        url = (base_url or "http://localhost:11434").rstrip("/") + "/api/generate"
        resp = requests.post(
            url,
            json={"model": model or "llama3.2", "prompt": prompt, "stream": False},
            timeout=60,
        )
        # Surface HTTP failures instead of returning an empty draft built
        # from an error body.
        resp.raise_for_status()
        return resp.json().get("response", "")

    # NOTE(review): this message is returned as if it were a draft; callers
    # cannot tell it apart from model output.
    return "No LLM provider configured."