scroot 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scroot/__init__.py +109 -0
- scroot/agents.py +345 -0
- scroot/audit.py +131 -0
- scroot/cli/__init__.py +167 -0
- scroot/cli/download.py +49 -0
- scroot/cli/eval.py +230 -0
- scroot/cli/model_info.py +28 -0
- scroot/composite.py +170 -0
- scroot/config/__init__.py +0 -0
- scroot/config/corrector.py +92 -0
- scroot/connectors/__init__.py +5 -0
- scroot/connectors/database.py +357 -0
- scroot/context/__init__.py +9 -0
- scroot/context/adapters.py +86 -0
- scroot/context/builder.py +514 -0
- scroot/context/dedup.py +99 -0
- scroot/context/payload.py +66 -0
- scroot/context/pii.py +101 -0
- scroot/context/tokenizer.py +42 -0
- scroot/core.py +349 -0
- scroot/corrector/__init__.py +38 -0
- scroot/corrector/api.py +145 -0
- scroot/corrector/base.py +20 -0
- scroot/corrector/disabled.py +13 -0
- scroot/corrector/local.py +112 -0
- scroot/corrector/models.py +69 -0
- scroot/dashboard/__init__.py +0 -0
- scroot/dashboard/__main__.py +37 -0
- scroot/dashboard/routers/__init__.py +0 -0
- scroot/dashboard/routers/analytics.py +236 -0
- scroot/dashboard/routers/corrector.py +230 -0
- scroot/dashboard/routers/export.py +150 -0
- scroot/dashboard/routers/guardrails.py +41 -0
- scroot/dashboard/routers/pipeline.py +218 -0
- scroot/dashboard/routers/queue.py +188 -0
- scroot/dashboard/routers/records.py +252 -0
- scroot/dashboard/routers/settings.py +291 -0
- scroot/dashboard/security.py +135 -0
- scroot/dashboard/server.py +181 -0
- scroot/evidence.py +228 -0
- scroot/exceptions.py +62 -0
- scroot/feedback/__init__.py +6 -0
- scroot/feedback/injector.py +160 -0
- scroot/feedback/sanitizer.py +56 -0
- scroot/feedback/store.py +650 -0
- scroot/flags.py +42 -0
- scroot/metrics/__init__.py +15 -0
- scroot/metrics/_utils.py +9 -0
- scroot/metrics/completeness.py +139 -0
- scroot/metrics/confidence.py +83 -0
- scroot/metrics/consistency.py +125 -0
- scroot/metrics/groundedness.py +193 -0
- scroot/metrics/relevance.py +73 -0
- scroot/models.py +214 -0
- scroot/result.py +276 -0
- scroot/sampling.py +306 -0
- scroot/text_utils.py +136 -0
- scroot/ui/dist/assets/index-DW1dLzDl.js +101 -0
- scroot/ui/dist/assets/index-WOhrVVSM.css +2 -0
- scroot/ui/dist/favicon.svg +27 -0
- scroot/ui/dist/index.html +20 -0
- scroot-0.2.0.dist-info/METADATA +832 -0
- scroot-0.2.0.dist-info/RECORD +67 -0
- scroot-0.2.0.dist-info/WHEEL +5 -0
- scroot-0.2.0.dist-info/entry_points.txt +2 -0
- scroot-0.2.0.dist-info/licenses/LICENSE +201 -0
- scroot-0.2.0.dist-info/top_level.txt +1 -0
scroot/corrector/base.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Base corrector ABC."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BaseCorrector(ABC):
|
|
8
|
+
@abstractmethod
|
|
9
|
+
def draft_correction(
|
|
10
|
+
self,
|
|
11
|
+
query: str,
|
|
12
|
+
response: str,
|
|
13
|
+
context: str | None,
|
|
14
|
+
) -> str | None:
|
|
15
|
+
"""Return a correction draft, or None if disabled."""
|
|
16
|
+
|
|
17
|
+
@property
|
|
18
|
+
def is_available(self) -> bool:
|
|
19
|
+
"""True if this corrector can generate drafts right now."""
|
|
20
|
+
return True
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""NullCorrector - no LLM call, returns None."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from scroot.corrector.base import BaseCorrector
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class NullCorrector(BaseCorrector):
    """Disabled corrector: makes no LLM call and never produces a draft."""

    def draft_correction(self, query: str, response: str, context: str | None) -> None:
        """Ignore every argument and return None."""
        return None

    @property
    def is_available(self) -> bool:
        """Always False - this corrector cannot generate drafts."""
        return False
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""LocalLLMCorrector - llama-cpp-python inference, thread-safe, lazy-loaded."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import threading
|
|
5
|
+
|
|
6
|
+
from scroot.corrector.base import BaseCorrector
|
|
7
|
+
from scroot.corrector.models import MODEL_REGISTRY, get_model_path, is_model_downloaded
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LocalLLMCorrector(BaseCorrector):
    """
    Wraps llama-cpp-python for in-process CPU (or GPU) inference.

    Thread-safe via lock. Lazy-loaded: model is not loaded until first call.

    The lock is a class attribute, so loading, inference and unloading are
    serialized across all instances of this class, not only within one.
    """

    _lock = threading.Lock()

    def __init__(self, config) -> None:
        """Remember the configuration; the model itself is loaded lazily.

        Args:
            config: Object exposing ``model_id``, ``n_threads``,
                ``context_window`` and ``n_gpu_layers``.
        """
        self._config = config
        self._llm = None

    def _ensure_loaded(self) -> None:
        """Load the model on first use; do nothing when it is already loaded.

        Callers are expected to hold ``_lock`` (``draft_correction`` does).

        Raises:
            RuntimeError: llama-cpp-python is not installed, or the configured
                model file has not been downloaded.
            KeyError: ``config.model_id`` is not a key of ``MODEL_REGISTRY``.
        """
        if self._llm is not None:
            return

        try:
            from llama_cpp import Llama
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise RuntimeError(
                "llama-cpp-python is not installed. "
                "Run: pip install 'scroot[local]'"
            ) from err

        model_id = self._config.model_id
        if not is_model_downloaded(model_id):
            spec = MODEL_REGISTRY[model_id]
            raise RuntimeError(
                f"Model '{spec.name}' is not downloaded. "
                f"Run: scroot download-model --model {model_id}"
            )

        model_path = get_model_path(model_id)
        import os

        n_threads = self._config.n_threads
        if n_threads == -1:
            # -1 means "auto": use every reported CPU, or 4 if the count is unknown.
            n_threads = os.cpu_count() or 4

        self._llm = Llama(
            model_path=str(model_path),
            n_ctx=self._config.context_window,
            n_threads=n_threads,
            n_gpu_layers=self._config.n_gpu_layers,
            verbose=False,
        )

    @property
    def is_available(self) -> bool:
        """True when llama-cpp-python imports and the configured model file exists.

        Returns False (instead of raising) when the configured model id is not
        registered, since no draft can be generated in that case either.
        """
        try:
            import llama_cpp  # noqa: F401
        except ImportError:
            return False
        try:
            return is_model_downloaded(self._config.model_id)
        except KeyError:
            # The registry lookup behind is_model_downloaded fails for unknown ids.
            return False

    def draft_correction(
        self,
        query: str,
        response: str,
        context: str | None,
    ) -> str:
        """Ask the local model to rewrite ``response`` and return the stripped text.

        Args:
            query: The original query.
            response: The response to be corrected.
            context: Optional grounding text added to the prompt when truthy.

        Returns:
            The model's reply with surrounding whitespace removed; an empty
            string when the reply carries no text content.

        Raises:
            RuntimeError: Propagated from ``_ensure_loaded`` when the model
                cannot be loaded.
        """
        with self._lock:
            self._ensure_loaded()
            result = self._llm.create_chat_completion(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are a correction assistant. "
                            "Rewrite the LLM response to be more accurate, "
                            "complete, and grounded in the provided context. "
                            "Return only the corrected response text. "
                            "Do not explain your changes. Do not add preamble."
                        ),
                    },
                    {"role": "user", "content": self._build_prompt(query, response, context)},
                ],
                max_tokens=512,
                temperature=0.3,
                top_p=0.9,
                repeat_penalty=1.1,
            )
        # The message content may be None; avoid AttributeError on .strip().
        content = result["choices"][0]["message"]["content"]
        return (content or "").strip()

    def _build_prompt(self, query: str, response: str, context: str | None) -> str:
        """Assemble the user message: query, original response, optional context, instruction."""
        parts = [f"Query:\n{query}", f"\nOriginal response:\n{response}"]
        if context:
            parts.append(f"\nContext / grounding documents:\n{context}")
        parts.append("\nRewrite the response to be more accurate and complete.")
        return "\n".join(parts)

    def unload(self) -> None:
        """Free the model from RAM. Call before deleting the GGUF file."""
        with self._lock:
            self._llm = None

    @property
    def model_spec(self):
        """The ``MODEL_REGISTRY`` entry for the configured model id (KeyError if unknown)."""
        return MODEL_REGISTRY[self._config.model_id]

    def tok_per_sec(self) -> float | None:
        """Rough throughput estimate based on model family.

        Returns a hard-coded figure per model id, or None for ids without one.
        """
        model_id = self._config.model_id
        return {"phi4-mini": 16.0, "smollm3": 22.0}.get(model_id)
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Model registry - GGUF model specs and local storage helpers."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class ModelSpec:
    """Static description of one GGUF model known to the registry."""

    # Identifier; the registry in this module keys each spec by this value.
    id: str
    # Display name, used in user-facing messages.
    name: str
    # Repository the file comes from - presumably a Hugging Face repo id.
    hf_repo: str
    # File name inside that repo; also used as the local file name.
    hf_filename: str
    # Presumably the approximate file size in gigabytes - TODO confirm.
    size_gb: float
    # Presumably minimum / recommended RAM in gigabytes - TODO confirm.
    min_ram_gb: int
    rec_ram_gb: int
    # Presumably the model's maximum context length in tokens - TODO confirm.
    context_window: int
    # License label and free-text description.
    license: str
    description: str
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# All models the corrector can use, keyed by ModelSpec.id.
# NOTE(review): the ids ("phi4-mini", "smollm3") do not match the Qwen2.5
# models they point at; the ids are lookup keys, so they are left unchanged.
MODEL_REGISTRY: dict[str, ModelSpec] = {
    spec.id: spec
    for spec in (
        ModelSpec(
            id="phi4-mini",
            name="Qwen2.5-3B-Instruct",
            hf_repo="Qwen/Qwen2.5-3B-Instruct-GGUF",
            hf_filename="qwen2.5-3b-instruct-q4_k_m.gguf",
            size_gb=2.0,
            min_ram_gb=4,
            rec_ram_gb=6,
            context_window=32_768,
            license="Apache 2.0",
            description="Alibaba's efficient 3B instruction model. Strong "
            "reasoning and instruction following. Default choice.",
        ),
        ModelSpec(
            id="smollm3",
            name="Qwen2.5-1.5B-Instruct",
            hf_repo="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
            hf_filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
            size_gb=1.0,
            min_ram_gb=2,
            rec_ram_gb=4,
            context_window=32_768,
            license="Apache 2.0",
            description="Compact 1.5B model. Fastest on CPU, lower RAM "
            "requirement. Good for resource-constrained machines.",
        ),
    )
}

# Registry key of the model used when none is chosen explicitly.
DEFAULT_MODEL_ID = "phi4-mini"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_model_dir() -> Path:
    """Return the directory under which model files are stored.

    A non-empty ``SCROOT_MODELS_DIR`` environment variable overrides the
    default location, which is ``.scroot/models`` in the user's home directory.
    """
    override = os.environ.get("SCROOT_MODELS_DIR")
    return Path(override) if override else Path.home().joinpath(".scroot", "models")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def get_model_path(model_id: str) -> Path:
    """Return where the model file for ``model_id`` lives on disk.

    The path is ``<model dir>/<model_id>/<hf_filename>``; the file need not exist.

    Raises:
        KeyError: ``model_id`` is not a key of ``MODEL_REGISTRY``.
    """
    # Look the spec up first so an unknown id fails before anything else runs.
    filename = MODEL_REGISTRY[model_id].hf_filename
    return get_model_dir().joinpath(model_id, filename)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def is_model_downloaded(model_id: str) -> bool:
    """Report whether the model file for ``model_id`` exists at its expected path.

    Raises:
        KeyError: ``model_id`` is not a key of ``MODEL_REGISTRY``.
    """
    expected_file = get_model_path(model_id)
    return expected_file.exists()
|
|
File without changes
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""python -m scroot.dashboard - launch the review console."""
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main():
    """Parse the command line and serve the Scroot review console with uvicorn.

    Options: ``--port`` (default 7432), ``--store`` (path to the JSONL feedback
    store), ``--host`` (default 127.0.0.1) and the hidden ``--hosted`` flag.

    Exits with status 1 and a message on stderr when uvicorn is not installed.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Scroot Review Console",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="Open http://localhost:7432 in your browser after starting.",
    )
    parser.add_argument("--port", type=int, default=7432)
    parser.add_argument("--store", default="./scroot_store.jsonl",
                        help="Path to JSONL feedback store")
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--hosted", action="store_true",
                        help=argparse.SUPPRESS)  # Enterprise only
    args = parser.parse_args()

    try:
        import uvicorn
    except ImportError:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # keeping the error out of stdout.
        sys.exit("ERROR: uvicorn is required. Install with: pip install 'scroot[dashboard]'")

    # Imported only after the uvicorn check so a missing extra gives the hint above.
    from .server import create_app

    app = create_app(store_path=args.store, hosted=args.hosted)

    print("\n ◆ SCROOT Review Console")
    print(f" Store: {args.store}")
    print(f" URL: http://{args.host}:{args.port}\n")

    uvicorn.run(app, host=args.host, port=args.port, log_level="warning")


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""Analytics router - /api/analytics endpoints."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from collections import Counter, defaultdict
|
|
5
|
+
from datetime import datetime, timedelta, timezone
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from fastapi import APIRouter, Query
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Accepted ``range`` query values mapped to the window length in hours.
_RANGE_HOURS = {"24h": 24, "7d": 168, "30d": 720}


def _record_iqs(record):
    """Return the record's "iqs" score, or 0 when scores is not a dict or lacks the key."""
    scores = record.scores
    return scores.get("iqs", 0) if isinstance(scores, dict) else 0


def _record_datetime(record):
    """Parse ``record.timestamp`` (ISO format, optional trailing "Z") as an aware UTC datetime.

    Returns None when the timestamp is missing or cannot be parsed.
    """
    try:
        parsed = datetime.fromisoformat(record.timestamp.replace("Z", "+00:00"))
    except (ValueError, AttributeError):
        return None
    if parsed.tzinfo is None:
        # A naive value cannot be compared with the aware cutoffs used by the
        # endpoints (TypeError). Assumed to be UTC - TODO confirm against the store.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def _record_day(record):
    """Return the first 10 characters of ``record.timestamp``, or "" if it is not a string."""
    stamp = getattr(record, "timestamp", None)
    return stamp[:10] if isinstance(stamp, str) else ""


def analytics_router(store):
    """Build the router serving the analytics endpoints.

    Args:
        store: Object whose ``get_all()`` returns the records to analyse. The
            endpoints read ``scores``, ``flags``, ``timestamp``, ``status``,
            ``corrected_by`` and ``corrected_response_iqs`` from each record.

    Returns:
        An ``APIRouter`` with the summary, IQS trend, flag distribution,
        before/after and reviewer throughput routes registered.
    """
    router = APIRouter()

    @router.get("")
    def summary(time_range: str = Query("30d", alias="range")):
        """Unified analytics endpoint - returns all charts in one call."""
        records = store.get_all()
        cutoff = datetime.now(timezone.utc) - timedelta(hours=_RANGE_HOURS.get(time_range, 720))

        # Headline figures cover every record; only the trend honours the range.
        iqs_vals = [r.scores.get("iqs", 0) for r in records if isinstance(r.scores, dict)]
        avg_iqs = round(sum(iqs_vals) / len(iqs_vals), 3) if iqs_vals else 0.0
        pending_review = sum(1 for r in records if getattr(r, "status", "pending") == "pending")

        # IQS trend - daily buckets (UTC days)
        daily: dict[str, list[float]] = defaultdict(list)
        for r in records:
            dt = _record_datetime(r)
            if dt is not None and dt >= cutoff:
                daily[dt.strftime("%Y-%m-%d")].append(_record_iqs(r))
        iqs_trend = [
            {"date": day, "avg_iqs": round(sum(vals) / len(vals), 3)}
            for day, vals in sorted(daily.items())
        ]

        # Flag frequency - object keyed by metric name
        flag_counter: Counter = Counter()
        for r in records:
            for f in (r.flags or []):
                flag_counter[f] += 1
        flag_metrics = ["groundedness", "completeness", "relevance", "consistency", "confidence"]
        # A flag may be stored under the bare metric name or as "low_<metric>".
        flag_frequency = {m: flag_counter.get(m, flag_counter.get(f"low_{m}", 0)) for m in flag_metrics}

        # IQS distribution - 5 buckets; out-of-range values are clamped to the end buckets
        buckets = ["0.0–0.2", "0.2–0.4", "0.4–0.6", "0.6–0.8", "0.8–1.0"]
        dist = Counter(max(0, min(4, int(v * 5))) for v in iqs_vals)
        iqs_distribution = [{"bucket": label, "count": dist[i]} for i, label in enumerate(buckets)]

        # Per-agent breakdown
        # NOTE(review): groups by ``corrected_by`` but reports it as "agent_id" - confirm intent.
        agent_map: dict[str, list[float]] = defaultdict(list)
        for r in records:
            agent_map[r.corrected_by or "unknown"].append(_record_iqs(r))
        per_agent = sorted(
            [
                {"agent_id": aid, "avg_iqs": round(sum(v) / len(v), 3), "count": len(v)}
                for aid, v in agent_map.items()
            ],
            key=lambda x: x["avg_iqs"],
        )

        # Avg IQS today (falls back to the overall average when nothing was scored today)
        today = datetime.now(timezone.utc).date().isoformat()
        today_vals = [
            r.scores.get("iqs", 0) for r in records
            if isinstance(r.scores, dict) and _record_day(r) == today
        ]
        avg_iqs_today = round(sum(today_vals) / len(today_vals), 3) if today_vals else avg_iqs

        # pass: >= threshold; warn: within 70% of the threshold; fail: below that.
        threshold = 0.70
        pass_count = sum(1 for v in iqs_vals if v >= threshold)
        warn_count = sum(1 for v in iqs_vals if threshold * 0.7 <= v < threshold)
        fail_count = sum(1 for v in iqs_vals if v < threshold * 0.7)

        return {
            "total_scored": len(records),
            "avg_iqs": avg_iqs,
            "avg_iqs_today": avg_iqs_today,
            "avg_iqs_delta": 0.0,
            "pending_review": pending_review,
            "pass_count": pass_count,
            "warn_count": warn_count,
            "fail_count": fail_count,
            "iqs_trend": iqs_trend,
            "flag_frequency": flag_frequency,
            "iqs_distribution": iqs_distribution,
            "per_agent": per_agent,
        }

    @router.get("/iqs-trend")
    def iqs_trend(time_range: str = Query("7d", alias="range"), agent: Optional[str] = Query(None)):
        """Return mean/p10/p90 IQS and the flagged-record count per time bucket.

        Buckets are hourly for the "24h" and "7d" ranges and daily otherwise.
        NOTE(review): ``agent`` is accepted but not applied as a filter.
        """
        records = store.get_all()
        if not records:
            return {"points": []}

        start = datetime.now(timezone.utc) - timedelta(hours=_RANGE_HOURS.get(time_range, 168))
        key_format = "%Y-%m-%dT%H:00:00Z" if time_range in ("24h", "7d") else "%Y-%m-%dT00:00:00Z"

        # Collect scores and flagged counts per bucket in a single pass so that
        # flagged_count refers to the records of that bucket only.
        bucket_iqs: dict[str, list[float]] = defaultdict(list)
        bucket_flagged: Counter = Counter()
        for r in records:
            dt = _record_datetime(r)
            if dt is None or dt < start:
                continue
            key = dt.strftime(key_format)
            bucket_iqs[key].append(_record_iqs(r))
            if r.flags:
                bucket_flagged[key] += 1

        points = []
        for ts in sorted(bucket_iqs):
            vals_sorted = sorted(bucket_iqs[ts])
            n = len(vals_sorted)
            p10 = vals_sorted[max(0, int(n * 0.1) - 1)]
            p90 = vals_sorted[min(n - 1, int(n * 0.9))]
            points.append({
                "timestamp": ts,
                "mean_iqs": round(sum(vals_sorted) / n, 4),
                "p10": round(p10, 4),
                "p90": round(p90, 4),
                "flagged_count": bucket_flagged[ts],
            })

        return {"points": points}

    @router.get("/flag-distribution")
    def flag_distribution(time_range: str = Query("7d", alias="range")):
        """Return count and percentage per flag type for records inside the range."""
        records = store.get_all()
        cutoff = datetime.now(timezone.utc) - timedelta(hours=_RANGE_HOURS.get(time_range, 168))

        flag_counter: Counter = Counter()
        total = 0
        for r in records:
            dt = _record_datetime(r)
            if dt is None or dt < cutoff:
                continue
            total += 1
            for f in (r.flags or []):
                flag_counter[f] += 1

        if total == 0:
            return {"flags": []}

        flag_types = ["hallucination_risk", "incomplete", "off_topic", "self_contradictory", "ungrounded"]
        return {
            "flags": [
                {
                    "type": ft,
                    "count": flag_counter.get(ft, 0),
                    # Percentage of in-range records carrying this flag.
                    "pct": round(flag_counter.get(ft, 0) / total * 100, 1),
                    "trend_pct": 0.0,  # TODO: compare to previous period
                }
                for ft in flag_types
            ]
        }

    @router.get("/before-after")
    def before_after(correction_id: Optional[str] = Query(None)):
        """Compare the IQS of all records with the corrected IQS of reviewed ones.

        NOTE(review): ``correction_id`` is accepted but not used.
        """
        records = store.get_all()
        reviewed = [r for r in records if getattr(r, "status", "pending") in ("reviewed", "applied")]

        def histogram(vals, bins=10):
            """Bin values over [0, 1] into ``bins`` equal-width buckets."""
            if not vals:
                return []
            step = 1.0 / bins
            counts = [0] * bins
            for v in vals:
                # Clamp both ends so out-of-range values cannot wrap to another bin.
                idx = max(0, min(int(v * bins), bins - 1))
                counts[idx] += 1
            return [{"x": round(i * step, 1), "y": c} for i, c in enumerate(counts)]

        before_iqs = [r.scores.get("iqs", 0) for r in records if isinstance(r.scores, dict)]
        after_iqs = [r.corrected_response_iqs for r in reviewed
                     if getattr(r, "corrected_response_iqs", None) is not None]

        before_mean = sum(before_iqs) / len(before_iqs) if before_iqs else 0
        after_mean = sum(after_iqs) / len(after_iqs) if after_iqs else 0

        return {
            "before": {"histogram": histogram(before_iqs), "mean": round(before_mean, 3)},
            "after": {"histogram": histogram(after_iqs), "mean": round(after_mean, 3)},
            "delta": round(after_mean - before_mean, 3),
        }

    @router.get("/reviewer-throughput")
    def reviewer_throughput():
        """Return per-day counts of reviewed records for the last 7 UTC days.

        Days are taken from each record's ``timestamp`` prefix.
        """
        records = store.get_all()
        reviewed_per_day = Counter(
            _record_day(r) for r in records
            if getattr(r, "status", "pending") in ("reviewed", "applied", "rejected")
        )

        # Count by day for last 7 days, oldest first
        today = datetime.now(timezone.utc).date()
        by_day = [reviewed_per_day[(today - timedelta(days=i)).isoformat()] for i in range(6, -1, -1)]

        week_total = sum(by_day)

        return {
            "reviews_today": by_day[-1],
            "avg_time_per_review_s": 0.0,  # requires session tracking
            "reviews_this_week": by_day,
            "week_total": week_total,
            "avg_per_day": round(week_total / 7, 1),
        }

    return router
|