knowledge-worker 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mygraph/viz.py ADDED
@@ -0,0 +1,409 @@
1
+ """
2
+ viz.py — graph viewer generator.
3
+
4
+ Writes a single HTML file with graph JSON embedded directly into the page. The
5
+ viewer uses D3.js from the CDN for force-directed layout, with no sibling JSON
6
+ fetch and no upload step.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import sys
13
+ import webbrowser
14
+ from dataclasses import asdict
15
+ from pathlib import Path
16
+
17
+ from mygraph import Graph, resolve_graph_path
18
+
19
# Directory that contains this module; the default output path is derived from it.
HERE = Path(__file__).parent
# Default destination of the generated viewer page (used when no --out is given).
HTML_PATH = HERE / "mygraph_viz.html"
21
+
22
+
23
+ HTML_TEMPLATE = r"""<!doctype html>
24
+ <meta charset="utf-8" />
25
+ <title>mygraph — visualizer</title>
26
+ <style>
27
+ :root {
28
+ --bg: #0f1115;
29
+ --fg: #e6e8ea;
30
+ --muted: #8a9099;
31
+ --panel: #181b21;
32
+ --accent: #d2b48c;
33
+ }
34
+ html, body { margin: 0; height: 100%; background: var(--bg); color: var(--fg);
35
+ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
36
+ overflow: hidden; }
37
+ #header { padding: 8px 14px; border-bottom: 1px solid #222;
38
+ display: flex; align-items: center; gap: 14px; font-size: 12px; }
39
+ #header strong { color: var(--accent); letter-spacing: 0; }
40
+ #header .legend { display: flex; gap: 10px; flex-wrap: wrap; }
41
+ #header .legend span { display: inline-flex; align-items: center; gap: 4px; }
42
+ #header .legend i { width: 10px; height: 10px; border-radius: 50%; display: inline-block; }
43
+ #stage { width: 100vw; height: calc(100vh - 38px); }
44
+ svg { width: 100%; height: 100%; cursor: grab; }
45
+ .link { stroke: #3a3f47; stroke-opacity: 0.55; }
46
+ .link.high { stroke-opacity: 0.9; }
47
+ .link.medium { stroke-opacity: 0.6; }
48
+ .link.low { stroke-opacity: 0.3; stroke-dasharray: 3 3; }
49
+ .node circle { stroke: #0f1115; stroke-width: 1.5; cursor: pointer; }
50
+ .node text { fill: var(--fg); font-size: 10px; pointer-events: none;
51
+ text-shadow: 0 0 3px #0f1115, 0 0 3px #0f1115, 0 0 3px #0f1115; }
52
+ .edge-label { fill: var(--muted); font-size: 9px; pointer-events: none; }
53
+ #sitrep { position: fixed; top: 50px; left: 12px; width: min(312px, calc(100vw - 24px));
54
+ max-height: calc(100vh - 64px); overflow: auto; background: rgba(24, 27, 33, 0.88);
55
+ border: 1px solid #2a2f37; border-radius: 6px; font-size: 12px;
56
+ box-shadow: 0 12px 44px rgba(0,0,0,.24); }
57
+ #sitrep .head { display: flex; align-items: center; justify-content: space-between;
58
+ gap: 10px; padding: 10px 12px; border-bottom: 1px solid #2a2f37; }
59
+ #sitrep .title { color: var(--accent); font-weight: 700; text-transform: uppercase; }
60
+ #sitrep .state { color: var(--muted); font-size: 10px; text-transform: uppercase; }
61
+ #sitrep .metrics { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr));
62
+ border-bottom: 1px solid #2a2f37; }
63
+ #sitrep .metric { min-height: 56px; padding: 10px 12px; border-right: 1px solid #2a2f37;
64
+ border-bottom: 1px solid #2a2f37; }
65
+ #sitrep .metric:nth-child(even) { border-right: 0; }
66
+ #sitrep .metric:nth-last-child(-n+2) { border-bottom: 0; }
67
+ #sitrep .value { color: var(--accent); font-size: 20px; line-height: 1; }
68
+ #sitrep .label { margin-top: 6px; color: var(--muted); font-size: 10px; text-transform: uppercase; }
69
+ #sitrep .block { padding: 11px 12px; border-top: 1px solid #2a2f37; }
70
+ #sitrep .block:first-of-type { border-top: 0; }
71
+ #sitrep .block-title { margin-bottom: 8px; color: var(--muted); font-size: 10px; text-transform: uppercase; }
72
+ #sitrep .row { display: grid; grid-template-columns: 1fr auto; gap: 10px; padding: 7px 0;
73
+ border-top: 1px solid rgba(138, 144, 153, 0.16); color: var(--fg); }
74
+ #sitrep .row:first-child { border-top: 0; }
75
+ #sitrep button.row { width: 100%; border-right: 0; border-left: 0; border-bottom: 0;
76
+ background: transparent; text-align: left; cursor: pointer; font: inherit; }
77
+ #sitrep button.row:hover { color: #fff; }
78
+ #sitrep .row-label { min-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
79
+ #sitrep .row-meta { color: var(--muted); font-size: 10px; text-transform: uppercase; }
80
+ #sitrep .type-bars { display: grid; gap: 7px; }
81
+ #sitrep .type-bar { display: grid; grid-template-columns: 72px 1fr 24px; gap: 8px; align-items: center; }
82
+ #sitrep .type-name, #sitrep .type-count { color: var(--muted); font-size: 10px; text-transform: uppercase; }
83
+ #sitrep .bar-track { height: 6px; background: rgba(138, 144, 153, 0.18); border-radius: 999px; overflow: hidden; }
84
+ #sitrep .bar-fill { height: 100%; background: var(--accent); }
85
+ #panel { position: fixed; top: 50px; right: 12px; width: 360px; max-height: 80vh;
86
+ overflow: auto; background: var(--panel); border: 1px solid #2a2f37;
87
+ border-radius: 6px; padding: 12px 14px; font-size: 12px; line-height: 1.45;
88
+ display: none; }
89
+ #panel.open { display: block; }
90
+ #panel h3 { margin: 0 0 4px 0; color: var(--accent); font-size: 13px; }
91
+ #panel .meta { color: var(--muted); font-size: 11px; }
92
+ #panel .body { margin: 8px 0; }
93
+ #panel .section { margin-top: 10px; }
94
+ #panel .section-title { color: var(--muted); text-transform: uppercase;
95
+ letter-spacing: 0; font-size: 10px; margin-bottom: 4px; }
96
+ #panel ul { margin: 0; padding-left: 16px; }
97
+ #panel a { color: #87b7e0; text-decoration: none; cursor: pointer; }
98
+ #panel a:hover { text-decoration: underline; }
99
+ .pill { display: inline-block; padding: 0 6px; border-radius: 3px;
100
+ background: #2a2f37; color: var(--muted); font-size: 10px; margin-left: 4px; }
101
+ #close { float: right; cursor: pointer; color: var(--muted); }
102
+ @media (max-width: 860px) {
103
+ #sitrep { display: none; }
104
+ #panel { left: 12px; right: 12px; width: auto; }
105
+ }
106
+ </style>
107
+
108
+ <div id="header">
109
+ <strong>mygraph</strong>
110
+ <span id="counts">loading…</span>
111
+ <span class="legend" id="legend"></span>
112
+ <span style="margin-left:auto; color: var(--muted)">click a node · drag to pan · scroll to zoom</span>
113
+ </div>
114
+ <div id="stage"><svg aria-label="knowledge graph"></svg></div>
115
+ <aside id="sitrep" aria-label="graph sitrep"></aside>
116
+ <div id="panel"></div>
117
+
118
+ <script src="https://d3js.org/d3.v7.min.js"></script>
119
+ <script>
120
+ const GRAPH_DATA = __GRAPH_JSON__;
121
+ const TYPE_COLORS = {
122
+ person: "#e07b7b",
123
+ topic: "#7bb0e0",
124
+ idea: "#d2b48c",
125
+ project: "#7be0a8",
126
+ goal: "#b07be0",
127
+ question: "#e0c87b",
128
+ decision: "#7be0c8",
129
+ reference: "#e07bb0",
130
+ source: "#8a9099",
131
+ };
132
+ const TYPE_RADIUS = { source: 5, topic: 6, person: 8, project: 9, goal: 9,
133
+ idea: 8, question: 7, decision: 7, reference: 7 };
134
+
135
// Escape a value for interpolation into HTML text or a quoted attribute.
// Only null/undefined collapse to ""; other falsy values such as 0 or false
// are rendered as text instead of being silently dropped. Both quote
// characters are escaped so the result is safe in either attribute style.
function escapeHtml(value) {
  const text = value == null ? "" : String(value);
  return text.replace(/[&<>"']/g, c => ({
    "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"
  }[c]));
}
140
+
141
+ (function() {
142
// Unpack the embedded graph. Nodes are shallow-copied because the force
// simulation writes layout fields (x, y, vx, vy, ...) onto the objects it is given.
const data = GRAPH_DATA;
const nodes = Object.values(data.nodes || {}).map(n => ({ ...n }));
const originalEdges = data.edges || [];
const nodeById = new Map(nodes.map(n => [n.id, n]));
// d3.forceLink throws "node not found" for a link whose endpoint id is absent
// from `nodes`, which would abort the whole render. Only edges with both
// endpoints present are handed to the simulation; originalEdges stays
// unfiltered for the sitrep degree counts and the detail panel.
const edges = originalEdges
  .filter(e => nodeById.has(e.src) && nodeById.has(e.dst))
  .map(e => ({ ...e, source: e.src, target: e.dst }));
document.getElementById("counts").textContent = `${nodes.length} nodes · ${edges.length} edges`;
148
+
149
+ const legend = document.getElementById("legend");
150
+ Object.keys(TYPE_COLORS).forEach(type => {
151
+ const span = document.createElement("span");
152
+ span.innerHTML = `<i style="background:${TYPE_COLORS[type]}"></i>${escapeHtml(type)}`;
153
+ legend.appendChild(span);
154
+ });
155
+ renderSitrep();
156
+
157
+ if (typeof d3 === "undefined") {
158
+ document.getElementById("counts").textContent = "failed to load D3.js";
159
+ return;
160
+ }
161
+
162
+ const svg = d3.select("svg");
163
+ const viewport = svg.append("g");
164
+ svg.call(d3.zoom()
165
+ .scaleExtent([0.2, 4])
166
+ .on("zoom", ev => viewport.attr("transform", ev.transform)));
167
+
168
+ const sim = d3.forceSimulation(nodes)
169
+ .force("link", d3.forceLink(edges).id(d => d.id).distance(80).strength(0.5))
170
+ .force("charge", d3.forceManyBody().strength(-160))
171
+ .force("center", d3.forceCenter(window.innerWidth / 2, (window.innerHeight - 38) / 2))
172
+ .force("collide", d3.forceCollide().radius(d => (TYPE_RADIUS[d.type] || 7) + 4));
173
+
174
+ const link = viewport.append("g").attr("class", "links").selectAll("line")
175
+ .data(edges).join("line")
176
+ .attr("class", d => `link ${d.confidence || "medium"}`);
177
+
178
+ const edgeLabel = viewport.append("g").attr("class", "edge-labels").selectAll("text")
179
+ .data(edges).join("text")
180
+ .attr("class", "edge-label")
181
+ .text(d => d.type);
182
+
183
+ const node = viewport.append("g").attr("class", "nodes").selectAll("g.node")
184
+ .data(nodes).join("g")
185
+ .attr("class", "node")
186
+ .call(drag(sim));
187
+
188
+ node.append("circle")
189
+ .attr("r", d => TYPE_RADIUS[d.type] || 7)
190
+ .attr("fill", d => TYPE_COLORS[d.type] || "#888");
191
+ node.append("text")
192
+ .attr("dx", 11)
193
+ .attr("dy", 3)
194
+ .text(d => d.label || d.id);
195
+
196
+ node.on("click", (ev, d) => {
197
+ ev.stopPropagation();
198
+ openPanel(d);
199
+ });
200
+ svg.on("click", () => document.getElementById("panel").classList.remove("open"));
201
+
202
+ sim.on("tick", () => {
203
+ link.attr("x1", d => d.source.x).attr("y1", d => d.source.y)
204
+ .attr("x2", d => d.target.x).attr("y2", d => d.target.y);
205
+ edgeLabel.attr("x", d => (d.source.x + d.target.x) / 2)
206
+ .attr("y", d => (d.source.y + d.target.y) / 2);
207
+ node.attr("transform", d => `translate(${d.x},${d.y})`);
208
+ });
209
+
210
+ function drag(sim) {
211
+ return d3.drag()
212
+ .on("start", (ev, d) => {
213
+ if (!ev.active) sim.alphaTarget(0.3).restart();
214
+ d.fx = d.x;
215
+ d.fy = d.y;
216
+ })
217
+ .on("drag", (ev, d) => {
218
+ d.fx = ev.x;
219
+ d.fy = ev.y;
220
+ })
221
+ .on("end", (ev, d) => {
222
+ if (!ev.active) sim.alphaTarget(0);
223
+ d.fx = null;
224
+ d.fy = null;
225
+ });
226
+ }
227
+
228
+ function countBy(items, keyFn) {
229
+ const counts = new Map();
230
+ items.forEach(item => {
231
+ const key = keyFn(item) || "unknown";
232
+ counts.set(key, (counts.get(key) || 0) + 1);
233
+ });
234
+ return counts;
235
+ }
236
+
237
+ function shortLabel(value, max = 42) {
238
+ const text = String(value || "");
239
+ return text.length <= max ? text : `${text.slice(0, max - 1)}…`;
240
+ }
241
+
242
+ function renderSitrep() {
243
+ const sitrep = document.getElementById("sitrep");
244
+ const typeCounts = countBy(nodes, n => n.type);
245
+ const confidenceCounts = countBy(nodes, n => n.confidence);
246
+ const highConfidence = confidenceCounts.get("high") || 0;
247
+ const mentioned = new Set(originalEdges.filter(e => e.type === "MENTIONED_IN").map(e => e.src));
248
+ const nonSource = nodes.filter(n => n.type !== "source").length;
249
+ const provenance = nonSource ? Math.round((mentioned.size / nonSource) * 100) : 100;
250
+ const degree = new Map(nodes.map(n => [n.id, { in: 0, out: 0, total: 0 }]));
251
+ originalEdges.forEach(edge => {
252
+ if (degree.has(edge.src)) {
253
+ degree.get(edge.src).out += 1;
254
+ degree.get(edge.src).total += 1;
255
+ }
256
+ if (degree.has(edge.dst)) {
257
+ degree.get(edge.dst).in += 1;
258
+ degree.get(edge.dst).total += 1;
259
+ }
260
+ });
261
+ const topNodes = nodes
262
+ .map(node => ({ node, degree: degree.get(node.id) || { total: 0 } }))
263
+ .sort((a, b) => b.degree.total - a.degree.total || String(a.node.id).localeCompare(String(b.node.id)))
264
+ .slice(0, 5);
265
+ const latest = nodes
266
+ .slice()
267
+ .sort((a, b) => String(b.created_at || "").localeCompare(String(a.created_at || "")))
268
+ .slice(0, 4);
269
+ const maxType = Math.max(1, ...typeCounts.values());
270
+ const topTypes = Array.from(typeCounts.entries())
271
+ .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
272
+ .slice(0, 5);
273
+
274
+ sitrep.innerHTML = `
275
+ <div class="head">
276
+ <div class="title">SITREP</div>
277
+ <div class="state">embedded graph</div>
278
+ </div>
279
+ <div class="metrics">
280
+ <div class="metric"><div class="value">${nodes.length}</div><div class="label">nodes</div></div>
281
+ <div class="metric"><div class="value">${edges.length}</div><div class="label">edges</div></div>
282
+ <div class="metric"><div class="value">${provenance}%</div><div class="label">provenance</div></div>
283
+ <div class="metric"><div class="value">${highConfidence}</div><div class="label">high confidence</div></div>
284
+ </div>
285
+ <div class="block">
286
+ <div class="block-title">Top Connected</div>
287
+ ${topNodes.map(({ node, degree }) => `
288
+ <button class="row" data-id="${escapeHtml(node.id)}">
289
+ <span class="row-label">${escapeHtml(shortLabel(node.label || node.id))}</span>
290
+ <span class="row-meta">${escapeHtml(node.type)} / ${degree.total}</span>
291
+ </button>
292
+ `).join("")}
293
+ </div>
294
+ <div class="block">
295
+ <div class="block-title">Node Mix</div>
296
+ <div class="type-bars">
297
+ ${topTypes.map(([type, count]) => `
298
+ <div class="type-bar">
299
+ <span class="type-name">${escapeHtml(type)}</span>
300
+ <span class="bar-track"><span class="bar-fill" style="width:${Math.round((count / maxType) * 100)}%"></span></span>
301
+ <span class="type-count">${count}</span>
302
+ </div>
303
+ `).join("")}
304
+ </div>
305
+ </div>
306
+ <div class="block">
307
+ <div class="block-title">Latest Signal</div>
308
+ ${latest.map(node => `
309
+ <button class="row" data-id="${escapeHtml(node.id)}">
310
+ <span class="row-label">${escapeHtml(shortLabel(node.label || node.id))}</span>
311
+ <span class="row-meta">${escapeHtml(node.type)}</span>
312
+ </button>
313
+ `).join("")}
314
+ </div>
315
+ `;
316
+ sitrep.querySelectorAll("[data-id]").forEach(el => {
317
+ el.addEventListener("click", ev => {
318
+ ev.stopPropagation();
319
+ const node = nodeById.get(el.dataset.id);
320
+ if (node) openPanel(node);
321
+ });
322
+ });
323
+ }
324
+
325
+ function openPanel(n) {
326
+ const panel = document.getElementById("panel");
327
+ const out = originalEdges.filter(e => e.src === n.id);
328
+ const inc = originalEdges.filter(e => e.dst === n.id);
329
+ const prov = originalEdges.filter(e =>
330
+ (e.type === "MENTIONED_IN" || e.type === "MADE_AT") &&
331
+ (e.src === n.id || e.dst === n.id));
332
+ panel.classList.add("open");
333
+ panel.innerHTML = `
334
+ <span id="close">×</span>
335
+ <h3>${escapeHtml(n.label || n.id)}</h3>
336
+ <div class="meta">${escapeHtml(n.type)} · <code>${escapeHtml(n.id)}</code>
337
+ <span class="pill">${escapeHtml(n.confidence || "?")}</span></div>
338
+ ${n.body ? `<div class="body">${escapeHtml(n.body)}</div>` : ""}
339
+ ${prov.length ? `<div class="section">
340
+ <div class="section-title">provenance</div>
341
+ <ul>${prov.map(e => {
342
+ const sid = e.src === n.id ? e.dst : e.src;
343
+ const ex = e.excerpt ? `<div class="meta">"${escapeHtml(e.excerpt)}"</div>` : "";
344
+ return `<li><a data-id="${escapeHtml(sid)}">${escapeHtml(sid)}</a>${ex}</li>`;
345
+ }).join("")}</ul></div>` : ""}
346
+ ${out.length ? `<div class="section">
347
+ <div class="section-title">outgoing (${out.length})</div>
348
+ <ul>${out.map(e =>
349
+ `<li>${escapeHtml(e.type)} → <a data-id="${escapeHtml(e.dst)}">${escapeHtml(e.dst)}</a></li>`).join("")}</ul></div>` : ""}
350
+ ${inc.length ? `<div class="section">
351
+ <div class="section-title">incoming (${inc.length})</div>
352
+ <ul>${inc.map(e =>
353
+ `<li><a data-id="${escapeHtml(e.src)}">${escapeHtml(e.src)}</a> → ${escapeHtml(e.type)}</li>`).join("")}</ul></div>` : ""}
354
+ `;
355
+ document.getElementById("close").onclick = () => panel.classList.remove("open");
356
+ panel.querySelectorAll("a[data-id]").forEach(a => {
357
+ a.onclick = () => {
358
+ const target = nodeById.get(a.dataset.id);
359
+ if (target) openPanel(target);
360
+ };
361
+ });
362
+ }
363
+ })();
364
+
365
+ </script>
366
+ """
367
+
368
+
369
def _graph_payload(graph_path: Path) -> dict:
    """Load the graph stored at ``graph_path`` and flatten it to plain containers.

    Returns a dict with two keys: ``"nodes"`` maps each node id to the
    ``dataclasses.asdict`` form of its node, and ``"edges"`` is a list with the
    ``asdict`` form of every edge, in the graph's own order.
    """
    graph = Graph.load(str(graph_path))
    node_map = {}
    for node_id, node in graph.nodes.items():
        node_map[node_id] = asdict(node)
    edge_list = list(map(asdict, graph.edges))
    return {"nodes": node_map, "edges": edge_list}
375
+
376
+
377
def render_html(graph_path: Path, out_path: Path = HTML_PATH) -> Path:
    """Render the graph at ``graph_path`` into a self-contained HTML viewer.

    The graph payload is serialised to JSON and substituted for the
    ``__GRAPH_JSON__`` placeholder inside the template's inline ``<script>``.

    Args:
        graph_path: Location of the graph file to load.
        out_path: File to write; missing parent directories are created.

    Returns:
        ``out_path``, after the page has been written as UTF-8.
    """
    payload = _graph_payload(graph_path)
    graph_json = json.dumps(payload, ensure_ascii=False)
    # The JSON lands verbatim inside an inline <script>. The HTML parser ends
    # that element on "</script" in ANY letter case and also reacts to "<!--",
    # so guarding only the lowercase spelling is not enough. In JSON a "<" can
    # only occur inside a string, where the \u003c escape denotes the same
    # character, so escaping every "<" neutralises all of these sequences
    # without changing the data the page sees.
    safe_json = graph_json.replace("<", "\\u003c")
    html = HTML_TEMPLATE.replace("__GRAPH_JSON__", safe_json)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(html, encoding="utf-8")
    return out_path
384
+
385
+
386
+ def _value_arg(args: list[str], name: str) -> str | None:
387
+ if name not in args:
388
+ return None
389
+ i = args.index(name)
390
+ if i + 1 >= len(args):
391
+ raise SystemExit(f"viz: {name} needs a path")
392
+ return args[i + 1]
393
+
394
+
395
def run_viz(args: list[str]) -> int:
    """Generate the viewer page and, unless told otherwise, open it.

    Recognised arguments:
        --graph PATH  graph file to render (default: ``resolve_graph_path()``)
        --out PATH    HTML file to write (default: ``HTML_PATH``)
        --no-open     do not launch a browser after writing

    Args:
        args: Command-line arguments without the program name.

    Returns:
        Process exit code; ``0`` once the page has been written.
    """
    graph_arg = _value_arg(args, "--graph")
    out_arg = _value_arg(args, "--out")
    graph_path = Path(graph_arg).expanduser().resolve() if graph_arg else Path(resolve_graph_path())
    out = Path(out_arg).expanduser().resolve() if out_arg else HTML_PATH
    written = render_html(graph_path, out)
    print(f"viz: wrote {written}")
    if "--no-open" not in args:
        # webbrowser.open reports failure through its return value rather than
        # an exception, so only claim success when it actually returned True.
        if webbrowser.open(written.resolve().as_uri()):
            print("viz: opened in default browser")
        else:
            print(f"viz: could not launch a browser; open {written} manually", file=sys.stderr)
    return 0
406
+
407
+
408
# Script entry point: run the viewer generator and exit with its return code.
if __name__ == "__main__":
    raise SystemExit(run_viz(sys.argv[1:]))
@@ -0,0 +1,185 @@
1
+ """
2
+ eval_compare.py — Claude vs Gemma extraction A/B (v1.5).
3
+
4
+ Runs the same source markdown through both extractors (mygraph/extractor.py
5
+ for Claude, ollama_proxy/extractor_adapter.py for Gemma), then writes a single
6
+ record to eval_record.jsonl with kind="extractor_comparison".
7
+
8
+ Use this to feed the v1 eval corpus with comparative data — the hard signal
9
+ for "should we replace Claude with local Gemma?" lives here.
10
+
11
+ Usage:
12
+ python ollama_proxy/eval_compare.py path/to/file.md
13
+ python ollama_proxy/eval_compare.py path/to/file.md --gemma-model gemma4:latest
14
+ python ollama_proxy/eval_compare.py path/to/file.md --claude-only
15
+ python ollama_proxy/eval_compare.py path/to/file.md --gemma-only
16
+
17
+ Compares (per-side):
18
+ - n_nodes, n_edges (raw)
19
+ - n_high / n_medium / n_low confidence
20
+ - validator outcome: accepted / demoted / rejected
21
+ - latency (wall-clock, seconds)
22
+ - which node IDs each side proposed (set diff)
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import argparse
28
+ import json
29
+ import os
30
+ import sys
31
+ import time
32
+ from datetime import datetime, timezone
33
+ from pathlib import Path
34
+
35
# Wire up sibling imports: put the sibling ``mygraph`` directory and this
# script's own directory on sys.path so the modules used below (``validator``,
# and lazily ``extractor`` / ``extractor_adapter`` inside run()) can be
# imported by bare name.
_HERE = Path(__file__).resolve().parent
_MYGRAPH = _HERE.parent / "mygraph"
for _p in (_MYGRAPH, _HERE):
    if str(_p) not in sys.path:
        # Each entry is inserted at index 0, so _HERE (inserted last) ends up
        # ahead of _MYGRAPH when both are added.
        sys.path.insert(0, str(_p))
41
+
42
+ from validator import validate # noqa: E402
43
+
44
+
45
+ def _by_confidence(items: list[dict]) -> dict:
46
+ out = {"high": 0, "medium": 0, "low": 0, "other": 0}
47
+ for it in items:
48
+ c = it.get("confidence", "other")
49
+ out[c if c in out else "other"] += 1
50
+ return out
51
+
52
+
53
def _summarize(payload: dict, src_text: str, latency_s: float) -> dict:
    """Condense one extractor payload into the per-side comparison summary.

    Args:
        payload: Extractor output; ``nodes``, ``edges`` and ``_meta`` are read.
        src_text: Source text handed to ``validate`` together with the payload.
        latency_s: Wall-clock extraction time in seconds (rounded to 2 places).

    Returns:
        A dict with raw node/edge counts, per-confidence tallies, validator
        outcome counts, the sorted set of node ids, the latency, and the
        ``model`` / ``backend`` taken from ``payload["_meta"]``. If validation
        raises, every validator count is ``None`` and ``error`` holds the
        exception text.
    """
    nodes = payload.get("nodes", [])
    edges = payload.get("edges", [])

    # Start with every validator field unset; the counts are filled in only
    # when the whole validation step succeeds.
    verdict = dict.fromkeys(
        ("accepted_nodes", "accepted_edges", "demoted_nodes",
         "rejected_nodes", "rejected_edges", "error")
    )
    try:
        _validated, manifest = validate(payload, src_text)
        counts = {
            "accepted_nodes": len(manifest.accepted_nodes),
            "accepted_edges": len(manifest.accepted_edges),
            "demoted_nodes": len(manifest.demoted_nodes),
            "rejected_nodes": len(manifest.rejected_nodes),
            "rejected_edges": len(manifest.rejected_edges),
        }
    except Exception as exc:
        # Best effort: a validator failure is recorded, not propagated.
        verdict["error"] = str(exc)
    else:
        verdict.update(counts)

    summary = {
        "n_nodes_raw": len(nodes),
        "n_edges_raw": len(edges),
        "node_confidence": _by_confidence(nodes),
        "edge_confidence": _by_confidence(edges),
        "validator": verdict,
        "node_ids": sorted({n.get("id", "") for n in nodes}),
        "latency_s": round(latency_s, 2),
    }
    meta = payload.get("_meta") or {}
    summary["model"] = meta.get("model")
    summary["backend"] = meta.get("backend")
    return summary
86
+
87
+
88
+ def _id_diff(a_summary: dict, b_summary: dict) -> dict:
89
+ a_ids = set(a_summary.get("node_ids", []))
90
+ b_ids = set(b_summary.get("node_ids", []))
91
+ return {
92
+ "shared": sorted(a_ids & b_ids),
93
+ "claude_only": sorted(a_ids - b_ids),
94
+ "gemma_only": sorted(b_ids - a_ids),
95
+ "jaccard": (len(a_ids & b_ids) / len(a_ids | b_ids)) if (a_ids | b_ids) else None,
96
+ }
97
+
98
+
99
def run(md_path: Path, claude_only: bool = False, gemma_only: bool = False,
        gemma_model: str | None = None, claude_model: str | None = None) -> dict:
    """Run the selected extractors on ``md_path`` and log one comparison record.

    Args:
        md_path: Source markdown file; read as UTF-8 and passed to each extractor.
        claude_only: Skip the Gemma side.
        gemma_only: Skip the Claude side.
        gemma_model: Optional ``model`` override forwarded to the Gemma extractor.
        claude_model: Optional ``model`` override forwarded to the Claude extractor.

    Returns:
        The record that was appended to ``eval_record.jsonl`` in the mygraph
        directory. It carries a ``diff`` entry only when both sides produced a
        summary.
    """
    src_text = md_path.read_text(encoding="utf-8")

    def _timed_summary(extract, model):
        # The timer covers building the kwargs and the extraction call only;
        # summarising/validation happens after the elapsed time is computed.
        started = time.perf_counter()
        options = {"model": model} if model else {}
        payload = extract(md_path, **options)
        return _summarize(payload, src_text, time.perf_counter() - started)

    claude_summary = None
    gemma_summary = None

    # Extractors are imported lazily so a skipped side is never loaded.
    if not gemma_only:
        from extractor import extract as claude_extract  # noqa: E402
        claude_summary = _timed_summary(claude_extract, claude_model)

    if not claude_only:
        from extractor_adapter import extract as gemma_extract  # noqa: E402
        gemma_summary = _timed_summary(gemma_extract, gemma_model)

    record = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "kind": "extractor_comparison",
        "source_path": str(md_path),
        "claude": claude_summary,
        "gemma": gemma_summary,
    }
    if claude_summary and gemma_summary:
        record["diff"] = _id_diff(claude_summary, gemma_summary)

    # Append to mygraph/eval_record.jsonl (canonical eval log location).
    line = json.dumps(record, ensure_ascii=False)
    log_path = _MYGRAPH / "eval_record.jsonl"
    with log_path.open("a", encoding="utf-8") as log_file:
        log_file.write(f"{line}\n")
    return record
139
+
140
+
141
def _print_side(label: str, summary: dict) -> None:
    """Print the two-line terminal summary for one extractor's results."""
    conf = summary["node_confidence"]
    print(f" {label} ({summary.get('model')}): {summary['n_nodes_raw']} nodes / {summary['n_edges_raw']} edges "
          f"high={conf['high']} med={conf['medium']} low={conf['low']} "
          f"{summary['latency_s']}s")
    v = summary.get("validator", {})
    print(f" validator: accepted {v.get('accepted_nodes')} / demoted {v.get('demoted_nodes')} / rejected {v.get('rejected_nodes')}")


def main(argv: list[str]) -> int:
    """Command-line entry point for the Claude vs Gemma comparison.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        ``0`` on success, ``1`` if the source file does not exist, ``2`` if
        ``--claude-only`` and ``--gemma-only`` are combined.
    """
    p = argparse.ArgumentParser(description="A/B Claude vs Gemma extraction")
    p.add_argument("source", help="path to source markdown")
    p.add_argument("--claude-only", action="store_true")
    p.add_argument("--gemma-only", action="store_true")
    p.add_argument("--gemma-model", default=None,
                   help=f"override (default {os.environ.get('OLLAMA_DEFAULT_MODEL', 'gemma4:e4b')})")
    p.add_argument("--claude-model", default=None,
                   help="override Claude model")
    p.add_argument("--print", action="store_true", help="pretty-print the record to stdout")
    args = p.parse_args(argv)

    # With both flags set neither extractor would run, yet an empty record
    # would still be appended to the eval log; refuse instead of polluting it.
    if args.claude_only and args.gemma_only:
        print("eval_compare: --claude-only and --gemma-only are mutually exclusive", file=sys.stderr)
        return 2

    md = Path(args.source).expanduser().resolve()
    if not md.exists():
        print(f"eval_compare: not found: {md}", file=sys.stderr)
        return 1
    record = run(md, claude_only=args.claude_only, gemma_only=args.gemma_only,
                 gemma_model=args.gemma_model, claude_model=args.claude_model)
    if args.print:
        print(json.dumps(record, indent=2))
        return 0

    # Compact summary for the terminal
    c, g = record.get("claude"), record.get("gemma")
    print(f"source: {md.name}")
    if c:
        _print_side("claude", c)
    if g:
        _print_side("gemma", g)
    d = record.get("diff")
    if d:
        # jaccard is None when neither side proposed any node id; formatting
        # None with ".2f" would raise TypeError, so show a placeholder instead.
        jaccard = d["jaccard"]
        jaccard_text = "n/a" if jaccard is None else f"{jaccard:.2f}"
        print(f" jaccard(node_ids) = {jaccard_text} "
              f"shared={len(d['shared'])} claude_only={len(d['claude_only'])} gemma_only={len(d['gemma_only'])}")
    return 0
182
+
183
+
184
# Script entry point: run the comparison CLI and exit with its return code.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))