knowledge-worker 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge_worker-0.6.0.dist-info/METADATA +365 -0
- knowledge_worker-0.6.0.dist-info/RECORD +27 -0
- knowledge_worker-0.6.0.dist-info/WHEEL +5 -0
- knowledge_worker-0.6.0.dist-info/entry_points.txt +3 -0
- knowledge_worker-0.6.0.dist-info/licenses/LICENSE +21 -0
- knowledge_worker-0.6.0.dist-info/top_level.txt +2 -0
- mygraph/__init__.py +23 -0
- mygraph/anthropic_client.py +199 -0
- mygraph/audit.py +137 -0
- mygraph/check.py +273 -0
- mygraph/discover.py +654 -0
- mygraph/eval_log.py +36 -0
- mygraph/export_context.py +124 -0
- mygraph/extractor.py +243 -0
- mygraph/extractor_openai.py +165 -0
- mygraph/ingest.py +170 -0
- mygraph/memory_audit.py +1094 -0
- mygraph/merge.py +133 -0
- mygraph/mygraph.py +773 -0
- mygraph/owl_io.py +202 -0
- mygraph/review.py +151 -0
- mygraph/validator.py +149 -0
- mygraph/viz.py +409 -0
- ollama_proxy/eval_compare.py +185 -0
- ollama_proxy/extractor_adapter.py +168 -0
- ollama_proxy/proxy.py +143 -0
- ollama_proxy/server.py +194 -0
mygraph/viz.py
ADDED
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
"""
|
|
2
|
+
viz.py — graph viewer generator.
|
|
3
|
+
|
|
4
|
+
Writes a single HTML file with graph JSON embedded directly into the page. The
|
|
5
|
+
viewer uses D3.js from the CDN for force-directed layout, with no sibling JSON
|
|
6
|
+
fetch and no upload step.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import sys
|
|
13
|
+
import webbrowser
|
|
14
|
+
from dataclasses import asdict
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from mygraph import Graph, resolve_graph_path
|
|
18
|
+
|
|
19
|
+
HERE = Path(__file__).parent
|
|
20
|
+
HTML_PATH = HERE / "mygraph_viz.html"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
HTML_TEMPLATE = r"""<!doctype html>
|
|
24
|
+
<meta charset="utf-8" />
|
|
25
|
+
<title>mygraph — visualizer</title>
|
|
26
|
+
<style>
|
|
27
|
+
:root {
|
|
28
|
+
--bg: #0f1115;
|
|
29
|
+
--fg: #e6e8ea;
|
|
30
|
+
--muted: #8a9099;
|
|
31
|
+
--panel: #181b21;
|
|
32
|
+
--accent: #d2b48c;
|
|
33
|
+
}
|
|
34
|
+
html, body { margin: 0; height: 100%; background: var(--bg); color: var(--fg);
|
|
35
|
+
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
|
|
36
|
+
overflow: hidden; }
|
|
37
|
+
#header { padding: 8px 14px; border-bottom: 1px solid #222;
|
|
38
|
+
display: flex; align-items: center; gap: 14px; font-size: 12px; }
|
|
39
|
+
#header strong { color: var(--accent); letter-spacing: 0; }
|
|
40
|
+
#header .legend { display: flex; gap: 10px; flex-wrap: wrap; }
|
|
41
|
+
#header .legend span { display: inline-flex; align-items: center; gap: 4px; }
|
|
42
|
+
#header .legend i { width: 10px; height: 10px; border-radius: 50%; display: inline-block; }
|
|
43
|
+
#stage { width: 100vw; height: calc(100vh - 38px); }
|
|
44
|
+
svg { width: 100%; height: 100%; cursor: grab; }
|
|
45
|
+
.link { stroke: #3a3f47; stroke-opacity: 0.55; }
|
|
46
|
+
.link.high { stroke-opacity: 0.9; }
|
|
47
|
+
.link.medium { stroke-opacity: 0.6; }
|
|
48
|
+
.link.low { stroke-opacity: 0.3; stroke-dasharray: 3 3; }
|
|
49
|
+
.node circle { stroke: #0f1115; stroke-width: 1.5; cursor: pointer; }
|
|
50
|
+
.node text { fill: var(--fg); font-size: 10px; pointer-events: none;
|
|
51
|
+
text-shadow: 0 0 3px #0f1115, 0 0 3px #0f1115, 0 0 3px #0f1115; }
|
|
52
|
+
.edge-label { fill: var(--muted); font-size: 9px; pointer-events: none; }
|
|
53
|
+
#sitrep { position: fixed; top: 50px; left: 12px; width: min(312px, calc(100vw - 24px));
|
|
54
|
+
max-height: calc(100vh - 64px); overflow: auto; background: rgba(24, 27, 33, 0.88);
|
|
55
|
+
border: 1px solid #2a2f37; border-radius: 6px; font-size: 12px;
|
|
56
|
+
box-shadow: 0 12px 44px rgba(0,0,0,.24); }
|
|
57
|
+
#sitrep .head { display: flex; align-items: center; justify-content: space-between;
|
|
58
|
+
gap: 10px; padding: 10px 12px; border-bottom: 1px solid #2a2f37; }
|
|
59
|
+
#sitrep .title { color: var(--accent); font-weight: 700; text-transform: uppercase; }
|
|
60
|
+
#sitrep .state { color: var(--muted); font-size: 10px; text-transform: uppercase; }
|
|
61
|
+
#sitrep .metrics { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr));
|
|
62
|
+
border-bottom: 1px solid #2a2f37; }
|
|
63
|
+
#sitrep .metric { min-height: 56px; padding: 10px 12px; border-right: 1px solid #2a2f37;
|
|
64
|
+
border-bottom: 1px solid #2a2f37; }
|
|
65
|
+
#sitrep .metric:nth-child(even) { border-right: 0; }
|
|
66
|
+
#sitrep .metric:nth-last-child(-n+2) { border-bottom: 0; }
|
|
67
|
+
#sitrep .value { color: var(--accent); font-size: 20px; line-height: 1; }
|
|
68
|
+
#sitrep .label { margin-top: 6px; color: var(--muted); font-size: 10px; text-transform: uppercase; }
|
|
69
|
+
#sitrep .block { padding: 11px 12px; border-top: 1px solid #2a2f37; }
|
|
70
|
+
#sitrep .block:first-of-type { border-top: 0; }
|
|
71
|
+
#sitrep .block-title { margin-bottom: 8px; color: var(--muted); font-size: 10px; text-transform: uppercase; }
|
|
72
|
+
#sitrep .row { display: grid; grid-template-columns: 1fr auto; gap: 10px; padding: 7px 0;
|
|
73
|
+
border-top: 1px solid rgba(138, 144, 153, 0.16); color: var(--fg); }
|
|
74
|
+
#sitrep .row:first-child { border-top: 0; }
|
|
75
|
+
#sitrep button.row { width: 100%; border-right: 0; border-left: 0; border-bottom: 0;
|
|
76
|
+
background: transparent; text-align: left; cursor: pointer; font: inherit; }
|
|
77
|
+
#sitrep button.row:hover { color: #fff; }
|
|
78
|
+
#sitrep .row-label { min-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
|
79
|
+
#sitrep .row-meta { color: var(--muted); font-size: 10px; text-transform: uppercase; }
|
|
80
|
+
#sitrep .type-bars { display: grid; gap: 7px; }
|
|
81
|
+
#sitrep .type-bar { display: grid; grid-template-columns: 72px 1fr 24px; gap: 8px; align-items: center; }
|
|
82
|
+
#sitrep .type-name, #sitrep .type-count { color: var(--muted); font-size: 10px; text-transform: uppercase; }
|
|
83
|
+
#sitrep .bar-track { height: 6px; background: rgba(138, 144, 153, 0.18); border-radius: 999px; overflow: hidden; }
|
|
84
|
+
#sitrep .bar-fill { height: 100%; background: var(--accent); }
|
|
85
|
+
#panel { position: fixed; top: 50px; right: 12px; width: 360px; max-height: 80vh;
|
|
86
|
+
overflow: auto; background: var(--panel); border: 1px solid #2a2f37;
|
|
87
|
+
border-radius: 6px; padding: 12px 14px; font-size: 12px; line-height: 1.45;
|
|
88
|
+
display: none; }
|
|
89
|
+
#panel.open { display: block; }
|
|
90
|
+
#panel h3 { margin: 0 0 4px 0; color: var(--accent); font-size: 13px; }
|
|
91
|
+
#panel .meta { color: var(--muted); font-size: 11px; }
|
|
92
|
+
#panel .body { margin: 8px 0; }
|
|
93
|
+
#panel .section { margin-top: 10px; }
|
|
94
|
+
#panel .section-title { color: var(--muted); text-transform: uppercase;
|
|
95
|
+
letter-spacing: 0; font-size: 10px; margin-bottom: 4px; }
|
|
96
|
+
#panel ul { margin: 0; padding-left: 16px; }
|
|
97
|
+
#panel a { color: #87b7e0; text-decoration: none; cursor: pointer; }
|
|
98
|
+
#panel a:hover { text-decoration: underline; }
|
|
99
|
+
.pill { display: inline-block; padding: 0 6px; border-radius: 3px;
|
|
100
|
+
background: #2a2f37; color: var(--muted); font-size: 10px; margin-left: 4px; }
|
|
101
|
+
#close { float: right; cursor: pointer; color: var(--muted); }
|
|
102
|
+
@media (max-width: 860px) {
|
|
103
|
+
#sitrep { display: none; }
|
|
104
|
+
#panel { left: 12px; right: 12px; width: auto; }
|
|
105
|
+
}
|
|
106
|
+
</style>
|
|
107
|
+
|
|
108
|
+
<div id="header">
|
|
109
|
+
<strong>mygraph</strong>
|
|
110
|
+
<span id="counts">loading…</span>
|
|
111
|
+
<span class="legend" id="legend"></span>
|
|
112
|
+
<span style="margin-left:auto; color: var(--muted)">click a node · drag to pan · scroll to zoom</span>
|
|
113
|
+
</div>
|
|
114
|
+
<div id="stage"><svg aria-label="knowledge graph"></svg></div>
|
|
115
|
+
<aside id="sitrep" aria-label="graph sitrep"></aside>
|
|
116
|
+
<div id="panel"></div>
|
|
117
|
+
|
|
118
|
+
<script src="https://d3js.org/d3.v7.min.js"></script>
|
|
119
|
+
<script>
|
|
120
|
+
const GRAPH_DATA = __GRAPH_JSON__;
|
|
121
|
+
const TYPE_COLORS = {
|
|
122
|
+
person: "#e07b7b",
|
|
123
|
+
topic: "#7bb0e0",
|
|
124
|
+
idea: "#d2b48c",
|
|
125
|
+
project: "#7be0a8",
|
|
126
|
+
goal: "#b07be0",
|
|
127
|
+
question: "#e0c87b",
|
|
128
|
+
decision: "#7be0c8",
|
|
129
|
+
reference: "#e07bb0",
|
|
130
|
+
source: "#8a9099",
|
|
131
|
+
};
|
|
132
|
+
const TYPE_RADIUS = { source: 5, topic: 6, person: 8, project: 9, goal: 9,
|
|
133
|
+
idea: 8, question: 7, decision: 7, reference: 7 };
|
|
134
|
+
|
|
135
|
+
// Escape text before it is interpolated into the innerHTML template strings
// below (element content and double-quoted attribute values).
// Null, undefined and other falsy values are rendered as an empty string.
function escapeHtml(value) {
  // Each HTML-significant character maps to its named entity; "&" is in
  // the same single pass, so already-produced entities are not re-escaped.
  const entities = {
    "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;"
  };
  return String(value || "").replace(/[&<>"]/g, c => entities[c]);
}
|
|
140
|
+
|
|
141
|
+
(function() {
|
|
142
|
+
const data = GRAPH_DATA;
|
|
143
|
+
const nodes = Object.values(data.nodes || {}).map(n => ({ ...n }));
|
|
144
|
+
const originalEdges = data.edges || [];
|
|
145
|
+
const edges = originalEdges.map(e => ({ ...e, source: e.src, target: e.dst }));
|
|
146
|
+
const nodeById = new Map(nodes.map(n => [n.id, n]));
|
|
147
|
+
document.getElementById("counts").textContent = `${nodes.length} nodes · ${edges.length} edges`;
|
|
148
|
+
|
|
149
|
+
const legend = document.getElementById("legend");
|
|
150
|
+
Object.keys(TYPE_COLORS).forEach(type => {
|
|
151
|
+
const span = document.createElement("span");
|
|
152
|
+
span.innerHTML = `<i style="background:${TYPE_COLORS[type]}"></i>${escapeHtml(type)}`;
|
|
153
|
+
legend.appendChild(span);
|
|
154
|
+
});
|
|
155
|
+
renderSitrep();
|
|
156
|
+
|
|
157
|
+
if (typeof d3 === "undefined") {
|
|
158
|
+
document.getElementById("counts").textContent = "failed to load D3.js";
|
|
159
|
+
return;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
const svg = d3.select("svg");
|
|
163
|
+
const viewport = svg.append("g");
|
|
164
|
+
svg.call(d3.zoom()
|
|
165
|
+
.scaleExtent([0.2, 4])
|
|
166
|
+
.on("zoom", ev => viewport.attr("transform", ev.transform)));
|
|
167
|
+
|
|
168
|
+
const sim = d3.forceSimulation(nodes)
|
|
169
|
+
.force("link", d3.forceLink(edges).id(d => d.id).distance(80).strength(0.5))
|
|
170
|
+
.force("charge", d3.forceManyBody().strength(-160))
|
|
171
|
+
.force("center", d3.forceCenter(window.innerWidth / 2, (window.innerHeight - 38) / 2))
|
|
172
|
+
.force("collide", d3.forceCollide().radius(d => (TYPE_RADIUS[d.type] || 7) + 4));
|
|
173
|
+
|
|
174
|
+
const link = viewport.append("g").attr("class", "links").selectAll("line")
|
|
175
|
+
.data(edges).join("line")
|
|
176
|
+
.attr("class", d => `link ${d.confidence || "medium"}`);
|
|
177
|
+
|
|
178
|
+
const edgeLabel = viewport.append("g").attr("class", "edge-labels").selectAll("text")
|
|
179
|
+
.data(edges).join("text")
|
|
180
|
+
.attr("class", "edge-label")
|
|
181
|
+
.text(d => d.type);
|
|
182
|
+
|
|
183
|
+
const node = viewport.append("g").attr("class", "nodes").selectAll("g.node")
|
|
184
|
+
.data(nodes).join("g")
|
|
185
|
+
.attr("class", "node")
|
|
186
|
+
.call(drag(sim));
|
|
187
|
+
|
|
188
|
+
node.append("circle")
|
|
189
|
+
.attr("r", d => TYPE_RADIUS[d.type] || 7)
|
|
190
|
+
.attr("fill", d => TYPE_COLORS[d.type] || "#888");
|
|
191
|
+
node.append("text")
|
|
192
|
+
.attr("dx", 11)
|
|
193
|
+
.attr("dy", 3)
|
|
194
|
+
.text(d => d.label || d.id);
|
|
195
|
+
|
|
196
|
+
node.on("click", (ev, d) => {
|
|
197
|
+
ev.stopPropagation();
|
|
198
|
+
openPanel(d);
|
|
199
|
+
});
|
|
200
|
+
svg.on("click", () => document.getElementById("panel").classList.remove("open"));
|
|
201
|
+
|
|
202
|
+
sim.on("tick", () => {
|
|
203
|
+
link.attr("x1", d => d.source.x).attr("y1", d => d.source.y)
|
|
204
|
+
.attr("x2", d => d.target.x).attr("y2", d => d.target.y);
|
|
205
|
+
edgeLabel.attr("x", d => (d.source.x + d.target.x) / 2)
|
|
206
|
+
.attr("y", d => (d.source.y + d.target.y) / 2);
|
|
207
|
+
node.attr("transform", d => `translate(${d.x},${d.y})`);
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
function drag(sim) {
|
|
211
|
+
return d3.drag()
|
|
212
|
+
.on("start", (ev, d) => {
|
|
213
|
+
if (!ev.active) sim.alphaTarget(0.3).restart();
|
|
214
|
+
d.fx = d.x;
|
|
215
|
+
d.fy = d.y;
|
|
216
|
+
})
|
|
217
|
+
.on("drag", (ev, d) => {
|
|
218
|
+
d.fx = ev.x;
|
|
219
|
+
d.fy = ev.y;
|
|
220
|
+
})
|
|
221
|
+
.on("end", (ev, d) => {
|
|
222
|
+
if (!ev.active) sim.alphaTarget(0);
|
|
223
|
+
d.fx = null;
|
|
224
|
+
d.fy = null;
|
|
225
|
+
});
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
function countBy(items, keyFn) {
|
|
229
|
+
const counts = new Map();
|
|
230
|
+
items.forEach(item => {
|
|
231
|
+
const key = keyFn(item) || "unknown";
|
|
232
|
+
counts.set(key, (counts.get(key) || 0) + 1);
|
|
233
|
+
});
|
|
234
|
+
return counts;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
function shortLabel(value, max = 42) {
|
|
238
|
+
const text = String(value || "");
|
|
239
|
+
return text.length <= max ? text : `${text.slice(0, max - 1)}…`;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
function renderSitrep() {
|
|
243
|
+
const sitrep = document.getElementById("sitrep");
|
|
244
|
+
const typeCounts = countBy(nodes, n => n.type);
|
|
245
|
+
const confidenceCounts = countBy(nodes, n => n.confidence);
|
|
246
|
+
const highConfidence = confidenceCounts.get("high") || 0;
|
|
247
|
+
const mentioned = new Set(originalEdges.filter(e => e.type === "MENTIONED_IN").map(e => e.src));
|
|
248
|
+
const nonSource = nodes.filter(n => n.type !== "source").length;
|
|
249
|
+
const provenance = nonSource ? Math.round((mentioned.size / nonSource) * 100) : 100;
|
|
250
|
+
const degree = new Map(nodes.map(n => [n.id, { in: 0, out: 0, total: 0 }]));
|
|
251
|
+
originalEdges.forEach(edge => {
|
|
252
|
+
if (degree.has(edge.src)) {
|
|
253
|
+
degree.get(edge.src).out += 1;
|
|
254
|
+
degree.get(edge.src).total += 1;
|
|
255
|
+
}
|
|
256
|
+
if (degree.has(edge.dst)) {
|
|
257
|
+
degree.get(edge.dst).in += 1;
|
|
258
|
+
degree.get(edge.dst).total += 1;
|
|
259
|
+
}
|
|
260
|
+
});
|
|
261
|
+
const topNodes = nodes
|
|
262
|
+
.map(node => ({ node, degree: degree.get(node.id) || { total: 0 } }))
|
|
263
|
+
.sort((a, b) => b.degree.total - a.degree.total || String(a.node.id).localeCompare(String(b.node.id)))
|
|
264
|
+
.slice(0, 5);
|
|
265
|
+
const latest = nodes
|
|
266
|
+
.slice()
|
|
267
|
+
.sort((a, b) => String(b.created_at || "").localeCompare(String(a.created_at || "")))
|
|
268
|
+
.slice(0, 4);
|
|
269
|
+
const maxType = Math.max(1, ...typeCounts.values());
|
|
270
|
+
const topTypes = Array.from(typeCounts.entries())
|
|
271
|
+
.sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
|
|
272
|
+
.slice(0, 5);
|
|
273
|
+
|
|
274
|
+
sitrep.innerHTML = `
|
|
275
|
+
<div class="head">
|
|
276
|
+
<div class="title">SITREP</div>
|
|
277
|
+
<div class="state">embedded graph</div>
|
|
278
|
+
</div>
|
|
279
|
+
<div class="metrics">
|
|
280
|
+
<div class="metric"><div class="value">${nodes.length}</div><div class="label">nodes</div></div>
|
|
281
|
+
<div class="metric"><div class="value">${edges.length}</div><div class="label">edges</div></div>
|
|
282
|
+
<div class="metric"><div class="value">${provenance}%</div><div class="label">provenance</div></div>
|
|
283
|
+
<div class="metric"><div class="value">${highConfidence}</div><div class="label">high confidence</div></div>
|
|
284
|
+
</div>
|
|
285
|
+
<div class="block">
|
|
286
|
+
<div class="block-title">Top Connected</div>
|
|
287
|
+
${topNodes.map(({ node, degree }) => `
|
|
288
|
+
<button class="row" data-id="${escapeHtml(node.id)}">
|
|
289
|
+
<span class="row-label">${escapeHtml(shortLabel(node.label || node.id))}</span>
|
|
290
|
+
<span class="row-meta">${escapeHtml(node.type)} / ${degree.total}</span>
|
|
291
|
+
</button>
|
|
292
|
+
`).join("")}
|
|
293
|
+
</div>
|
|
294
|
+
<div class="block">
|
|
295
|
+
<div class="block-title">Node Mix</div>
|
|
296
|
+
<div class="type-bars">
|
|
297
|
+
${topTypes.map(([type, count]) => `
|
|
298
|
+
<div class="type-bar">
|
|
299
|
+
<span class="type-name">${escapeHtml(type)}</span>
|
|
300
|
+
<span class="bar-track"><span class="bar-fill" style="width:${Math.round((count / maxType) * 100)}%"></span></span>
|
|
301
|
+
<span class="type-count">${count}</span>
|
|
302
|
+
</div>
|
|
303
|
+
`).join("")}
|
|
304
|
+
</div>
|
|
305
|
+
</div>
|
|
306
|
+
<div class="block">
|
|
307
|
+
<div class="block-title">Latest Signal</div>
|
|
308
|
+
${latest.map(node => `
|
|
309
|
+
<button class="row" data-id="${escapeHtml(node.id)}">
|
|
310
|
+
<span class="row-label">${escapeHtml(shortLabel(node.label || node.id))}</span>
|
|
311
|
+
<span class="row-meta">${escapeHtml(node.type)}</span>
|
|
312
|
+
</button>
|
|
313
|
+
`).join("")}
|
|
314
|
+
</div>
|
|
315
|
+
`;
|
|
316
|
+
sitrep.querySelectorAll("[data-id]").forEach(el => {
|
|
317
|
+
el.addEventListener("click", ev => {
|
|
318
|
+
ev.stopPropagation();
|
|
319
|
+
const node = nodeById.get(el.dataset.id);
|
|
320
|
+
if (node) openPanel(node);
|
|
321
|
+
});
|
|
322
|
+
});
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
function openPanel(n) {
|
|
326
|
+
const panel = document.getElementById("panel");
|
|
327
|
+
const out = originalEdges.filter(e => e.src === n.id);
|
|
328
|
+
const inc = originalEdges.filter(e => e.dst === n.id);
|
|
329
|
+
const prov = originalEdges.filter(e =>
|
|
330
|
+
(e.type === "MENTIONED_IN" || e.type === "MADE_AT") &&
|
|
331
|
+
(e.src === n.id || e.dst === n.id));
|
|
332
|
+
panel.classList.add("open");
|
|
333
|
+
panel.innerHTML = `
|
|
334
|
+
<span id="close">×</span>
|
|
335
|
+
<h3>${escapeHtml(n.label || n.id)}</h3>
|
|
336
|
+
<div class="meta">${escapeHtml(n.type)} · <code>${escapeHtml(n.id)}</code>
|
|
337
|
+
<span class="pill">${escapeHtml(n.confidence || "?")}</span></div>
|
|
338
|
+
${n.body ? `<div class="body">${escapeHtml(n.body)}</div>` : ""}
|
|
339
|
+
${prov.length ? `<div class="section">
|
|
340
|
+
<div class="section-title">provenance</div>
|
|
341
|
+
<ul>${prov.map(e => {
|
|
342
|
+
const sid = e.src === n.id ? e.dst : e.src;
|
|
343
|
+
const ex = e.excerpt ? `<div class="meta">"${escapeHtml(e.excerpt)}"</div>` : "";
|
|
344
|
+
return `<li><a data-id="${escapeHtml(sid)}">${escapeHtml(sid)}</a>${ex}</li>`;
|
|
345
|
+
}).join("")}</ul></div>` : ""}
|
|
346
|
+
${out.length ? `<div class="section">
|
|
347
|
+
<div class="section-title">outgoing (${out.length})</div>
|
|
348
|
+
<ul>${out.map(e =>
|
|
349
|
+
`<li>${escapeHtml(e.type)} → <a data-id="${escapeHtml(e.dst)}">${escapeHtml(e.dst)}</a></li>`).join("")}</ul></div>` : ""}
|
|
350
|
+
${inc.length ? `<div class="section">
|
|
351
|
+
<div class="section-title">incoming (${inc.length})</div>
|
|
352
|
+
<ul>${inc.map(e =>
|
|
353
|
+
`<li><a data-id="${escapeHtml(e.src)}">${escapeHtml(e.src)}</a> → ${escapeHtml(e.type)}</li>`).join("")}</ul></div>` : ""}
|
|
354
|
+
`;
|
|
355
|
+
document.getElementById("close").onclick = () => panel.classList.remove("open");
|
|
356
|
+
panel.querySelectorAll("a[data-id]").forEach(a => {
|
|
357
|
+
a.onclick = () => {
|
|
358
|
+
const target = nodeById.get(a.dataset.id);
|
|
359
|
+
if (target) openPanel(target);
|
|
360
|
+
};
|
|
361
|
+
});
|
|
362
|
+
}
|
|
363
|
+
})();
|
|
364
|
+
|
|
365
|
+
</script>
|
|
366
|
+
"""
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def _graph_payload(graph_path: Path) -> dict:
    """Load the graph at *graph_path* and convert it to plain dicts.

    The result has two keys: ``"nodes"`` maps each node id to the
    ``dataclasses.asdict`` form of that node, and ``"edges"`` is a list with
    the ``asdict`` form of every edge, in the graph's own order.
    """
    graph = Graph.load(str(graph_path))

    # asdict() only accepts dataclass instances, so nodes and edges are
    # presumably dataclasses -- confirm against the Graph definition.
    node_map = {}
    for node_id, node in graph.nodes.items():
        node_map[node_id] = asdict(node)
    edge_list = list(map(asdict, graph.edges))

    return {"nodes": node_map, "edges": edge_list}
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def render_html(graph_path: Path, out_path: Path = HTML_PATH) -> Path:
    """Write the viewer page for the graph at *graph_path* to *out_path*.

    The graph is serialized to JSON and substituted for the
    ``__GRAPH_JSON__`` placeholder in ``HTML_TEMPLATE``, so the page carries
    its data inline. Missing parent directories of *out_path* are created.

    Args:
        graph_path: Location of the graph to load.
        out_path: Destination HTML file; defaults to ``HTML_PATH``.

    Returns:
        *out_path*, after the file has been written as UTF-8.
    """
    payload = _graph_payload(graph_path)
    graph_json = json.dumps(payload, ensure_ascii=False)

    # The JSON lands inside an inline <script> element. The HTML parser ends
    # that element on "</script" in any letter case and treats "<!--"
    # specially, so a case-sensitive replace of "</script" is not enough.
    # In JSON, "<" can only occur inside string values, where the escape
    # \u003c decodes back to the same character.
    script_safe_json = (
        graph_json
        .replace("<", "\\u003c")
        # Line/paragraph separators are legal in JSON strings but were not
        # legal in JavaScript string literals before ES2019.
        .replace("\u2028", "\\u2028")
        .replace("\u2029", "\\u2029")
    )

    html = HTML_TEMPLATE.replace("__GRAPH_JSON__", script_safe_json)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(html, encoding="utf-8")
    return out_path
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _value_arg(args: list[str], name: str) -> str | None:
|
|
387
|
+
if name not in args:
|
|
388
|
+
return None
|
|
389
|
+
i = args.index(name)
|
|
390
|
+
if i + 1 >= len(args):
|
|
391
|
+
raise SystemExit(f"viz: {name} needs a path")
|
|
392
|
+
return args[i + 1]
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def run_viz(args: list[str]) -> int:
    """Render the viewer HTML and, unless told otherwise, open it in a browser.

    Recognized tokens in *args*:
        ``--graph PATH``  graph file to render; when absent the path comes
                          from ``resolve_graph_path()``.
        ``--out PATH``    output HTML file; when absent ``HTML_PATH`` is used.
        ``--no-open``     write the file but do not launch a browser.

    Returns:
        0 once the file has been written. A browser that fails to launch is
        reported on stderr but does not change the exit status, because the
        HTML file itself was produced.

    Raises:
        SystemExit: if ``--graph`` or ``--out`` is given without a value.
    """
    graph_arg = _value_arg(args, "--graph")
    out_arg = _value_arg(args, "--out")
    graph_path = Path(graph_arg).expanduser().resolve() if graph_arg else Path(resolve_graph_path())
    out = Path(out_arg).expanduser().resolve() if out_arg else HTML_PATH

    written = render_html(graph_path, out)
    print(f"viz: wrote {written}")

    if "--no-open" not in args:
        # webbrowser.open() returns False when no browser could be launched
        # (for example on a headless machine); only claim success when it
        # reports success.
        if webbrowser.open(written.resolve().as_uri()):
            print("viz: opened in default browser")
        else:
            print(f"viz: could not launch a browser; open {written} manually",
                  file=sys.stderr)
    return 0
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
if __name__ == "__main__":
|
|
409
|
+
sys.exit(run_viz(sys.argv[1:]))
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""
|
|
2
|
+
eval_compare.py — Claude vs Gemma extraction A/B (v1.5).
|
|
3
|
+
|
|
4
|
+
Runs the same source markdown through both extractors (mygraph/extractor.py
|
|
5
|
+
for Claude, ollama_proxy/extractor_adapter.py for Gemma), then writes a single
|
|
6
|
+
record to eval_record.jsonl with kind="extractor_comparison".
|
|
7
|
+
|
|
8
|
+
Use this to feed the v1 eval corpus with comparative data — the hard signal
|
|
9
|
+
for "should we replace Claude with local Gemma?" lives here.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python ollama_proxy/eval_compare.py path/to/file.md
|
|
13
|
+
python ollama_proxy/eval_compare.py path/to/file.md --gemma-model gemma4:latest
|
|
14
|
+
python ollama_proxy/eval_compare.py path/to/file.md --claude-only
|
|
15
|
+
python ollama_proxy/eval_compare.py path/to/file.md --gemma-only
|
|
16
|
+
|
|
17
|
+
Compares (per-side):
|
|
18
|
+
- n_nodes, n_edges (raw)
|
|
19
|
+
- n_high / n_medium / n_low confidence
|
|
20
|
+
- validator outcome: accepted / demoted / rejected
|
|
21
|
+
- latency (wall-clock, seconds)
|
|
22
|
+
- which node IDs each side proposed (set diff)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import json
|
|
29
|
+
import os
|
|
30
|
+
import sys
|
|
31
|
+
import time
|
|
32
|
+
from datetime import datetime, timezone
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
# wire up sibling imports
|
|
36
|
+
_HERE = Path(__file__).resolve().parent
|
|
37
|
+
_MYGRAPH = _HERE.parent / "mygraph"
|
|
38
|
+
for _p in (_MYGRAPH, _HERE):
|
|
39
|
+
if str(_p) not in sys.path:
|
|
40
|
+
sys.path.insert(0, str(_p))
|
|
41
|
+
|
|
42
|
+
from validator import validate # noqa: E402
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _by_confidence(items: list[dict]) -> dict:
|
|
46
|
+
out = {"high": 0, "medium": 0, "low": 0, "other": 0}
|
|
47
|
+
for it in items:
|
|
48
|
+
c = it.get("confidence", "other")
|
|
49
|
+
out[c if c in out else "other"] += 1
|
|
50
|
+
return out
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _summarize(payload: dict, src_text: str, latency_s: float) -> dict:
    """Condense one extractor payload into the per-side comparison summary.

    Args:
        payload: Extractor output; read for ``"nodes"``, ``"edges"`` and the
            optional ``"_meta"`` mapping (``model`` / ``backend``).
        src_text: Source text handed to ``validate`` together with the payload.
        latency_s: Wall-clock seconds measured by the caller; rounded to two
            decimals in the result.

    Returns:
        A dict with raw node/edge counts, confidence tallies, validator
        counts, the sorted unique node ids, latency, model and backend.
        If validation raises, every validator count is ``None`` and
        ``validator["error"]`` holds the exception text.
    """
    raw_nodes = payload.get("nodes", [])
    raw_edges = payload.get("edges", [])

    # Start from "validation failed" (all None) and fill in on success, so a
    # failure at any point inside the try leaves every count as None.
    verdict = dict.fromkeys((
        "accepted_nodes",
        "accepted_edges",
        "demoted_nodes",
        "rejected_nodes",
        "rejected_edges",
        "error",
    ))
    try:
        _validated, manifest = validate(payload, src_text)
        counts = {
            "accepted_nodes": len(manifest.accepted_nodes),
            "accepted_edges": len(manifest.accepted_edges),
            "demoted_nodes": len(manifest.demoted_nodes),
            "rejected_nodes": len(manifest.rejected_nodes),
            "rejected_edges": len(manifest.rejected_edges),
        }
    except Exception as exc:
        # Best-effort: a validator failure is recorded, not propagated.
        verdict["error"] = str(exc)
    else:
        verdict.update(counts)

    meta = payload.get("_meta") or {}
    unique_ids = set(node.get("id", "") for node in raw_nodes)

    return {
        "n_nodes_raw": len(raw_nodes),
        "n_edges_raw": len(raw_edges),
        "node_confidence": _by_confidence(raw_nodes),
        "edge_confidence": _by_confidence(raw_edges),
        "validator": verdict,
        "node_ids": sorted(unique_ids),
        "latency_s": round(latency_s, 2),
        "model": meta.get("model"),
        "backend": meta.get("backend"),
    }
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _id_diff(a_summary: dict, b_summary: dict) -> dict:
|
|
89
|
+
a_ids = set(a_summary.get("node_ids", []))
|
|
90
|
+
b_ids = set(b_summary.get("node_ids", []))
|
|
91
|
+
return {
|
|
92
|
+
"shared": sorted(a_ids & b_ids),
|
|
93
|
+
"claude_only": sorted(a_ids - b_ids),
|
|
94
|
+
"gemma_only": sorted(b_ids - a_ids),
|
|
95
|
+
"jaccard": (len(a_ids & b_ids) / len(a_ids | b_ids)) if (a_ids | b_ids) else None,
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def run(md_path: Path, claude_only: bool = False, gemma_only: bool = False,
        gemma_model: str | None = None, claude_model: str | None = None) -> dict:
    """Run the enabled extractors on *md_path* and append one comparison record.

    Args:
        md_path: Markdown source file; read as UTF-8 and passed to each extractor.
        claude_only: Skip the Gemma side.
        gemma_only: Skip the Claude side.
        gemma_model: Optional ``model`` override forwarded to the Gemma extractor.
        claude_model: Optional ``model`` override forwarded to the Claude extractor.

    Returns:
        The record that was appended to ``eval_record.jsonl`` in the mygraph
        directory. A skipped side is ``None``; ``"diff"`` is present only
        when both sides produced a summary.
    """
    src_text = md_path.read_text(encoding="utf-8")

    def _timed_summary(extract_fn, model_override):
        # The elapsed time is taken before _summarize runs, so the reported
        # latency does not include summarizing/validation.
        started = time.perf_counter()
        extra = {"model": model_override} if model_override else {}
        payload = extract_fn(md_path, **extra)
        elapsed = time.perf_counter() - started
        return _summarize(payload, src_text, elapsed)

    claude_summary: dict | None = None
    gemma_summary: dict | None = None

    # Each extractor module is imported only when its side actually runs.
    if not gemma_only:
        from extractor import extract as claude_extract  # noqa: E402
        claude_summary = _timed_summary(claude_extract, claude_model)

    if not claude_only:
        from extractor_adapter import extract as gemma_extract  # noqa: E402
        gemma_summary = _timed_summary(gemma_extract, gemma_model)

    record = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "kind": "extractor_comparison",
        "source_path": str(md_path),
        "claude": claude_summary,
        "gemma": gemma_summary,
    }
    if claude_summary and gemma_summary:
        record["diff"] = _id_diff(claude_summary, gemma_summary)

    # Append one JSON line per run to the eval log next to the mygraph modules.
    with (_MYGRAPH / "eval_record.jsonl").open("a", encoding="utf-8") as log_file:
        log_file.write(json.dumps(record, ensure_ascii=False) + "\n")
    return record
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def main(argv: list[str]) -> int:
    """Command-line entry point for the extractor comparison.

    Parses *argv*, runs the comparison on the given markdown file and prints
    either the full record (``--print``) or a compact per-side summary.

    Returns:
        0 on success, 1 when the source file does not exist.

    Raises:
        SystemExit: raised by argparse (status 2) on invalid arguments,
            including ``--claude-only`` combined with ``--gemma-only``.
    """
    p = argparse.ArgumentParser(description="A/B Claude vs Gemma extraction")
    p.add_argument("source", help="path to source markdown")
    # Each "only" flag disables the other side; given together they would
    # skip both extractors and log a record with no data, so reject that.
    only = p.add_mutually_exclusive_group()
    only.add_argument("--claude-only", action="store_true")
    only.add_argument("--gemma-only", action="store_true")
    p.add_argument("--gemma-model", default=None,
                   help=f"override (default {os.environ.get('OLLAMA_DEFAULT_MODEL', 'gemma4:e4b')})")
    p.add_argument("--claude-model", default=None,
                   help="override Claude model")
    p.add_argument("--print", action="store_true", help="pretty-print the record to stdout")
    args = p.parse_args(argv)

    md = Path(args.source).expanduser().resolve()
    if not md.exists():
        print(f"eval_compare: not found: {md}", file=sys.stderr)
        return 1

    record = run(md, claude_only=args.claude_only, gemma_only=args.gemma_only,
                 gemma_model=args.gemma_model, claude_model=args.claude_model)
    if args.print:
        print(json.dumps(record, indent=2))
        return 0

    def _print_side(label, summary):
        # One headline line plus one validator line per side that ran.
        if not summary:
            return
        conf = summary["node_confidence"]
        print(f" {label} ({summary.get('model')}): {summary['n_nodes_raw']} nodes / {summary['n_edges_raw']} edges "
              f"high={conf['high']} med={conf['medium']} low={conf['low']} "
              f"{summary['latency_s']}s")
        v = summary.get("validator", {})
        print(f" validator: accepted {v.get('accepted_nodes')} / demoted {v.get('demoted_nodes')} / rejected {v.get('rejected_nodes')}")

    # Compact summary for the terminal
    print(f"source: {md.name}")
    _print_side("claude", record.get("claude"))
    _print_side("gemma", record.get("gemma"))

    d = record.get("diff")
    if d:
        # jaccard is None when neither side proposed any node ids; applying
        # the ".2f" format to None would raise TypeError.
        jaccard = d["jaccard"]
        jaccard_text = "n/a" if jaccard is None else f"{jaccard:.2f}"
        print(f" jaccard(node_ids) = {jaccard_text} "
              f"shared={len(d['shared'])} claude_only={len(d['claude_only'])} gemma_only={len(d['gemma_only'])}")
    return 0
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
if __name__ == "__main__":
|
|
185
|
+
sys.exit(main(sys.argv[1:]))
|