knowledge-worker 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge_worker-0.6.0.dist-info/METADATA +365 -0
- knowledge_worker-0.6.0.dist-info/RECORD +27 -0
- knowledge_worker-0.6.0.dist-info/WHEEL +5 -0
- knowledge_worker-0.6.0.dist-info/entry_points.txt +3 -0
- knowledge_worker-0.6.0.dist-info/licenses/LICENSE +21 -0
- knowledge_worker-0.6.0.dist-info/top_level.txt +2 -0
- mygraph/__init__.py +23 -0
- mygraph/anthropic_client.py +199 -0
- mygraph/audit.py +137 -0
- mygraph/check.py +273 -0
- mygraph/discover.py +654 -0
- mygraph/eval_log.py +36 -0
- mygraph/export_context.py +124 -0
- mygraph/extractor.py +243 -0
- mygraph/extractor_openai.py +165 -0
- mygraph/ingest.py +170 -0
- mygraph/memory_audit.py +1094 -0
- mygraph/merge.py +133 -0
- mygraph/mygraph.py +773 -0
- mygraph/owl_io.py +202 -0
- mygraph/review.py +151 -0
- mygraph/validator.py +149 -0
- mygraph/viz.py +409 -0
- ollama_proxy/eval_compare.py +185 -0
- ollama_proxy/extractor_adapter.py +168 -0
- ollama_proxy/proxy.py +143 -0
- ollama_proxy/server.py +194 -0
mygraph/memory_audit.py
ADDED
|
@@ -0,0 +1,1094 @@
|
|
|
1
|
+
"""
|
|
2
|
+
memory_audit.py - read-only graph analytics and Memory Audit HTML.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
mykg audit --out analytics.json
|
|
6
|
+
mykg audit --out analytics.json --html memory_audit.html
|
|
7
|
+
|
|
8
|
+
The audit is intentionally local and deterministic. It uses the public Graph API
|
|
9
|
+
instead of reading graph JSON directly, keeps source/provenance edges separate
|
|
10
|
+
from semantic graph analytics, and writes generated artifacts only when asked.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import json
|
|
17
|
+
import math
|
|
18
|
+
import sys
|
|
19
|
+
from collections import Counter, defaultdict, deque
|
|
20
|
+
from dataclasses import asdict
|
|
21
|
+
from datetime import datetime, timezone
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Iterable
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
from .mygraph import Edge, Graph
|
|
27
|
+
except ImportError: # direct script execution: python mygraph/memory_audit.py
|
|
28
|
+
from mygraph import Edge, Graph
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Edge types that tie a node to the source it was taken from. Edges of these
# types are left out of the semantic edge list and are instead used for the
# audit-time source projection and for provenance coverage.
PROVENANCE_EDGE_TYPES = {"MENTIONED_IN", "MADE_AT"}
# Rank of each confidence label when weak claims are sorted; a lower rank
# sorts first. Labels missing from this table are ranked -1 by the sort key.
CONFIDENCE_RANK = {"low": 0, "medium": 1, "high": 2}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _semantic_ids(g: Graph) -> list[str]:
    """Return the ids of every node whose type is not ``"source"``, sorted."""
    semantic = [node_id for node_id in g.nodes if g.nodes[node_id].type != "source"]
    semantic.sort()
    return semantic
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _semantic_edges(g: Graph, ids: set[str]) -> list[Edge]:
    """Return the non-provenance edges of ``g`` whose endpoints are both in ``ids``.

    Edge order follows ``g.edges``.
    """
    kept: list[Edge] = []
    for candidate in g.edges:
        if candidate.type in PROVENANCE_EDGE_TYPES:
            continue
        if candidate.src in ids and candidate.dst in ids:
            kept.append(candidate)
    return kept
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _source_projection_edges(g: Graph, ids: set[str]) -> set[tuple[str, str]]:
    """Link non-source nodes that share a source, without adding source nodes.

    For every provenance edge with a source node on one end and a member of
    ``ids`` on the other, the member is grouped under that source. Each
    group is then sorted and its members are linked consecutively (a chain,
    not every pair), so one busy source cannot dominate centrality.

    Returns:
        A set of ``(left, right)`` id pairs with ``left < right``. The pairs
        exist only for the audit; nothing is written back to ``g``.
    """
    members_by_source: dict[str, set[str]] = defaultdict(set)
    for link in g.edges:
        if link.type not in PROVENANCE_EDGE_TYPES:
            continue
        tail = g.nodes.get(link.src)
        head = g.nodes.get(link.dst)
        if tail and tail.type == "source" and link.dst in ids:
            source_id, member_id = link.src, link.dst
        elif head and head.type == "source" and link.src in ids:
            source_id, member_id = link.dst, link.src
        else:
            continue
        members_by_source[source_id].add(member_id)

    chained: set[tuple[str, str]] = set()
    for group in members_by_source.values():
        in_order = sorted(group)
        # Pair each member with its successor in sorted order.
        chained.update(zip(in_order, in_order[1:]))
    return chained
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _build_adjacency(
    ids: Iterable[str],
    edges: Iterable[Edge],
) -> tuple[dict[str, set[str]], dict[str, set[str]]]:
    """Build directed and undirected neighbor sets for the given node ids.

    Args:
        ids: Node ids to include. Any iterable is accepted, including a
            one-shot iterator or generator.
        edges: Edges exposing ``src`` and ``dst``. Edges with an endpoint
            outside ``ids`` are ignored.

    Returns:
        ``(directed, undirected)``. ``directed[n]`` holds the targets of
        edges leaving ``n``; ``undirected[n]`` holds neighbors in either
        direction. Every id has an entry, possibly an empty set.
    """
    # Materialize once: the ids seed two maps, and a one-shot iterator would
    # otherwise leave the second map empty.
    node_ids = list(ids)
    directed: dict[str, set[str]] = {nid: set() for nid in node_ids}
    undirected: dict[str, set[str]] = {nid: set() for nid in node_ids}
    for edge in edges:
        if edge.src not in directed or edge.dst not in directed:
            continue
        directed[edge.src].add(edge.dst)
        undirected[edge.src].add(edge.dst)
        undirected[edge.dst].add(edge.src)
    return directed, undirected
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _add_projection(
    directed: dict[str, set[str]],
    undirected: dict[str, set[str]],
    projected_edges: Iterable[tuple[str, str]],
) -> None:
    """Merge projected pairs into both adjacency maps, in place.

    Each pair is added in both directions to ``directed`` and ``undirected``.
    Pairs with an endpoint missing from ``directed`` are skipped.
    """
    for pair in projected_edges:
        first, second = pair
        if not (first in directed and second in directed):
            continue
        for neighbors in (directed, undirected):
            neighbors[first].add(second)
            neighbors[second].add(first)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _degree(undirected: dict[str, set[str]]) -> dict[str, int]:
    """Return the number of neighbors recorded for each node."""
    counts: dict[str, int] = {}
    for node_id in undirected:
        counts[node_id] = len(undirected[node_id])
    return counts
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _directed_counts(ids: Iterable[str], edges: Iterable[Edge]) -> tuple[dict[str, int], dict[str, int]]:
    """Count incoming and outgoing edges per node.

    Args:
        ids: Node ids to count for. Any iterable is accepted, including a
            one-shot iterator or generator.
        edges: Edges exposing ``src`` and ``dst``. Edges with an endpoint
            outside ``ids`` are ignored; parallel edges are counted
            individually.

    Returns:
        ``(in_degree, out_degree)``, each with an entry for every id.
    """
    # Materialize once: both tables are seeded from the same ids, and a
    # one-shot iterator would leave the second table empty and silently
    # drop every edge.
    node_ids = list(ids)
    in_degree = dict.fromkeys(node_ids, 0)
    out_degree = dict.fromkeys(node_ids, 0)
    for edge in edges:
        if edge.src not in out_degree or edge.dst not in in_degree:
            continue
        out_degree[edge.src] += 1
        in_degree[edge.dst] += 1
    return in_degree, out_degree
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _directed_edge_types(ids: Iterable[str], edges: Iterable[Edge]) -> tuple[dict[str, Counter], dict[str, Counter]]:
    """Tally edge types per node, split by direction.

    Args:
        ids: Node ids to tally for. Any iterable is accepted, including a
            one-shot iterator or generator.
        edges: Edges exposing ``src``, ``dst`` and ``type``. Edges with an
            endpoint outside ``ids`` are ignored.

    Returns:
        ``(in_types, out_types)``. Each maps every id to a ``Counter`` of
        edge types arriving at, or leaving, that node.
    """
    # Materialize once: both tables are seeded from the same ids, and a
    # one-shot iterator would leave the second table empty.
    node_ids = list(ids)
    # Comprehensions, not dict.fromkeys: every node needs its own Counter.
    in_types: dict[str, Counter] = {nid: Counter() for nid in node_ids}
    out_types: dict[str, Counter] = {nid: Counter() for nid in node_ids}
    for edge in edges:
        if edge.src in out_types and edge.dst in in_types:
            out_types[edge.src][edge.type] += 1
            in_types[edge.dst][edge.type] += 1
    return in_types, out_types
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _pagerank(
    ids: list[str],
    directed: dict[str, set[str]],
    damping: float = 0.85,
    iterations: int = 100,
    tolerance: float = 1.0e-12,
) -> dict[str, float]:
    """Score nodes with power-iteration PageRank.

    Args:
        ids: Node ids to score.
        directed: Outgoing neighbor sets; must have an entry for every id.
        damping: Share of a node's score passed along its outgoing links.
        iterations: Maximum number of update rounds.
        tolerance: Stop early once the summed absolute change of a round
            falls below this value.

    Returns:
        A score per id, or an empty dict when ``ids`` is empty.
    """
    count = len(ids)
    if not count:
        return {}
    rank = dict.fromkeys(ids, 1.0 / count)
    teleport = (1.0 - damping) / count
    for _round in range(iterations):
        updated = dict.fromkeys(ids, teleport)
        # Score held by nodes without outgoing links is spread over all nodes.
        dangling = sum(rank[nid] for nid in ids if not directed[nid])
        spread = damping * dangling / count
        for nid in ids:
            updated[nid] += spread
        for origin in ids:
            out_links = directed[origin]
            if out_links:
                portion = damping * rank[origin] / len(out_links)
                for target in out_links:
                    updated[target] += portion
        change = sum(abs(updated[nid] - rank[nid]) for nid in ids)
        rank = updated
        if change < tolerance:
            break
    return rank
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _betweenness(ids: list[str], adjacency: dict[str, set[str]]) -> dict[str, float]:
    """Compute Brandes betweenness centrality on an undirected, unweighted graph.

    Args:
        ids: Node ids to score.
        adjacency: Undirected neighbor sets; must have an entry for every id.

    Returns:
        A score per id. Raw totals are halved because every undirected path
        is found from both ends, and when there are more than two nodes they
        are also scaled by ``2 / ((n - 1) * (n - 2))``.
    """
    totals = dict.fromkeys(ids, 0.0)
    for root in ids:
        finish_order: list[str] = []
        parents: dict[str, list[str]] = {nid: [] for nid in ids}
        path_count = dict.fromkeys(ids, 0.0)
        path_count[root] = 1.0
        depth = dict.fromkeys(ids, -1)
        depth[root] = 0
        frontier = deque((root,))

        # Breadth-first pass: count shortest paths and remember their parents.
        while frontier:
            here = frontier.popleft()
            finish_order.append(here)
            next_depth = depth[here] + 1
            for other in adjacency[here]:
                if depth[other] < 0:
                    frontier.append(other)
                    depth[other] = next_depth
                if depth[other] == next_depth:
                    path_count[other] += path_count[here]
                    parents[other].append(here)

        # Walk back from the farthest nodes, pushing dependency to parents.
        dependency = dict.fromkeys(ids, 0.0)
        for settled in reversed(finish_order):
            for parent in parents[settled]:
                if path_count[settled]:
                    dependency[parent] += (path_count[parent] / path_count[settled]) * (
                        1.0 + dependency[settled]
                    )
            if settled != root:
                totals[settled] += dependency[settled]

    node_count = len(ids)
    factor = 2.0 / ((node_count - 1) * (node_count - 2)) if node_count > 2 else None
    for nid in totals:
        # Undirected paths are counted twice.
        totals[nid] /= 2.0
        if factor is not None:
            totals[nid] *= factor
    return totals
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _edge_betweenness(ids: list[str], adjacency: dict[str, set[str]]) -> dict[tuple[str, str], float]:
    """Compute Brandes edge betweenness, used for undirected community splitting.

    Args:
        ids: Node ids of the graph.
        adjacency: Undirected neighbor sets; must have an entry for every id.

    Returns:
        A score per edge, keyed by the sorted ``(low, high)`` id pair. Totals
        are halved because every undirected path is found from both ends.
        Edges that lie on no shortest path do not appear.
    """
    flow: dict[tuple[str, str], float] = {}
    for root in ids:
        finish_order: list[str] = []
        parents: dict[str, list[str]] = {nid: [] for nid in ids}
        path_count = dict.fromkeys(ids, 0.0)
        path_count[root] = 1.0
        depth = dict.fromkeys(ids, -1)
        depth[root] = 0
        frontier = deque((root,))

        # Breadth-first pass: count shortest paths and remember their parents.
        while frontier:
            here = frontier.popleft()
            finish_order.append(here)
            next_depth = depth[here] + 1
            for other in adjacency[here]:
                if depth[other] < 0:
                    frontier.append(other)
                    depth[other] = next_depth
                if depth[other] == next_depth:
                    path_count[other] += path_count[here]
                    parents[other].append(here)

        # Walk back from the farthest nodes, crediting each parent edge.
        dependency = dict.fromkeys(ids, 0.0)
        for settled in reversed(finish_order):
            settled_paths = path_count[settled]
            if not settled_paths:
                continue
            for parent in parents[settled]:
                credit = (path_count[parent] / settled_paths) * (1.0 + dependency[settled])
                key = tuple(sorted((parent, settled)))
                flow[key] = flow.get(key, 0.0) + credit
                dependency[parent] += credit

    return {key: value / 2.0 for key, value in flow.items()}
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _connected_components(ids: Iterable[str], adjacency: dict[str, set[str]]) -> list[list[str]]:
    """Split ``ids`` into connected components of ``adjacency``.

    Returns:
        One sorted id list per component. Components are ordered largest
        first; ties are broken by the smallest member id.
    """
    unvisited = set(ids)
    found: list[list[str]] = []
    while unvisited:
        # Starting from the smallest remaining id keeps the walk deterministic.
        seed = min(unvisited)
        unvisited.discard(seed)
        pending = deque((seed,))
        group: list[str] = []
        while pending:
            node_id = pending.popleft()
            group.append(node_id)
            reachable = sorted(other for other in adjacency[node_id] if other in unvisited)
            unvisited.difference_update(reachable)
            pending.extend(reachable)
        group.sort()
        found.append(group)
    found.sort(key=lambda members: (-len(members), members[0] if members else ""))
    return found
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _core_numbers(ids: list[str], adjacency: dict[str, set[str]]) -> dict[str, int]:
    """Assign each node its k-core number by peeling low-degree nodes.

    At level ``k`` the function repeatedly removes every remaining node with
    at most ``k`` remaining neighbors. Nodes removed at that level get core
    number ``k``. The level rises only when a full pass removes nothing.

    Args:
        ids: Node ids to score.
        adjacency: Undirected neighbor sets; must have an entry for every id.
    """
    alive = set(ids)
    shell = dict.fromkeys(ids, 0)
    level = 0
    while alive:
        peeled: list[str] = []
        progress = True
        while progress:
            progress = False
            for nid in sorted(alive):
                live_degree = len([other for other in adjacency[nid] if other in alive])
                if live_degree > level:
                    continue
                peeled.append(nid)
                alive.discard(nid)
                progress = True
        if not peeled:
            level += 1
            continue
        for nid in peeled:
            shell[nid] = level
    return shell
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _community_partition(
    ids: list[str],
    adjacency: dict[str, set[str]],
    max_communities: int = 12,
) -> dict[str, int]:
    """Partition nodes into communities by removing high-betweenness edges.

    The function starts from the connected components of ``adjacency``. It
    then repeatedly removes the highest-scoring edge (by edge betweenness)
    from the largest component with more than two nodes. It stops when the
    target community count is reached, when nothing can be split, or when
    the removal budget (the initial edge count) is spent.

    Args:
        ids: Node ids to partition.
        adjacency: Undirected neighbor sets. The caller's sets are copied
            and never mutated.
        max_communities: Upper bound on the target community count. The
            target is also capped at the rounded square root of the number
            of nodes, with a floor of one.

    Returns:
        A community index per id. Indices follow the component order, so
        index 0 is the largest community. Empty when ``ids`` is empty.
    """
    if not ids:
        return {}
    target = min(max_communities, max(1, round(math.sqrt(len(ids)))))
    current = {nid: set(neighbors) for nid, neighbors in adjacency.items()}
    components = _connected_components(ids, current)
    max_removals = sum(len(neighbors) for neighbors in current.values()) // 2
    removals = 0

    while len(components) < target and removals < max_removals:
        # Components arrive largest first, so the first one with more than
        # two members is the largest splittable component.
        largest = next((component for component in components if len(component) > 2), None)
        if largest is None:
            break
        # Build the membership set once rather than once per node.
        members = set(largest)
        subgraph = {nid: current[nid] & members for nid in largest}
        edge_scores = _edge_betweenness(largest, subgraph)
        if not edge_scores:
            break
        # Highest score wins; the edge key breaks ties deterministically.
        a, b = min(edge_scores.items(), key=lambda item: (-item[1], item[0]))[0]
        current[a].discard(b)
        current[b].discard(a)
        removals += 1
        components = _connected_components(ids, current)

    partition: dict[str, int] = {}
    for community_id, community_members in enumerate(components):
        for nid in community_members:
            partition[nid] = community_id
    return partition
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _confidence_is_weak(confidence: str | None) -> bool:
    """Return True when a confidence label is anything other than ``"high"``.

    A missing or empty label is treated as ``"high"`` and is not weak.
    """
    if not confidence:
        return False
    return confidence != "high"
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def _node_record(g: Graph, node_id: str, **metrics: object) -> dict:
    """Build a JSON-ready summary of one node.

    The record holds the node's ``id``, ``type``, ``label`` and
    ``confidence``, followed by any extra ``metrics`` keyword values. A
    metric sharing one of those four names replaces that field.

    Raises:
        KeyError: If ``node_id`` is not in ``g.nodes``.
    """
    node = g.nodes[node_id]
    return {
        "id": node.id,
        "type": node.type,
        "label": node.label,
        "confidence": node.confidence,
        **metrics,
    }
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def _edge_record(g: Graph, edge: Edge, index: int | None = None) -> dict:
    """Build a JSON-ready summary of one edge.

    The record always carries the edge's own fields. ``index`` is added only
    when given. The label and type of each endpoint are added only when that
    endpoint exists in ``g.nodes``.
    """
    record: dict = dict(
        src=edge.src,
        dst=edge.dst,
        type=edge.type,
        source_id=edge.source_id,
        confidence=edge.confidence,
        excerpt=edge.excerpt,
    )
    if index is not None:
        record["index"] = index

    known = g.nodes
    if edge.src in known:
        origin = known[edge.src]
        record["src_label"] = origin.label
        record["src_type"] = origin.type
    if edge.dst in known:
        target = known[edge.dst]
        record["dst_label"] = target.label
        record["dst_type"] = target.type
    return record
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _ranked_nodes(
    g: Graph,
    scores: dict[str, float],
    degree: dict[str, int],
    core: dict[str, int],
    communities: dict[str, int],
    limit: int,
    *,
    include_zero: bool = False,
) -> list[dict]:
    """Return the top-scoring nodes as node records.

    Nodes are ordered by score (highest first), then by degree (highest
    first), then by lower-cased label.

    Args:
        g: Graph supplying node details.
        scores: Score per node id; every id must exist in ``g.nodes``.
        degree: Degree per node id, used as a tie-breaker and reported.
        core: Core number per node id, reported in each record.
        communities: Community index per node id, reported in each record.
        limit: Maximum number of records. A value of zero or less yields
            an empty list.
        include_zero: Keep nodes whose score is zero or negative.

    Returns:
        Up to ``limit`` node records, each extended with ``score``,
        ``degree``, ``core_number`` and ``community``.
    """
    # The cap in the loop is only checked after an append, so a non-positive
    # limit has to be rejected up front or it would still yield one record.
    if limit <= 0:
        return []

    ranked = sorted(
        scores.items(),
        key=lambda item: (-item[1], -degree.get(item[0], 0), g.nodes[item[0]].label.lower()),
    )
    out: list[dict] = []
    for node_id, score in ranked:
        if not include_zero and score <= 0:
            continue
        out.append(
            _node_record(
                g,
                node_id,
                score=score,
                degree=degree.get(node_id, 0),
                core_number=core.get(node_id, 0),
                community=communities.get(node_id),
            )
        )
        if len(out) >= limit:
            break
    return out
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _provenance_coverage(g: Graph) -> dict:
    """Summarize how well the graph's nodes and edges are backed by sources.

    A non-source node counts as having provenance when it is an endpoint of
    at least one provenance edge. It counts as having an excerpt when at
    least one such edge carries a non-empty excerpt.

    Returns:
        A dict with three ratios (``node_coverage``, ``excerpt_coverage``,
        ``edge_source_coverage``), the counts behind them, node records for
        nodes without provenance (``missing_nodes``, sorted by id), and
        edge records for edges without a ``source_id``
        (``edges_missing_source_id``, each with its index in ``g.edges``).
        A ratio whose denominator is zero is reported as ``1.0``.
    """
    mentioned: set[str] = set()
    mentioned_with_excerpt: set[str] = set()
    provenance_total = 0
    provenance_with_excerpt = 0
    for edge in g.edges:
        if edge.type not in PROVENANCE_EDGE_TYPES:
            continue
        provenance_total += 1
        has_excerpt = bool(edge.excerpt)
        if has_excerpt:
            provenance_with_excerpt += 1
        # Either endpoint may be the cited node; the other is the source.
        for endpoint in (edge.src, edge.dst):
            if endpoint in g.nodes and g.nodes[endpoint].type != "source":
                mentioned.add(endpoint)
                if has_excerpt:
                    mentioned_with_excerpt.add(endpoint)

    non_source_nodes = [nid for nid, node in g.nodes.items() if node.type != "source"]
    missing_nodes = sorted(nid for nid in non_source_nodes if nid not in mentioned)
    edges_missing_source_id = [
        _edge_record(g, edge, index)
        for index, edge in enumerate(g.edges)
        if not edge.source_id
    ]
    edges_total = len(g.edges)
    # Every edge either has a source id or is listed as missing one.
    edges_with_source_id = edges_total - len(edges_missing_source_id)

    def ratio(numerator: int, denominator: int) -> float:
        # Nothing to cover counts as fully covered.
        return 1.0 if denominator == 0 else numerator / denominator

    return {
        "node_coverage": ratio(len(mentioned), len(non_source_nodes)),
        "excerpt_coverage": ratio(provenance_with_excerpt, provenance_total),
        "edge_source_coverage": ratio(edges_with_source_id, edges_total),
        "non_source_nodes": len(non_source_nodes),
        "nodes_with_provenance": len(mentioned),
        "nodes_with_provenance_excerpt": len(mentioned_with_excerpt),
        "missing_nodes": [_node_record(g, nid) for nid in missing_nodes],
        "edges_total": edges_total,
        "edges_with_source_id": edges_with_source_id,
        "edges_missing_source_id": edges_missing_source_id,
        "provenance_edges": provenance_total,
        "provenance_edges_with_excerpt": provenance_with_excerpt,
    }
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def _proof_trail(g: Graph, node_ids: list[str], limit: int) -> list[dict]:
    """Collect source evidence for the given nodes.

    Args:
        g: Graph supplying nodes and ``provenance(node_id)``, which is
            iterated as ``(source_id, excerpt)`` pairs.
        node_ids: Candidate node ids in priority order. Duplicates and ids
            missing from ``g.nodes`` are skipped.
        limit: Maximum number of records. A value of zero or less yields
            an empty list.

    Returns:
        Up to ``limit`` node records, each with a ``provenance`` list of
        ``source_id`` / ``source_label`` / ``excerpt`` entries. Nodes with
        no provenance are left out. When a source id has no node in the
        graph, the id itself is used as the label.
    """
    # The cap in the loop is only checked after an append, so a non-positive
    # limit has to be rejected up front or it would still yield one record.
    if limit <= 0:
        return []

    out: list[dict] = []
    seen: set[str] = set()
    for node_id in node_ids:
        if node_id in seen or node_id not in g.nodes:
            continue
        seen.add(node_id)
        provenance = []
        for source_id, excerpt in g.provenance(node_id):
            source = g.nodes.get(source_id)
            provenance.append(
                {
                    "source_id": source_id,
                    "source_label": source.label if source else source_id,
                    "excerpt": excerpt,
                }
            )
        if not provenance:
            continue
        out.append(_node_record(g, node_id, provenance=provenance))
        if len(out) >= limit:
            break
    return out
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def _weak_claims(g: Graph, coverage: dict, limit: int) -> list[dict]:
    """List the claims that most need review, weakest first.

    Four kinds of claim are gathered, each tagged with a ``kind``:
    non-source nodes with weak confidence, edges with weak confidence,
    nodes missing provenance, and edges missing a source id. The last two
    come from ``coverage``.

    Claims are ordered by confidence rank (unknown labels first, then low,
    medium, high), then by kind, then by node id or edge source id.

    Returns:
        The first ``limit`` claims of that ordering.
    """
    claims: list[dict] = [
        {"kind": "node_confidence", **_node_record(g, node_id)}
        for node_id, node in g.nodes.items()
        if node.type != "source" and _confidence_is_weak(node.confidence)
    ]
    claims.extend(
        {"kind": "edge_confidence", **_edge_record(g, edge, index)}
        for index, edge in enumerate(g.edges)
        if _confidence_is_weak(edge.confidence)
    )
    claims.extend({"kind": "missing_node_provenance", **node} for node in coverage["missing_nodes"])
    claims.extend({"kind": "missing_edge_source_id", **edge} for edge in coverage["edges_missing_source_id"])

    def ordering(claim: dict) -> tuple:
        rank = CONFIDENCE_RANK.get(str(claim.get("confidence")), -1)
        subject = claim.get("id") or claim.get("src") or ""
        return (rank, claim.get("kind", ""), subject)

    claims.sort(key=ordering)
    return claims[:limit]
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def _idea_flow_records(
    g: Graph,
    ids: list[str],
    in_degree: dict[str, int],
    out_degree: dict[str, int],
    in_types: dict[str, Counter],
    out_types: dict[str, Counter],
    communities: dict[str, int],
    limit: int,
    *,
    mode: str,
) -> list[dict]:
    """Rank idea nodes by the imbalance between incoming and outgoing edges.

    In ``"attractor"`` mode the balance is ``in - out``; in ``"generator"``
    mode it is ``out - in``. Only nodes of type ``"idea"`` with a positive
    balance, and at least one edge in the dominant direction, are reported.
    Results are ordered by balance (highest first), then by the larger of
    the two degrees (highest first), then by lower-cased label.

    Args:
        g: Graph supplying node details.
        ids: Candidate node ids; every id must exist in ``g.nodes``.
        in_degree: Incoming edge count per node id.
        out_degree: Outgoing edge count per node id.
        in_types: Incoming edge-type tallies per node id.
        out_types: Outgoing edge-type tallies per node id.
        communities: Community index per node id.
        limit: Maximum number of records. A value of zero or less yields
            an empty list.
        mode: ``"attractor"`` or ``"generator"``.

    Returns:
        Up to ``limit`` node records with the flow metrics and a review
        prompt attached.

    Raises:
        ValueError: If ``mode`` is not one of the two supported values.
    """
    if mode not in {"attractor", "generator"}:
        raise ValueError(f"unknown idea flow mode: {mode}")
    # The cap in the loop is only checked after an append, so a non-positive
    # limit has to be rejected up front or it would still yield one record.
    if limit <= 0:
        return []

    attractor = mode == "attractor"
    if attractor:
        prompt = (
            "Is this a durable principle, an unresolved sink, or an over-compressed label? "
            "Write one next action."
        )
    else:
        prompt = (
            "Which branch deserves leg work next? Choose one edge to operationalize, "
            "verify, or prune."
        )

    def balance(node_id: str) -> int:
        inbound = in_degree.get(node_id, 0)
        outbound = out_degree.get(node_id, 0)
        return inbound - outbound if attractor else outbound - inbound

    idea_ids = [nid for nid in ids if g.nodes[nid].type == "idea"]
    ranked = sorted(
        idea_ids,
        key=lambda nid: (
            -balance(nid),
            -max(in_degree.get(nid, 0), out_degree.get(nid, 0)),
            g.nodes[nid].label.lower(),
        ),
    )

    records: list[dict] = []
    for node_id in ranked:
        inbound = in_degree.get(node_id, 0)
        outbound = out_degree.get(node_id, 0)
        flow = balance(node_id)
        dominant = inbound if attractor else outbound
        if dominant < 1 or flow <= 0:
            continue
        records.append(
            _node_record(
                g,
                node_id,
                score=float(flow),
                in_degree=inbound,
                out_degree=outbound,
                flow_balance=flow,
                inbound_edge_types=dict(in_types.get(node_id, Counter())),
                outbound_edge_types=dict(out_types.get(node_id, Counter())),
                community=communities.get(node_id),
                prompt=prompt,
            )
        )
        if len(records) >= limit:
            break
    return records
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def _weak_claim_queue(claims: list[dict], limit: int) -> list[dict]:
    """Turn the first ``limit`` weak claims into review-queue entries.

    Each entry is a copy of the claim with a ``prompt`` chosen by the
    claim's ``kind`` and the fixed list of ``review_options`` added.
    Unrecognized kinds get the generic prompt.
    """
    generic_prompt = "Choose: verify, downgrade, convert to question, ignore for now."
    prompts_by_kind = (
        ("missing_node_provenance", "Find source evidence or keep this out of durable memory."),
        ("missing_edge_source_id", "Attach a source id or remove this edge from the durable graph."),
        (
            "edge_confidence",
            "Inspect this relationship: verify it, downgrade it, or turn it into an open question.",
        ),
    )

    def prompt_for(claim: dict) -> str:
        kind = claim.get("kind")
        return next((text for name, text in prompts_by_kind if kind == name), generic_prompt)

    return [
        {
            **claim,
            "prompt": prompt_for(claim),
            "review_options": ["verify", "downgrade", "convert_to_question", "ignore_for_now"],
        }
        for claim in claims[:limit]
    ]
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
def _community_records(
    g: Graph,
    communities: dict[str, int],
    pagerank: dict[str, float],
    degree: dict[str, int],
    core: dict[str, int],
) -> list[dict]:
    """Summarize each community for the audit report.

    Communities are listed largest first, with ties broken by community id.
    Each summary holds the community's size, a tally of member node types,
    the sorted member ids, and node records for up to twelve leading
    members. Leaders are chosen by PageRank, then degree, then lower-cased
    label.
    """
    buckets: dict[int, list[str]] = {}
    for node_id, community_id in communities.items():
        buckets.setdefault(community_id, []).append(node_id)

    def prominence(nid: str) -> tuple:
        return (-pagerank.get(nid, 0.0), -degree.get(nid, 0), g.nodes[nid].label.lower())

    summaries: list[dict] = []
    largest_first = sorted(buckets.items(), key=lambda entry: (-len(entry[1]), entry[0]))
    for community_id, unsorted_members in largest_first:
        members = sorted(unsorted_members)
        leaders = sorted(members, key=prominence)[:12]
        leader_records = [
            _node_record(
                g,
                nid,
                score=pagerank.get(nid, 0.0),
                degree=degree.get(nid, 0),
                core_number=core.get(nid, 0),
            )
            for nid in leaders
        ]
        type_tally = Counter(g.nodes[nid].type for nid in members)
        summaries.append(
            {
                "id": community_id,
                "size": len(members),
                "types": dict(type_tally),
                "members": members,
                "top_members": leader_records,
            }
        )
    return summaries
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def build_memory_audit(g: Graph, *, limit: int = 25, max_communities: int = 12) -> dict:
    """Compute the Memory Audit report for a graph without modifying it.

    Topology metrics (PageRank, betweenness, core numbers, communities) run
    on the semantic edges plus the audit-time source projection. Directed
    flow metrics use the semantic edges only.

    Args:
        g: Graph to analyze.
        limit: Maximum number of entries in each ranked list.
        max_communities: Upper bound passed to the community partition.

    Returns:
        A dict with the keys ``schema_version``, ``generated_at`` (UTC ISO
        timestamp), ``stats``, ``counts``, ``ranked``, ``centrality``,
        ``directed_flow``, ``legwork_queue``, ``communities``,
        ``low_confidence_edges`` and ``provenance_coverage``. Several ranked
        lists appear under more than one key and are the same list object.
    """
    ids = _semantic_ids(g)
    id_set = set(ids)
    semantic_edges = _semantic_edges(g, id_set)
    projection_edges = _source_projection_edges(g, id_set)

    # Topology: semantic edges plus the shared-source projection.
    directed, undirected = _build_adjacency(ids, semantic_edges)
    _add_projection(directed, undirected, projection_edges)
    degree = _degree(undirected)
    pagerank = _pagerank(ids, directed)
    betweenness = _betweenness(ids, undirected)
    core = _core_numbers(ids, undirected)
    communities = _community_partition(ids, undirected, max_communities=max_communities)
    coverage = _provenance_coverage(g)

    # Directed flow: semantic edges only, so citation volume does not count.
    semantic_in_degree, semantic_out_degree = _directed_counts(ids, semantic_edges)
    semantic_in_types, semantic_out_types = _directed_edge_types(ids, semantic_edges)

    def rank_by(scores: dict[str, float], *, include_zero: bool = False) -> list[dict]:
        # Rank nodes by ``scores`` using the shared topology context.
        return _ranked_nodes(g, scores, degree, core, communities, limit, include_zero=include_zero)

    def idea_flow(mode: str) -> list[dict]:
        # Rank idea nodes by semantic in/out imbalance for the given mode.
        return _idea_flow_records(
            g,
            ids,
            semantic_in_degree,
            semantic_out_degree,
            semantic_in_types,
            semantic_out_types,
            communities,
            limit,
            mode=mode,
        )

    important = rank_by(pagerank, include_zero=True)
    bridges = rank_by(betweenness)
    structural_core = rank_by({nid: float(core.get(nid, 0)) for nid in ids}, include_zero=True)
    proof_ids = [record["id"] for record in important] + [record["id"] for record in bridges]
    weak_claims = _weak_claims(g, coverage, limit)
    # Built once and reported under both "ranked" and "legwork_queue".
    weak_claim_queue = _weak_claim_queue(weak_claims, limit)
    idea_attractors = idea_flow("attractor")
    idea_generators = idea_flow("generator")

    return {
        "schema_version": "memory-audit/v1",
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "stats": {
            "nodes": len(g.nodes),
            "edges": len(g.edges),
            "semantic_nodes": len(ids),
            "semantic_edges": len(semantic_edges),
            "source_projection_edges": len(projection_edges),
            "audit_edges": len({tuple(sorted((edge.src, edge.dst))) for edge in semantic_edges} | projection_edges),
            "source_nodes": sum(1 for node in g.nodes.values() if node.type == "source"),
            "semantic_components": len(_connected_components(ids, undirected)) if ids else 0,
            "communities": len(set(communities.values())),
            "max_core_number": max(core.values()) if core else 0,
        },
        "counts": {
            "node_types": dict(Counter(node.type for node in g.nodes.values())),
            "edge_types": dict(Counter(edge.type for edge in g.edges)),
            "confidence": {
                "nodes": dict(Counter(node.confidence for node in g.nodes.values())),
                "edges": dict(Counter(edge.confidence for edge in g.edges)),
            },
        },
        "ranked": {
            "important_concepts": important,
            "bridge_ideas": bridges,
            "idea_attractors": idea_attractors,
            "idea_generators": idea_generators,
            "structural_core": structural_core,
            "weak_claims": weak_claims,
            "weak_claim_queue": weak_claim_queue,
            "proof_trail": _proof_trail(g, proof_ids, limit),
        },
        "centrality": {
            "pagerank": important,
            "betweenness": bridges,
            "core_number": structural_core,
            "semantic_in_degree": rank_by({nid: float(semantic_in_degree.get(nid, 0)) for nid in ids}),
            "semantic_out_degree": rank_by({nid: float(semantic_out_degree.get(nid, 0)) for nid in ids}),
        },
        "directed_flow": {
            "note": (
                "Directed flow uses semantic edges only. Provenance/source projection edges are excluded "
                "so attractors and generators reflect relationship direction, not citation volume."
            ),
            "idea_attractors": idea_attractors,
            "idea_generators": idea_generators,
        },
        "legwork_queue": {
            "idea_attractors": idea_attractors,
            "idea_generators": idea_generators,
            "weak_claims": weak_claim_queue,
        },
        "communities": _community_records(g, communities, pagerank, degree, core),
        "low_confidence_edges": [
            _edge_record(g, edge, index)
            for index, edge in enumerate(g.edges)
            if _confidence_is_weak(edge.confidence)
        ],
        "provenance_coverage": coverage,
    }
|
|
735
|
+
|
|
736
|
+
|
|
737
|
+
def _graph_payload(g: Graph) -> dict:
    """Convert the graph's nodes and edges to plain dicts via ``asdict``.

    Returns:
        ``{"nodes": {node_id: node_dict}, "edges": [edge_dict, ...]}``, with
        nodes and edges in the graph's own order.
    """
    serialized_nodes: dict = {}
    for node_id, node in g.nodes.items():
        serialized_nodes[node_id] = asdict(node)
    serialized_edges = list(map(asdict, g.edges))
    return {"nodes": serialized_nodes, "edges": serialized_edges}
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
HTML_TEMPLATE = r"""<!doctype html>
|
|
745
|
+
<html lang="en">
|
|
746
|
+
<meta charset="utf-8" />
|
|
747
|
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
748
|
+
<title>Memory Audit</title>
|
|
749
|
+
<style>
|
|
750
|
+
:root {
|
|
751
|
+
--bg: #f6f7f9;
|
|
752
|
+
--fg: #17202a;
|
|
753
|
+
--muted: #667085;
|
|
754
|
+
--line: #d7dde5;
|
|
755
|
+
--panel: #ffffff;
|
|
756
|
+
--accent: #0f766e;
|
|
757
|
+
--blue: #2563eb;
|
|
758
|
+
--amber: #a16207;
|
|
759
|
+
--red: #b42318;
|
|
760
|
+
--ink: #111827;
|
|
761
|
+
}
|
|
762
|
+
* { box-sizing: border-box; }
|
|
763
|
+
html, body { margin: 0; min-height: 100%; background: var(--bg); color: var(--fg);
|
|
764
|
+
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; }
|
|
765
|
+
body { overflow: hidden; }
|
|
766
|
+
header { height: 56px; padding: 0 18px; display: flex; align-items: center; gap: 14px;
|
|
767
|
+
border-bottom: 1px solid var(--line); background: #fff; }
|
|
768
|
+
header h1 { margin: 0; font-size: 18px; letter-spacing: 0; }
|
|
769
|
+
header .meta { color: var(--muted); font-size: 13px; }
|
|
770
|
+
main { height: calc(100vh - 56px); display: grid; grid-template-columns: minmax(360px, 42%) 1fr; }
|
|
771
|
+
#panels { overflow: auto; padding: 14px; display: grid; gap: 12px; align-content: start; }
|
|
772
|
+
#map { min-width: 0; border-left: 1px solid var(--line); display: grid; grid-template-rows: auto 1fr;
|
|
773
|
+
background: #eef2f6; }
|
|
774
|
+
.metric-row { display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 8px; }
|
|
775
|
+
.metric, .panel, #details { background: var(--panel); border: 1px solid var(--line); border-radius: 8px; }
|
|
776
|
+
.metric { padding: 10px 12px; min-width: 0; }
|
|
777
|
+
.metric strong { display: block; font-size: 18px; line-height: 1.1; }
|
|
778
|
+
.metric span { display: block; color: var(--muted); font-size: 12px; margin-top: 3px; overflow-wrap: anywhere; }
|
|
779
|
+
.panel h2 { margin: 0; padding: 12px 12px 4px; font-size: 14px; color: var(--ink); }
|
|
780
|
+
.panel ol, .panel ul { list-style: none; margin: 0; padding: 0 8px 8px; }
|
|
781
|
+
.item { width: 100%; border: 0; background: transparent; text-align: left; padding: 8px;
|
|
782
|
+
border-radius: 6px; cursor: pointer; display: grid; gap: 3px; color: var(--fg); }
|
|
783
|
+
.item:hover, .item.selected { background: #edf7f5; }
|
|
784
|
+
.item-title { font-size: 13px; font-weight: 700; overflow-wrap: anywhere; }
|
|
785
|
+
.item-meta { color: var(--muted); font-size: 12px; overflow-wrap: anywhere; }
|
|
786
|
+
.score { color: var(--accent); font-variant-numeric: tabular-nums; }
|
|
787
|
+
.weak .score { color: var(--red); }
|
|
788
|
+
.toolbar { min-height: 48px; padding: 10px 12px; display: flex; align-items: center; justify-content: space-between;
|
|
789
|
+
gap: 12px; border-bottom: 1px solid var(--line); background: #fff; }
|
|
790
|
+
.toolbar strong { font-size: 14px; }
|
|
791
|
+
.toolbar span { color: var(--muted); font-size: 12px; }
|
|
792
|
+
#stage { position: relative; min-height: 0; }
|
|
793
|
+
svg { width: 100%; height: 100%; display: block; }
|
|
794
|
+
.edge { stroke: #96a1b2; stroke-width: 1.2; stroke-opacity: 0.46; }
|
|
795
|
+
.edge.weak { stroke: var(--red); stroke-dasharray: 5 4; stroke-opacity: 0.7; }
|
|
796
|
+
.node circle { stroke: #fff; stroke-width: 1.5; }
|
|
797
|
+
.node text { fill: #1f2937; font-size: 11px; paint-order: stroke; stroke: #f8fafc; stroke-width: 4px;
|
|
798
|
+
stroke-linecap: round; stroke-linejoin: round; pointer-events: none; }
|
|
799
|
+
.node.dim { opacity: 0.35; }
|
|
800
|
+
.node.selected circle { stroke: var(--accent); stroke-width: 4; }
|
|
801
|
+
#details { position: absolute; left: 12px; bottom: 12px; width: min(460px, calc(100% - 24px));
|
|
802
|
+
max-height: 40%; overflow: auto; padding: 12px; box-shadow: 0 16px 44px rgba(17,24,39,.14); }
|
|
803
|
+
#details h3 { margin: 0 0 4px; font-size: 15px; }
|
|
804
|
+
#details .body, #details li { font-size: 12px; line-height: 1.45; }
|
|
805
|
+
#details .body { margin: 8px 0; }
|
|
806
|
+
#details ul { margin: 6px 0 0; padding-left: 18px; }
|
|
807
|
+
code { color: var(--muted); overflow-wrap: anywhere; }
|
|
808
|
+
.pill { display: inline-flex; align-items: center; min-height: 18px; padding: 1px 6px; border-radius: 999px;
|
|
809
|
+
background: #eef2f6; color: var(--muted); font-size: 11px; }
|
|
810
|
+
.pill.low { color: var(--red); background: #fee4e2; }
|
|
811
|
+
.pill.medium { color: var(--amber); background: #fef0c7; }
|
|
812
|
+
@media (max-width: 900px) {
|
|
813
|
+
body { overflow: auto; }
|
|
814
|
+
main { height: auto; display: block; }
|
|
815
|
+
#panels { overflow: visible; }
|
|
816
|
+
#map { height: 720px; border-left: 0; border-top: 1px solid var(--line); }
|
|
817
|
+
.metric-row { grid-template-columns: repeat(2, minmax(0, 1fr)); }
|
|
818
|
+
}
|
|
819
|
+
</style>
|
|
820
|
+
<body>
|
|
821
|
+
<header>
|
|
822
|
+
<h1>Memory Audit</h1>
|
|
823
|
+
<div class="meta" id="generated"></div>
|
|
824
|
+
</header>
|
|
825
|
+
<main>
|
|
826
|
+
<section id="panels" aria-label="Ranked audit panels"></section>
|
|
827
|
+
<section id="map" aria-label="Graph canvas">
|
|
828
|
+
<div class="toolbar">
|
|
829
|
+
<strong>Graph Canvas</strong>
|
|
830
|
+
<span>Important and bridge nodes are labeled first. Select a panel row or node.</span>
|
|
831
|
+
</div>
|
|
832
|
+
<div id="stage">
|
|
833
|
+
<svg id="graph" viewBox="0 0 1200 760" role="img" aria-label="Memory audit graph"></svg>
|
|
834
|
+
<aside id="details"></aside>
|
|
835
|
+
</div>
|
|
836
|
+
</section>
|
|
837
|
+
</main>
|
|
838
|
+
<script>
|
|
839
|
+
const AUDIT = __AUDIT_JSON__;
|
|
840
|
+
const GRAPH = __GRAPH_JSON__;
|
|
841
|
+
const nodes = Object.values(GRAPH.nodes || {});
|
|
842
|
+
const edges = GRAPH.edges || [];
|
|
843
|
+
const nodeById = new Map(nodes.map(n => [n.id, n]));
|
|
844
|
+
const importantIds = new Set((AUDIT.ranked.important_concepts || []).slice(0, 10).map(n => n.id));
|
|
845
|
+
const bridgeIds = new Set((AUDIT.ranked.bridge_ideas || []).slice(0, 10).map(n => n.id));
|
|
846
|
+
const selected = { id: null };
|
|
847
|
+
const colors = {
|
|
848
|
+
person: "#dc2626", topic: "#2563eb", idea: "#0f766e", project: "#7c3aed",
|
|
849
|
+
goal: "#16a34a", question: "#a16207", decision: "#0891b2", reference: "#c026d3",
|
|
850
|
+
source: "#64748b"
|
|
851
|
+
};
|
|
852
|
+
|
|
853
|
+
function esc(value) {
  // Escape text for interpolation into HTML element content or a
  // double-quoted attribute value. Falsy input (null, undefined, "", 0)
  // yields the empty string.
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;" };
  return String(value || "").replace(/[&<>"]/g, ch => entities[ch]);
}
|
|
858
|
+
|
|
859
|
+
function fmt(value, digits = 3) {
  // Fixed-point text for numeric values; anything that is not of type
  // "number" renders as the empty string.
  return typeof value === "number" ? value.toFixed(digits) : "";
}
|
|
863
|
+
|
|
864
|
+
function confidencePill(value) {
  // Badge markup for a confidence level. The level is also appended as a CSS
  // class next to "pill"; a missing level is displayed as "?".
  const modifier = esc(value || "");
  const text = esc(value || "?");
  return `<span class="pill ${modifier}">${text}</span>`;
}
|
|
867
|
+
|
|
868
|
+
function nodeLine(record) {
  // One-line HTML summary of a node record: optional numeric score, then
  // type, id and confidence badge.
  const parts = [];
  if (typeof record.score === "number") {
    parts.push(`<span class="score">${fmt(record.score)}</span> `);
  }
  parts.push(`${esc(record.type)} · <code>${esc(record.id)}</code> ${confidencePill(record.confidence)}`);
  return parts.join("");
}
|
|
872
|
+
|
|
873
|
+
function claimTitle(claim) {
  // Claims that carry an id are titled by label (falling back to the id).
  // Claims without one are shown as "src -> dst", with "?" standing in for a
  // missing endpoint.
  if (!claim.id) {
    const from = claim.src || "?";
    const to = claim.dst || "?";
    return `${from} -> ${to}`;
  }
  return claim.label || claim.id;
}
|
|
877
|
+
|
|
878
|
+
function claimMeta(claim) {
  // "kind · type · confidence". When the claim has no type, the placeholder
  // is "edge" for the two edge-level kinds and "node" for every other kind.
  const edgeKinds = ["edge_confidence", "missing_edge_source_id"];
  const fallbackType = edgeKinds.includes(claim.kind) ? "edge" : "node";
  return `${claim.kind} · ${claim.type || fallbackType} · ${claim.confidence || "unknown"}`;
}
|
|
884
|
+
|
|
885
|
+
function renderPanels() {
  // Fill the header summary line, then rebuild the left column: four headline
  // metric tiles followed by the ranked panels. Every element carrying a
  // data-node-id attribute selects that node when clicked.
  const { stats, ranked } = AUDIT;
  const coverage = AUDIT.provenance_coverage || {};
  const coveragePercent = Math.round((coverage.node_coverage || 0) * 100);
  const metricTile = (value, caption) =>
    `<div class="metric"><strong>${value}</strong><span>${caption}</span></div>`;

  document.getElementById("generated").textContent =
    `${stats.nodes} nodes · ${stats.edges} edges · generated ${AUDIT.generated_at}`;

  const container = document.getElementById("panels");
  const tiles = [
    metricTile(stats.semantic_nodes, "semantic nodes"),
    metricTile(stats.communities, "communities"),
    metricTile(stats.max_core_number, "max k-core"),
    metricTile(`${coveragePercent}%`, "provenance coverage"),
  ];
  const sections = [
    `<div class="metric-row">${tiles.join("")}</div>`,
    rankedPanel("Important Concepts", ranked.important_concepts || [], "PageRank over semantic edges"),
    rankedPanel("Bridge Ideas", ranked.bridge_ideas || [], "Betweenness centrality"),
    flowPanel("Idea Attractors", ranked.idea_attractors || [], "High semantic in-degree, low out-degree"),
    flowPanel("Idea Generators", ranked.idea_generators || [], "High semantic out-degree, low in-degree"),
    // Accept either key name for the weak-claim list.
    weakPanel("Weak Claim Queue", ranked.weak_claim_queue || ranked.weak_claims || []),
    proofPanel("Proof Trail", ranked.proof_trail || []),
  ];
  container.innerHTML = sections.join("\n");

  container.querySelectorAll("[data-node-id]").forEach(button => {
    button.addEventListener("click", () => selectNode(button.dataset.nodeId));
  });
}
|
|
908
|
+
|
|
909
|
+
function rankedPanel(title, records, subtitle) {
  // Panel listing at most 12 ranked node records as clickable rows.
  // NOTE: `subtitle` is accepted but is not used in the generated markup.
  const emptyRow = `<li class="item-meta" style="padding:8px">None</li>`;
  let rows = "";
  for (const record of records.slice(0, 12)) {
    rows += `
    <li><button class="item" data-node-id="${esc(record.id)}">
      <span class="item-title">${esc(record.label || record.id)}</span>
      <span class="item-meta">${nodeLine(record)} · degree ${record.degree || 0} · core ${record.core_number || 0}</span>
    </button></li>`;
  }
  return `<section class="panel"><h2>${esc(title)}</h2><ol>${rows || emptyRow}</ol></section>`;
}
|
|
917
|
+
|
|
918
|
+
function flowPanel(title, records, subtitle) {
  // Panel listing at most 12 records with their in/out degree and flow
  // balance. `subtitle` is repeated at the start of every row's meta line,
  // and the record's prompt (if any) is shown on a second meta line.
  const emptyRow = `<li class="item-meta" style="padding:8px">None</li>`;
  let rows = "";
  for (const record of records.slice(0, 12)) {
    const inDegree = record.in_degree || 0;
    const outDegree = record.out_degree || 0;
    const balance = record.flow_balance || 0;
    rows += `
    <li><button class="item" data-node-id="${esc(record.id)}">
      <span class="item-title">${esc(record.label || record.id)}</span>
      <span class="item-meta">${esc(subtitle)} · in ${inDegree} · out ${outDegree} · balance ${balance}</span>
      <span class="item-meta">${esc(record.prompt || "")}</span>
    </button></li>`;
  }
  return `<section class="panel"><h2>${esc(title)}</h2><ol>${rows || emptyRow}</ol></section>`;
}
|
|
927
|
+
|
|
928
|
+
function weakPanel(title, claims) {
  // Panel listing at most 14 weak claims. Each row targets the claim's own
  // id when present, otherwise its src, otherwise its dst.
  const emptyRow = `<li class="item-meta" style="padding:8px">No weak claims found</li>`;
  const defaultPrompt = "Choose a review action before this becomes durable memory.";
  const toRow = claim => {
    const targetId = claim.id || claim.src || claim.dst || "";
    const confidence = esc(claim.confidence || "missing");
    return `<li><button class="item weak" data-node-id="${esc(targetId)}">
      <span class="item-title">${esc(claimTitle(claim))}</span>
      <span class="item-meta"><span class="score">${confidence}</span> ${esc(claimMeta(claim))}</span>
      <span class="item-meta">${esc(claim.prompt || defaultPrompt)}</span>
    </button></li>`;
  };
  const rows = claims.slice(0, 14).map(toRow).join("");
  return `<section class="panel"><h2>${esc(title)}</h2><ul>${rows || emptyRow}</ul></section>`;
}
|
|
939
|
+
|
|
940
|
+
function proofPanel(title, records) {
  // Panel listing at most 10 records together with the source id and a short
  // excerpt taken from the first entry of each record's provenance list.
  const emptyRow = `<li class="item-meta" style="padding:8px">No proof trails found</li>`;
  const rows = records.slice(0, 10).map(record => {
    const first = (record.provenance || [])[0] || {};
    // Truncate the raw excerpt BEFORE escaping. Slicing the escaped text can
    // cut an entity such as "&amp;" in half and emit malformed HTML.
    const excerpt = first.excerpt ? "- " + esc(String(first.excerpt).slice(0, 120)) : "";
    return `<li><button class="item" data-node-id="${esc(record.id)}">
      <span class="item-title">${esc(record.label || record.id)}</span>
      <span class="item-meta">${esc(first.source_id || "no source")} ${excerpt}</span>
    </button></li>`;
  }).join("");
  return `<section class="panel"><h2>${esc(title)}</h2><ul>${rows || emptyRow}</ul></section>`;
}
|
|
950
|
+
|
|
951
|
+
function layoutNodes() {
  // Assign x/y coordinates to every node, in place. Non-source nodes go on
  // circles around the centre whose radius depends on the node's community
  // (node id -> community id comes from AUDIT.communities; 0 when absent).
  // Nodes of type "source" are spread along a fixed 340 x 260 ellipse.
  const CENTER_X = 600, CENTER_Y = 380;

  const communityOf = new Map();
  (AUDIT.communities || []).forEach(group => {
    (group.members || []).forEach(memberId => communityOf.set(memberId, group.id));
  });

  const place = (node, angle, radiusX, radiusY) => {
    node.x = CENTER_X + Math.cos(angle) * radiusX;
    node.y = CENTER_Y + Math.sin(angle) * radiusY;
  };

  const semantic = [];
  const sources = [];
  for (const node of nodes) {
    (node.type === "source" ? sources : semantic).push(node);
  }

  for (const [index, node] of semantic.entries()) {
    const community = communityOf.get(node.id) || 0;
    const ring = 120 + (community % 5) * 72;
    const angle = (Math.PI * 2 * index / Math.max(1, semantic.length)) + community * 0.63;
    place(node, angle, ring, ring);
  }

  for (const [index, node] of sources.entries()) {
    const angle = Math.PI * 2 * index / Math.max(1, sources.length);
    place(node, angle, 340, 260);
  }
}
|
|
972
|
+
|
|
973
|
+
function make(tag, attrs, parent) {
  // Create an element in the SVG namespace, copy `attrs` onto it as
  // attributes, append it to `parent`, and return the new element.
  const SVG_NS = "http://www.w3.org/2000/svg";
  const element = document.createElementNS(SVG_NS, tag);
  Object.entries(attrs || {}).forEach(([name, value]) => {
    element.setAttribute(name, value);
  });
  parent.appendChild(element);
  return element;
}
|
|
979
|
+
|
|
980
|
+
function renderGraph() {
  // Lay out the nodes, then redraw the SVG canvas: one <line> per edge whose
  // endpoints are both known, and one <g class="node"> per node. Only nodes
  // in the important/bridge sets get a text label. Clicking a node selects
  // it; clicking the canvas itself clears the selection.
  layoutNodes();
  const svg = document.getElementById("graph");
  svg.innerHTML = "";
  const root = make("g", {}, svg);

  edges.forEach(edge => {
    const from = nodeById.get(edge.src);
    const to = nodeById.get(edge.dst);
    if (!from || !to) return;
    // Any confidence other than "high" (a missing one counts as "high") is drawn as weak.
    const modifier = (edge.confidence || "high") === "high" ? "" : "weak";
    make("line", { class: `edge ${modifier}`, x1: from.x, y1: from.y, x2: to.x, y2: to.y }, root);
  });

  nodes.forEach(node => {
    const isImportant = importantIds.has(node.id);
    const isBridge = bridgeIds.has(node.id);
    let radius = 7;
    if (isImportant) radius = 11;
    else if (isBridge) radius = 10;
    else if (node.type === "source") radius = 5;

    const group = make(
      "g",
      { class: "node", transform: `translate(${node.x},${node.y})`, "data-id": node.id },
      root
    );
    make("circle", { r: radius, fill: colors[node.type] || "#475569" }, group);
    if (isImportant || isBridge) {
      const label = make("text", { x: radius + 5, y: 4 }, group);
      label.textContent = node.label || node.id;
    }
    group.addEventListener("click", event => {
      // Keep the click from reaching the svg-level handler that clears the selection.
      event.stopPropagation();
      selectNode(node.id);
    });
  });

  svg.addEventListener("click", () => selectNode(null));
}
|
|
1004
|
+
|
|
1005
|
+
function selectNode(id) {
  // Record the current selection, mirror it onto the panel rows and the
  // canvas nodes (the selected node is highlighted, all others dimmed), and
  // refresh the details card. A null id clears the selection.
  selected.id = id;

  for (const row of document.querySelectorAll("[data-node-id]")) {
    row.classList.toggle("selected", row.dataset.nodeId === id);
  }

  for (const group of document.querySelectorAll(".node")) {
    const isSelected = id && group.dataset.id === id;
    group.classList.toggle("selected", isSelected);
    group.classList.toggle("dim", Boolean(id) && !isSelected);
  }

  renderDetails(id);
}
|
|
1015
|
+
|
|
1016
|
+
function renderDetails(id) {
  // Fill the floating details card. Without a known node id a short hint is
  // shown; otherwise the node's title, type/id/confidence line, optional body
  // and the edges of type MENTIONED_IN or MADE_AT that touch the node.
  const card = document.getElementById("details");
  const known = Boolean(id) && nodeById.has(id);
  if (!known) {
    card.innerHTML = `<h3>Select a memory</h3><div class="body">Ranked panels are the primary audit view. The canvas is for orientation.</div>`;
    return;
  }

  const node = nodeById.get(id);
  const proofTypes = ["MENTIONED_IN", "MADE_AT"];
  const proofItems = edges
    .filter(edge => (edge.src === id || edge.dst === id) && proofTypes.includes(edge.type))
    .map(edge => {
      // Show the endpoint on the other side of the edge.
      const other = edge.src === id ? edge.dst : edge.src;
      const excerpt = edge.excerpt ? `: ${esc(edge.excerpt)}` : "";
      return `<li><code>${esc(other)}</code>${excerpt}</li>`;
    })
    .join("");
  const body = node.body ? `<div class="body">${esc(node.body)}</div>` : "";

  card.innerHTML = `
    <h3>${esc(node.label || node.id)}</h3>
    <div class="item-meta">${esc(node.type)} · <code>${esc(node.id)}</code> ${confidencePill(node.confidence)}</div>
    ${body}
    <div class="item-meta">Proof trail</div>
    <ul>${proofItems || "<li>No provenance edge found.</li>"}</ul>
  `;
}
|
|
1033
|
+
|
|
1034
|
+
renderPanels();
|
|
1035
|
+
renderGraph();
|
|
1036
|
+
renderDetails(null);
|
|
1037
|
+
</script>
|
|
1038
|
+
</body>
|
|
1039
|
+
</html>
|
|
1040
|
+
"""
|
|
1041
|
+
|
|
1042
|
+
|
|
1043
|
+
def render_memory_audit_html(g: Graph, analytics: dict, out_path: Path) -> Path:
    """Write the standalone Memory Audit page for *g* to *out_path*.

    The analytics dict and the graph payload are serialized to JSON and
    substituted for the ``__AUDIT_JSON__`` and ``__GRAPH_JSON__`` placeholders
    of ``HTML_TEMPLATE``. Missing parent directories of *out_path* are created.

    Args:
        g: Graph whose nodes and edges are embedded in the page.
        analytics: Audit analytics embedded in the page.
        out_path: Destination file for the generated HTML.

    Returns:
        *out_path*, unchanged.
    """

    def _inline_json(payload: object) -> str:
        # Serialize for embedding inside a <script> element.
        text = json.dumps(payload, ensure_ascii=False)
        # "<" only occurs inside JSON string values, so the \u003c escape is
        # lossless. It neutralizes "</script>" in any letter case as well as
        # "<!--", neither of which may appear raw inside a script element.
        text = text.replace("<", "\\u003c")
        # Raw U+2028/U+2029 are line terminators for pre-ES2019 JS parsers.
        return text.replace("\u2028", "\\u2028").replace("\u2029", "\\u2029")

    audit_json = _inline_json(analytics)
    graph_json = _inline_json(_graph_payload(g))
    # Split the template on one placeholder before substituting the other so
    # that placeholder-looking text inside either payload is never rewritten.
    html = graph_json.join(
        piece.replace("__AUDIT_JSON__", audit_json)
        for piece in HTML_TEMPLATE.split("__GRAPH_JSON__")
    )
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(html, encoding="utf-8")
    return out_path
|
|
1051
|
+
|
|
1052
|
+
|
|
1053
|
+
def _write_json(data: dict, out: str) -> Path | None:
|
|
1054
|
+
if out == "-":
|
|
1055
|
+
print(json.dumps(data, indent=2, sort_keys=True))
|
|
1056
|
+
return None
|
|
1057
|
+
out_path = Path(out).expanduser().resolve()
|
|
1058
|
+
out_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1059
|
+
out_path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
|
1060
|
+
return out_path
|
|
1061
|
+
|
|
1062
|
+
|
|
1063
|
+
def run_audit(args: list[str]) -> int:
    """Run the ``mykg audit`` command.

    Loads the graph, builds the audit analytics, writes them as JSON (to a
    file, or to stdout when ``--out -``), optionally renders the standalone
    HTML page, and prints a one-line summary.

    Args:
        args: Command-line arguments, without the program name.

    Returns:
        ``0`` once the audit has completed.
    """
    parser = argparse.ArgumentParser(prog="mykg audit")
    parser.add_argument("--graph", default=None, help="Graph JSON path. Defaults to MYGRAPH_PATH or local graph.")
    parser.add_argument("--out", default="analytics.json", help="Analytics JSON path, or '-' for stdout.")
    parser.add_argument("--html", default=None, help="Optional standalone Memory Audit HTML path.")
    parser.add_argument("--max-items", type=int, default=25, help="Ranked records per panel.")
    parser.add_argument("--max-communities", type=int, default=12, help="Maximum communities to derive.")
    parsed = parser.parse_args(args)

    # When the analytics JSON goes to stdout, every status message must go to
    # stderr so that stdout stays machine-parseable.
    status_stream = sys.stderr if parsed.out == "-" else sys.stdout

    g = Graph.load(parsed.graph)
    analytics = build_memory_audit(g, limit=parsed.max_items, max_communities=parsed.max_communities)

    written_json = _write_json(analytics, parsed.out)
    if written_json:
        print(f"audit: wrote {written_json}", file=status_stream)

    if parsed.html:
        html_path = Path(parsed.html).expanduser().resolve()
        render_memory_audit_html(g, analytics, html_path)
        print(f"audit: wrote {html_path}", file=status_stream)

    coverage = analytics["provenance_coverage"]
    print(
        "audit: "
        f"{analytics['stats']['semantic_nodes']} semantic nodes, "
        f"{analytics['stats']['communities']} communities, "
        f"{round(coverage['node_coverage'] * 100)}% provenance coverage",
        file=status_stream,
    )
    return 0
|
|
1091
|
+
|
|
1092
|
+
|
|
1093
|
+
if __name__ == "__main__":
    # Script entry point: forward the CLI arguments to run_audit and exit the
    # process with its return value as the status code.
    raise SystemExit(run_audit(sys.argv[1:]))
|