knowledge-worker 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1094 @@
1
+ """
2
+ memory_audit.py - read-only graph analytics and Memory Audit HTML.
3
+
4
+ Usage:
5
+ mykg audit --out analytics.json
6
+ mykg audit --out analytics.json --html memory_audit.html
7
+
8
+ The audit is intentionally local and deterministic. It uses the public Graph API
9
+ instead of reading graph JSON directly, keeps source/provenance edges separate
10
+ from semantic graph analytics, and writes generated artifacts only when asked.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import json
17
+ import math
18
+ import sys
19
+ from collections import Counter, defaultdict, deque
20
+ from dataclasses import asdict
21
+ from datetime import datetime, timezone
22
+ from pathlib import Path
23
+ from typing import Iterable
24
+
25
+ try:
26
+ from .mygraph import Edge, Graph
27
+ except ImportError: # direct script execution: python mygraph/memory_audit.py
28
+ from mygraph import Edge, Graph
29
+
30
+
31
# Edge types that record where something was mentioned or made. Edges of
# these types are excluded from the semantic edge set and are the only ones
# considered by the source projection and provenance coverage.
PROVENANCE_EDGE_TYPES = {
    "MENTIONED_IN",
    "MADE_AT",
}

# Numeric rank for each known confidence label (lower means weaker).
CONFIDENCE_RANK = {
    "low": 0,
    "medium": 1,
    "high": 2,
}
33
+
34
+
35
def _semantic_ids(g: Graph) -> list[str]:
    """Return the ids of all nodes whose type is not ``"source"``, sorted."""
    semantic = [node_id for node_id in g.nodes if g.nodes[node_id].type != "source"]
    semantic.sort()
    return semantic
37
+
38
+
39
def _semantic_edges(g: Graph, ids: set[str]) -> list[Edge]:
    """Return the graph's edges that link two ids in ``ids`` and are not provenance edges.

    Edges keep the order in which they appear in ``g.edges``.
    """
    kept: list[Edge] = []
    for candidate in g.edges:
        # Both endpoints must be part of the semantic node set.
        if candidate.src not in ids or candidate.dst not in ids:
            continue
        if candidate.type in PROVENANCE_EDGE_TYPES:
            continue
        kept.append(candidate)
    return kept
47
+
48
+
49
def _source_projection_edges(g: Graph, ids: set[str]) -> set[tuple[str, str]]:
    """Link non-source nodes that share a source, without adding source nodes.

    For every provenance edge that has a source node on one end and a member
    of ``ids`` on the other, the member is grouped under that source. Each
    group is then sorted and only consecutive members are paired, so a group
    becomes a chain rather than a clique.

    The result is an audit-time projection only: it keeps provenance useful
    for topology while avoiding source nodes dominating centrality.
    """
    members_by_source: dict[str, set[str]] = defaultdict(set)
    for edge in g.edges:
        if edge.type in PROVENANCE_EDGE_TYPES:
            head = g.nodes.get(edge.src)
            tail = g.nodes.get(edge.dst)
            # The source may sit on either end of a provenance edge; the
            # src-side check takes priority when both ends would qualify.
            if head and head.type == "source" and edge.dst in ids:
                members_by_source[edge.src].add(edge.dst)
            elif tail and tail.type == "source" and edge.src in ids:
                members_by_source[edge.dst].add(edge.src)

    chained: set[tuple[str, str]] = set()
    for group in members_by_source.values():
        in_order = sorted(group)
        # Pair each member with its successor in sorted order.
        chained.update(zip(in_order, in_order[1:]))
    return chained
72
+
73
+
74
def _build_adjacency(ids: Iterable[str], edges: Iterable[Edge]) -> tuple[dict[str, set[str]], dict[str, set[str]]]:
    """Build directed and undirected adjacency sets for ``ids`` from ``edges``.

    Args:
        ids: Node ids that become the keys of both maps. May be any iterable,
            including a one-shot iterator.
        edges: Objects exposing ``src`` and ``dst``. Edges with an endpoint
            outside ``ids`` are ignored.

    Returns:
        ``(directed, undirected)``. ``directed[src]`` holds the targets of
        ``src``; ``undirected`` holds neighbours in both directions.
    """
    # Materialize once: ``ids`` is consumed twice below, which would leave the
    # second map empty if the caller passed a generator or other iterator.
    node_ids = list(ids)
    directed: dict[str, set[str]] = {nid: set() for nid in node_ids}
    undirected: dict[str, set[str]] = {nid: set() for nid in node_ids}
    for edge in edges:
        if edge.src not in directed or edge.dst not in directed:
            continue
        directed[edge.src].add(edge.dst)
        undirected[edge.src].add(edge.dst)
        undirected[edge.dst].add(edge.src)
    return directed, undirected
84
+
85
+
86
def _add_projection(
    directed: dict[str, set[str]],
    undirected: dict[str, set[str]],
    projected_edges: Iterable[tuple[str, str]],
) -> None:
    """Add projected pairs to both adjacency maps, in place and in both directions.

    A pair is skipped when either endpoint is not a key of ``directed``.
    """
    for first, second in projected_edges:
        if first in directed and second in directed:
            # Projection edges carry no direction, so mirror them everywhere.
            for table in (directed, undirected):
                table[first].add(second)
                table[second].add(first)
98
+
99
+
100
def _degree(undirected: dict[str, set[str]]) -> dict[str, int]:
    """Return the number of neighbours recorded for each node."""
    counts: dict[str, int] = {}
    for nid in undirected:
        counts[nid] = len(undirected[nid])
    return counts
102
+
103
+
104
def _directed_counts(ids: Iterable[str], edges: Iterable[Edge]) -> tuple[dict[str, int], dict[str, int]]:
    """Count incoming and outgoing edges per node.

    Args:
        ids: Node ids to count for. May be any iterable, including a one-shot
            iterator.
        edges: Objects exposing ``src`` and ``dst``. Every edge is counted
            (parallel edges count separately); edges with an endpoint outside
            ``ids`` are ignored.

    Returns:
        ``(in_degree, out_degree)``, each keyed by every id in ``ids``.
    """
    # Build the second map from the first so ``ids`` is consumed only once;
    # iterating it twice left ``out_degree`` empty for iterator inputs and
    # caused every edge to be skipped.
    in_degree = dict.fromkeys(ids, 0)
    out_degree = dict(in_degree)
    for edge in edges:
        if edge.src not in out_degree or edge.dst not in in_degree:
            continue
        out_degree[edge.src] += 1
        in_degree[edge.dst] += 1
    return in_degree, out_degree
113
+
114
+
115
def _directed_edge_types(ids: Iterable[str], edges: Iterable[Edge]) -> tuple[dict[str, Counter], dict[str, Counter]]:
    """Tally edge types per node, separately for incoming and outgoing edges.

    Args:
        ids: Node ids to tally for. May be any iterable, including a one-shot
            iterator.
        edges: Objects exposing ``src``, ``dst`` and ``type``. Edges with an
            endpoint outside ``ids`` are ignored.

    Returns:
        ``(in_types, out_types)``: for every id, a ``Counter`` mapping edge
        type to the number of edges arriving at / leaving that node.
    """
    # Materialize once: ``ids`` is consumed twice below, which would leave
    # ``out_types`` empty if the caller passed an iterator.
    node_ids = list(ids)
    in_types: dict[str, Counter] = {nid: Counter() for nid in node_ids}
    out_types: dict[str, Counter] = {nid: Counter() for nid in node_ids}
    for edge in edges:
        if edge.src in out_types and edge.dst in in_types:
            out_types[edge.src][edge.type] += 1
            in_types[edge.dst][edge.type] += 1
    return in_types, out_types
123
+
124
+
125
def _pagerank(
    ids: list[str],
    directed: dict[str, set[str]],
    damping: float = 0.85,
    iterations: int = 100,
    tolerance: float = 1.0e-12,
) -> dict[str, float]:
    """Compute PageRank scores by power iteration.

    Args:
        ids: Node ids to score.
        directed: Outgoing neighbours for every id in ``ids``.
        damping: Fraction of a node's score passed along its out-edges.
        iterations: Maximum number of update rounds.
        tolerance: Stop early once the summed absolute change drops below this.

    Returns:
        A score per id; empty when ``ids`` is empty. Nodes without out-edges
        spread their score evenly over all nodes.
    """
    total = len(ids)
    if not total:
        return {}

    rank = dict.fromkeys(ids, 1.0 / total)
    teleport = (1.0 - damping) / total
    for _round in range(iterations):
        # Score held by nodes with no out-edges is redistributed uniformly.
        dangling = sum(rank[nid] for nid in ids if not directed[nid])
        floor = teleport + damping * dangling / total
        updated = dict.fromkeys(ids, floor)

        for origin in ids:
            outgoing = directed[origin]
            if outgoing:
                portion = damping * rank[origin] / len(outgoing)
                for target in outgoing:
                    updated[target] += portion

        shift = sum(abs(updated[nid] - rank[nid]) for nid in ids)
        rank = updated
        if shift < tolerance:
            break
    return rank
155
+
156
+
157
def _betweenness(ids: list[str], adjacency: dict[str, set[str]]) -> dict[str, float]:
    """Brandes betweenness centrality for an undirected, unweighted graph.

    Args:
        ids: Node ids to score.
        adjacency: Undirected neighbour sets for every id in ``ids``.

    Returns:
        A score per id. Raw totals are halved, and when there are more than
        two ids they are additionally scaled by ``2 / ((n - 1) * (n - 2))``.
    """
    totals = dict.fromkeys(ids, 0.0)
    for root in ids:
        # Breadth-first pass: count shortest paths and record predecessors.
        visit_order: list[str] = []
        parents: dict[str, list[str]] = {nid: [] for nid in ids}
        path_count = dict.fromkeys(ids, 0.0)
        path_count[root] = 1.0
        depth = dict.fromkeys(ids, -1)
        depth[root] = 0
        frontier = deque((root,))

        while frontier:
            here = frontier.popleft()
            visit_order.append(here)
            next_depth = depth[here] + 1
            for nbr in adjacency[here]:
                if depth[nbr] < 0:  # first time this node is reached
                    frontier.append(nbr)
                    depth[nbr] = next_depth
                if depth[nbr] == next_depth:  # ``here`` lies on a shortest path
                    path_count[nbr] += path_count[here]
                    parents[nbr].append(here)

        # Back-propagate dependencies from the farthest nodes toward the root.
        dependency = dict.fromkeys(ids, 0.0)
        for node_id in reversed(visit_order):
            for parent in parents[node_id]:
                if path_count[node_id]:
                    fraction = path_count[parent] / path_count[node_id]
                    dependency[parent] += fraction * (1.0 + dependency[node_id])
            if node_id != root:
                totals[node_id] += dependency[node_id]

    node_count = len(ids)
    rescale = 2.0 / ((node_count - 1) * (node_count - 2)) if node_count > 2 else None
    for nid in totals:
        # Undirected paths are counted twice.
        halved = totals[nid] / 2.0
        totals[nid] = halved if rescale is None else halved * rescale
    return totals
199
+
200
+
201
def _edge_betweenness(ids: list[str], adjacency: dict[str, set[str]]) -> dict[tuple[str, str], float]:
    """Brandes edge betweenness for undirected community splitting.

    Args:
        ids: Node ids of the graph.
        adjacency: Undirected neighbour sets for every id in ``ids``.

    Returns:
        A score for each edge that lies on at least one shortest path, keyed
        by the edge's endpoints in sorted order. Raw totals are halved.
    """
    raw: dict[tuple[str, str], float] = {}
    for root in ids:
        # Breadth-first pass: count shortest paths and record predecessors.
        visit_order: list[str] = []
        parents: dict[str, list[str]] = {nid: [] for nid in ids}
        path_count = dict.fromkeys(ids, 0.0)
        path_count[root] = 1.0
        depth = dict.fromkeys(ids, -1)
        depth[root] = 0
        frontier = deque((root,))

        while frontier:
            here = frontier.popleft()
            visit_order.append(here)
            next_depth = depth[here] + 1
            for nbr in adjacency[here]:
                if depth[nbr] < 0:  # first time this node is reached
                    frontier.append(nbr)
                    depth[nbr] = next_depth
                if depth[nbr] == next_depth:  # ``here`` lies on a shortest path
                    path_count[nbr] += path_count[here]
                    parents[nbr].append(here)

        # Walk back from the farthest nodes, crediting each traversed edge.
        dependency = dict.fromkeys(ids, 0.0)
        for node_id in reversed(visit_order):
            for parent in parents[node_id]:
                if path_count[node_id]:
                    flow = (path_count[parent] / path_count[node_id]) * (1.0 + dependency[node_id])
                    key = (node_id, parent) if node_id < parent else (parent, node_id)
                    raw[key] = raw.get(key, 0.0) + flow
                    dependency[parent] += flow

    # Each undirected edge is credited from both endpoints' searches.
    return {key: value / 2.0 for key, value in raw.items()}
237
+
238
+
239
def _connected_components(ids: Iterable[str], adjacency: dict[str, set[str]]) -> list[list[str]]:
    """Split ``ids`` into connected components using ``adjacency``.

    Returns:
        A list of components, each a sorted list of ids. Components are
        ordered largest first, ties broken by their smallest id.
    """
    unvisited = set(ids)
    groups: list[list[str]] = []
    while unvisited:
        # Always seed from the smallest remaining id for determinism.
        seed = min(unvisited)
        unvisited.remove(seed)
        pending = deque((seed,))
        members: list[str] = []
        while pending:
            here = pending.popleft()
            members.append(here)
            for nbr in sorted(adjacency[here]):
                if nbr in unvisited:
                    unvisited.remove(nbr)
                    pending.append(nbr)
        members.sort()
        groups.append(members)

    groups.sort(key=lambda grp: (-len(grp), grp[0] if grp else ""))
    return groups
256
+
257
+
258
def _core_numbers(ids: list[str], adjacency: dict[str, set[str]]) -> dict[str, int]:
    """Assign each node a core number by repeatedly peeling low-degree nodes.

    At level ``k`` every node whose degree among the still-remaining nodes is
    at most ``k`` is removed (repeating until none qualifies) and receives
    core number ``k``. The level only increases when nothing could be peeled.
    """
    alive = set(ids)
    shell = dict.fromkeys(ids, 0)
    level = 0
    while alive:
        peeled: list[str] = []
        progress = True
        while progress:
            progress = False
            for nid in sorted(alive):
                # Only neighbours that have not been peeled yet count.
                live_degree = len([nbr for nbr in adjacency[nid] if nbr in alive])
                if live_degree <= level:
                    peeled.append(nid)
                    alive.remove(nid)
                    progress = True
        if not peeled:
            level += 1
            continue
        for nid in peeled:
            shell[nid] = level
    return shell
279
+
280
+
281
def _community_partition(
    ids: list[str],
    adjacency: dict[str, set[str]],
    max_communities: int = 12,
) -> dict[str, int]:
    """Partition nodes into communities by removing high-betweenness edges.

    Starting from the connected components of ``adjacency``, the edge with the
    highest edge betweenness inside the largest component of more than two
    nodes is removed, one at a time, until the number of components reaches
    ``min(max_communities, max(1, round(sqrt(len(ids)))))`` or nothing more
    can be split. ``adjacency`` itself is not modified.

    Args:
        ids: Node ids to partition.
        adjacency: Undirected neighbour sets for every id in ``ids``.
        max_communities: Upper bound on the target number of communities.

    Returns:
        A community index per id; indices follow the component ordering of
        ``_connected_components`` (largest first). Empty when ``ids`` is empty.
    """
    if not ids:
        return {}
    target = min(max_communities, max(1, round(math.sqrt(len(ids)))))
    # Work on a copy so edge removals never leak back to the caller.
    current = {nid: set(neighbors) for nid, neighbors in adjacency.items()}
    components = _connected_components(ids, current)
    max_removals = sum(len(neighbors) for neighbors in current.values()) // 2
    removals = 0

    while len(components) < target and removals < max_removals:
        splittable = [component for component in components if len(component) > 2]
        if not splittable:
            break
        # Components are sorted largest first, so this is the biggest one.
        largest = splittable[0]
        # Build the membership set once instead of once per member node.
        largest_members = set(largest)
        subgraph = {nid: current[nid] & largest_members for nid in largest}
        edge_scores = _edge_betweenness(largest, subgraph)
        if not edge_scores:
            break
        # Highest score wins; ties fall back to the smallest edge key.
        a, b = min(edge_scores.items(), key=lambda item: (-item[1], item[0]))[0]
        current[a].discard(b)
        current[b].discard(a)
        removals += 1
        components = _connected_components(ids, current)

    return {
        nid: community_id
        for community_id, members in enumerate(components)
        for nid in members
    }
315
+
316
+
317
def _confidence_is_weak(confidence: str | None) -> bool:
    """Return True unless ``confidence`` is ``"high"`` or missing/empty.

    A missing confidence is treated the same as ``"high"``.
    """
    if not confidence:
        return False
    return confidence != "high"
319
+
320
+
321
def _node_record(g: Graph, node_id: str, **metrics: object) -> dict:
    """Return a dict describing node ``node_id`` plus any extra ``metrics``.

    The record holds the node's ``id``, ``type``, ``label`` and
    ``confidence``; keyword metrics are merged on top and override those
    fields on a name clash. Raises ``KeyError`` if the node is unknown.
    """
    found = g.nodes[node_id]
    return {
        "id": found.id,
        "type": found.type,
        "label": found.label,
        "confidence": found.confidence,
        **metrics,
    }
331
+
332
+
333
def _edge_record(g: Graph, edge: Edge, index: int | None = None) -> dict:
    """Return a dict describing ``edge``, enriched with endpoint labels and types.

    Args:
        g: Graph used to look up the endpoints.
        edge: Edge to describe.
        index: Optional position of the edge; stored under ``"index"`` when given.

    Returns:
        The edge's own fields, plus ``src_label``/``src_type`` and
        ``dst_label``/``dst_type`` for each endpoint present in ``g.nodes``.
    """
    record = {
        field: getattr(edge, field)
        for field in ("src", "dst", "type", "source_id", "confidence", "excerpt")
    }
    if index is not None:
        record["index"] = index

    endpoints = (
        (edge.src, "src_label", "src_type"),
        (edge.dst, "dst_label", "dst_type"),
    )
    for endpoint_id, label_key, type_key in endpoints:
        # Endpoints missing from the graph simply get no label/type entries.
        if endpoint_id in g.nodes:
            endpoint = g.nodes[endpoint_id]
            record[label_key] = endpoint.label
            record[type_key] = endpoint.type
    return record
351
+
352
+
353
def _ranked_nodes(
    g: Graph,
    scores: dict[str, float],
    degree: dict[str, int],
    core: dict[str, int],
    communities: dict[str, int],
    limit: int,
    *,
    include_zero: bool = False,
) -> list[dict]:
    """Return node records ordered by descending score.

    Ties are broken by higher degree, then by lower-cased label. Records
    carry ``score``, ``degree``, ``core_number`` and ``community``.

    Args:
        g: Graph supplying node details.
        scores: Score per node id; every key must be a node of ``g``.
        degree: Degree per node id (missing ids count as 0).
        core: Core number per node id (missing ids count as 0).
        communities: Community index per node id (missing ids yield ``None``).
        limit: Collection stops once this many records have been gathered.
        include_zero: Keep nodes whose score is zero or negative.
    """

    def ordering(entry: tuple[str, float]) -> tuple:
        node_id, value = entry
        return (-value, -degree.get(node_id, 0), g.nodes[node_id].label.lower())

    selected: list[dict] = []
    for node_id, value in sorted(scores.items(), key=ordering):
        if not include_zero and value <= 0:
            continue
        record = _node_record(
            g,
            node_id,
            score=value,
            degree=degree.get(node_id, 0),
            core_number=core.get(node_id, 0),
            community=communities.get(node_id),
        )
        selected.append(record)
        # The cap is checked after appending, mirroring the original flow.
        if len(selected) >= limit:
            break
    return selected
384
+
385
+
386
def _provenance_coverage(g: Graph) -> dict:
    """Summarise how much of the graph is backed by provenance.

    A non-source node counts as having provenance when it is an endpoint of
    at least one edge whose type is in ``PROVENANCE_EDGE_TYPES``; it counts as
    having an excerpt when such an edge also carries a non-empty ``excerpt``.

    Returns:
        A dict with three ratios (``node_coverage``, ``excerpt_coverage``,
        ``edge_source_coverage``; each is 1.0 when its denominator is zero),
        the underlying counts, records for non-source nodes lacking
        provenance (``missing_nodes``) and records, with their index in
        ``g.edges``, for edges lacking a ``source_id``
        (``edges_missing_source_id``).
    """
    mentioned: set[str] = set()
    mentioned_with_excerpt: set[str] = set()
    provenance_edges = []
    for edge in g.edges:
        if edge.type not in PROVENANCE_EDGE_TYPES:
            continue
        provenance_edges.append(edge)
        # Either endpoint may be the non-source node being evidenced.
        for endpoint in (edge.src, edge.dst):
            if endpoint in g.nodes and g.nodes[endpoint].type != "source":
                mentioned.add(endpoint)
                if edge.excerpt:
                    mentioned_with_excerpt.add(endpoint)

    non_source_nodes = [nid for nid, node in g.nodes.items() if node.type != "source"]
    missing_nodes = [nid for nid in non_source_nodes if nid not in mentioned]
    # Only the counts are needed here; the per-edge records for edges without
    # a source id are built once, directly in the result below.
    edges_with_source_id = sum(1 for edge in g.edges if edge.source_id)
    provenance_with_excerpt = sum(1 for edge in provenance_edges if edge.excerpt)

    def ratio(numerator: int, denominator: int) -> float:
        # An empty denominator means there is nothing left uncovered.
        return 1.0 if denominator == 0 else numerator / denominator

    return {
        "node_coverage": ratio(len(mentioned), len(non_source_nodes)),
        "excerpt_coverage": ratio(provenance_with_excerpt, len(provenance_edges)),
        "edge_source_coverage": ratio(edges_with_source_id, len(g.edges)),
        "non_source_nodes": len(non_source_nodes),
        "nodes_with_provenance": len(mentioned),
        "nodes_with_provenance_excerpt": len(mentioned_with_excerpt),
        "missing_nodes": [_node_record(g, nid) for nid in sorted(missing_nodes)],
        "edges_total": len(g.edges),
        "edges_with_source_id": edges_with_source_id,
        "edges_missing_source_id": [
            _edge_record(g, edge, index)
            for index, edge in enumerate(g.edges)
            if not edge.source_id
        ],
        "provenance_edges": len(provenance_edges),
        "provenance_edges_with_excerpt": provenance_with_excerpt,
    }
430
+
431
+
432
def _proof_trail(g: Graph, node_ids: list[str], limit: int) -> list[dict]:
    """Return node records with their provenance entries attached.

    Ids are processed in order; duplicates, ids not in the graph and nodes
    for which ``g.provenance`` yields nothing are skipped. Each provenance
    entry holds ``source_id``, ``source_label`` (the source id itself when
    the source node cannot be found) and ``excerpt``. Collection stops once
    ``limit`` records have been gathered.
    """
    trail: list[dict] = []
    visited: set[str] = set()
    for node_id in node_ids:
        if node_id in visited or node_id not in g.nodes:
            continue
        visited.add(node_id)

        evidence = []
        for source_id, excerpt in g.provenance(node_id):
            source_node = g.nodes.get(source_id)
            label = source_node.label if source_node else source_id
            evidence.append({"source_id": source_id, "source_label": label, "excerpt": excerpt})

        if evidence:
            trail.append(_node_record(g, node_id, provenance=evidence))
            if len(trail) >= limit:
                break
    return trail
455
+
456
+
457
def _weak_claims(g: Graph, coverage: dict, limit: int) -> list[dict]:
    """Collect claims that need review and return the first ``limit`` of them.

    Four kinds of claim are gathered: non-source nodes with weak confidence
    (``node_confidence``), edges with weak confidence (``edge_confidence``),
    and the ``missing_nodes`` / ``edges_missing_source_id`` entries of
    ``coverage`` (``missing_node_provenance`` / ``missing_edge_source_id``).
    Claims are sorted by confidence rank (unrecognised values first), then
    kind, then node id or edge source.
    """

    def ordering(claim: dict) -> tuple:
        return (
            CONFIDENCE_RANK.get(str(claim.get("confidence")), -1),
            claim.get("kind", ""),
            claim.get("id") or claim.get("src") or "",
        )

    collected = [
        {"kind": "node_confidence", **_node_record(g, node_id)}
        for node_id, node in g.nodes.items()
        if node.type != "source" and _confidence_is_weak(node.confidence)
    ]
    collected.extend(
        {"kind": "edge_confidence", **_edge_record(g, edge, index)}
        for index, edge in enumerate(g.edges)
        if _confidence_is_weak(edge.confidence)
    )
    collected.extend(
        {"kind": "missing_node_provenance", **node_entry}
        for node_entry in coverage["missing_nodes"]
    )
    collected.extend(
        {"kind": "missing_edge_source_id", **edge_entry}
        for edge_entry in coverage["edges_missing_source_id"]
    )

    collected.sort(key=ordering)
    return collected[:limit]
480
+
481
+
482
def _idea_flow_records(
    g: Graph,
    ids: list[str],
    in_degree: dict[str, int],
    out_degree: dict[str, int],
    in_types: dict[str, Counter],
    out_types: dict[str, Counter],
    communities: dict[str, int],
    limit: int,
    *,
    mode: str,
) -> list[dict]:
    """Rank ``idea`` nodes by the imbalance between their in- and out-degree.

    In ``"attractor"`` mode the score is in-degree minus out-degree; in
    ``"generator"`` mode it is the reverse. Only ideas with a positive score
    (and at least one edge in the dominant direction) are reported, best
    first, with ties broken by the larger of the two degrees and then by
    lower-cased label. Each record also carries a review prompt.

    Raises:
        ValueError: If ``mode`` is neither ``"attractor"`` nor ``"generator"``.
    """
    if mode not in {"attractor", "generator"}:
        raise ValueError(f"unknown idea flow mode: {mode}")

    attracting = mode == "attractor"
    if attracting:
        prompt = (
            "Is this a durable principle, an unresolved sink, or an over-compressed label? "
            "Write one next action."
        )
    else:
        prompt = (
            "Which branch deserves leg work next? Choose one edge to operationalize, "
            "verify, or prune."
        )

    def balance(node_id: str) -> int:
        inbound = in_degree.get(node_id, 0)
        outbound = out_degree.get(node_id, 0)
        return inbound - outbound if attracting else outbound - inbound

    candidates = [nid for nid in ids if g.nodes[nid].type == "idea"]
    candidates.sort(
        key=lambda nid: (
            -balance(nid),
            -max(in_degree.get(nid, 0), out_degree.get(nid, 0)),
            g.nodes[nid].label.lower(),
        )
    )

    records: list[dict] = []
    for node_id in candidates:
        inbound = in_degree.get(node_id, 0)
        outbound = out_degree.get(node_id, 0)
        net = balance(node_id)
        dominant = inbound if attracting else outbound
        if dominant < 1 or net <= 0:
            continue
        records.append(
            _node_record(
                g,
                node_id,
                score=float(net),
                in_degree=inbound,
                out_degree=outbound,
                flow_balance=net,
                inbound_edge_types=dict(in_types.get(node_id, Counter())),
                outbound_edge_types=dict(out_types.get(node_id, Counter())),
                community=communities.get(node_id),
                prompt=prompt,
            )
        )
        if len(records) >= limit:
            break
    return records
547
+
548
+
549
def _weak_claim_queue(claims: list[dict], limit: int) -> list[dict]:
    """Turn the first ``limit`` claims into review-queue entries.

    Each entry is a copy of the claim extended with a ``prompt`` chosen by
    the claim's ``kind`` and a fresh ``review_options`` list. The input
    claims are left untouched.
    """
    fallback_prompt = "Choose: verify, downgrade, convert to question, ignore for now."
    prompts_by_kind = (
        ("missing_node_provenance", "Find source evidence or keep this out of durable memory."),
        ("missing_edge_source_id", "Attach a source id or remove this edge from the durable graph."),
        (
            "edge_confidence",
            "Inspect this relationship: verify it, downgrade it, or turn it into an open question.",
        ),
    )

    def prompt_for(claim: dict) -> str:
        claim_kind = claim.get("kind")
        return next(
            (text for kind, text in prompts_by_kind if claim_kind == kind),
            fallback_prompt,
        )

    return [
        {
            **claim,
            "prompt": prompt_for(claim),
            "review_options": ["verify", "downgrade", "convert_to_question", "ignore_for_now"],
        }
        for claim in claims[:limit]
    ]
567
+
568
+
569
def _community_records(
    g: Graph,
    communities: dict[str, int],
    pagerank: dict[str, float],
    degree: dict[str, int],
    core: dict[str, int],
) -> list[dict]:
    """Describe each community: size, node-type counts, members and top members.

    Communities are listed largest first (ties by community id). Members are
    sorted by id; ``top_members`` holds up to 12 node records ordered by
    descending PageRank, then degree, then lower-cased label.
    """
    buckets: dict[int, list[str]] = {}
    for node_id, community_id in communities.items():
        buckets.setdefault(community_id, []).append(node_id)

    def leader_order(nid: str) -> tuple:
        return (-pagerank.get(nid, 0.0), -degree.get(nid, 0), g.nodes[nid].label.lower())

    records: list[dict] = []
    for community_id in sorted(buckets, key=lambda cid: (-len(buckets[cid]), cid)):
        members = sorted(buckets[community_id])
        leaders = sorted(members, key=leader_order)[:12]
        type_counts = Counter(g.nodes[nid].type for nid in members)
        records.append(
            {
                "id": community_id,
                "size": len(members),
                "types": dict(type_counts),
                "members": members,
                "top_members": [
                    _node_record(
                        g,
                        nid,
                        score=pagerank.get(nid, 0.0),
                        degree=degree.get(nid, 0),
                        core_number=core.get(nid, 0),
                    )
                    for nid in leaders
                ],
            }
        )
    return records
606
+
607
+
608
def build_memory_audit(g: Graph, *, limit: int = 25, max_communities: int = 12) -> dict:
    """Compute the full memory-audit report for graph ``g``.

    Topology metrics (degree, PageRank, betweenness, core numbers,
    communities) are computed over non-source nodes using semantic edges plus
    the source projection. Directed flow (idea attractors/generators and the
    semantic in/out-degree rankings) uses semantic edges only.

    Args:
        g: Graph to audit.
        limit: Cap passed to every ranked list in the report.
        max_communities: Upper bound passed to the community partitioning.

    Returns:
        A dict with the keys ``schema_version``, ``generated_at`` (current
        UTC time, ISO format), ``stats``, ``counts``, ``ranked``,
        ``centrality``, ``directed_flow``, ``legwork_queue``, ``communities``,
        ``low_confidence_edges`` and ``provenance_coverage``.
    """
    ids = _semantic_ids(g)
    id_lookup = set(ids)
    semantic_edges = _semantic_edges(g, id_lookup)
    projection_edges = _source_projection_edges(g, id_lookup)

    directed, undirected = _build_adjacency(ids, semantic_edges)
    _add_projection(directed, undirected, projection_edges)

    degree = _degree(undirected)
    pagerank = _pagerank(ids, directed)
    betweenness = _betweenness(ids, undirected)
    core = _core_numbers(ids, undirected)
    communities = _community_partition(ids, undirected, max_communities=max_communities)
    coverage = _provenance_coverage(g)
    semantic_in_degree, semantic_out_degree = _directed_counts(ids, semantic_edges)
    semantic_in_types, semantic_out_types = _directed_edge_types(ids, semantic_edges)

    def rank_by(scores: dict[str, float], *, include_zero: bool = False) -> list[dict]:
        # All rankings share the same tie-breakers and record fields.
        return _ranked_nodes(g, scores, degree, core, communities, limit, include_zero=include_zero)

    def as_float_scores(values: dict[str, int]) -> dict[str, float]:
        return {nid: float(values.get(nid, 0)) for nid in ids}

    def idea_flow(mode: str) -> list[dict]:
        return _idea_flow_records(
            g,
            ids,
            semantic_in_degree,
            semantic_out_degree,
            semantic_in_types,
            semantic_out_types,
            communities,
            limit,
            mode=mode,
        )

    important = rank_by(pagerank, include_zero=True)
    bridges = rank_by(betweenness)
    structural_core = rank_by(as_float_scores(core), include_zero=True)

    proof_ids = [record["id"] for record in important]
    proof_ids.extend(record["id"] for record in bridges)

    weak_claims = _weak_claims(g, coverage, limit)
    idea_attractors = idea_flow("attractor")
    idea_generators = idea_flow("generator")

    # Semantic edges are deduplicated as unordered pairs before being merged
    # with the projection pairs.
    undirected_semantic_pairs = {tuple(sorted((edge.src, edge.dst))) for edge in semantic_edges}

    stats = {
        "nodes": len(g.nodes),
        "edges": len(g.edges),
        "semantic_nodes": len(ids),
        "semantic_edges": len(semantic_edges),
        "source_projection_edges": len(projection_edges),
        "audit_edges": len(undirected_semantic_pairs | projection_edges),
        "source_nodes": sum(1 for node in g.nodes.values() if node.type == "source"),
        "semantic_components": len(_connected_components(ids, undirected)) if ids else 0,
        "communities": len(set(communities.values())),
        "max_core_number": max(core.values()) if core else 0,
    }

    counts = {
        "node_types": dict(Counter(node.type for node in g.nodes.values())),
        "edge_types": dict(Counter(edge.type for edge in g.edges)),
        "confidence": {
            "nodes": dict(Counter(node.confidence for node in g.nodes.values())),
            "edges": dict(Counter(edge.confidence for edge in g.edges)),
        },
    }

    ranked = {
        "important_concepts": important,
        "bridge_ideas": bridges,
        "idea_attractors": idea_attractors,
        "idea_generators": idea_generators,
        "structural_core": structural_core,
        "weak_claims": weak_claims,
        "weak_claim_queue": _weak_claim_queue(weak_claims, limit),
        "proof_trail": _proof_trail(g, proof_ids, limit),
    }

    centrality = {
        "pagerank": important,
        "betweenness": bridges,
        "core_number": structural_core,
        "semantic_in_degree": rank_by(as_float_scores(semantic_in_degree)),
        "semantic_out_degree": rank_by(as_float_scores(semantic_out_degree)),
    }

    directed_flow = {
        "note": (
            "Directed flow uses semantic edges only. Provenance/source projection edges are excluded "
            "so attractors and generators reflect relationship direction, not citation volume."
        ),
        "idea_attractors": idea_attractors,
        "idea_generators": idea_generators,
    }

    legwork_queue = {
        "idea_attractors": idea_attractors,
        "idea_generators": idea_generators,
        "weak_claims": _weak_claim_queue(weak_claims, limit),
    }

    low_confidence_edges = [
        _edge_record(g, edge, index)
        for index, edge in enumerate(g.edges)
        if _confidence_is_weak(edge.confidence)
    ]

    return {
        "schema_version": "memory-audit/v1",
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "stats": stats,
        "counts": counts,
        "ranked": ranked,
        "centrality": centrality,
        "directed_flow": directed_flow,
        "legwork_queue": legwork_queue,
        "communities": _community_records(g, communities, pagerank, degree, core),
        "low_confidence_edges": low_confidence_edges,
        "provenance_coverage": coverage,
    }
735
+
736
+
737
def _graph_payload(g: Graph) -> dict:
    """Convert the graph's nodes and edges into plain dicts via ``asdict``.

    Returns a dict with ``"nodes"`` (node id -> node fields) and ``"edges"``
    (a list of edge field dicts, in graph order).
    """
    node_payload = {}
    for node_id, node in g.nodes.items():
        node_payload[node_id] = asdict(node)
    edge_payload = list(map(asdict, g.edges))
    return {"nodes": node_payload, "edges": edge_payload}
742
+
743
+
744
+ HTML_TEMPLATE = r"""<!doctype html>
745
+ <html lang="en">
746
+ <meta charset="utf-8" />
747
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
748
+ <title>Memory Audit</title>
749
+ <style>
750
+ :root {
751
+ --bg: #f6f7f9;
752
+ --fg: #17202a;
753
+ --muted: #667085;
754
+ --line: #d7dde5;
755
+ --panel: #ffffff;
756
+ --accent: #0f766e;
757
+ --blue: #2563eb;
758
+ --amber: #a16207;
759
+ --red: #b42318;
760
+ --ink: #111827;
761
+ }
762
+ * { box-sizing: border-box; }
763
+ html, body { margin: 0; min-height: 100%; background: var(--bg); color: var(--fg);
764
+ font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; }
765
+ body { overflow: hidden; }
766
+ header { height: 56px; padding: 0 18px; display: flex; align-items: center; gap: 14px;
767
+ border-bottom: 1px solid var(--line); background: #fff; }
768
+ header h1 { margin: 0; font-size: 18px; letter-spacing: 0; }
769
+ header .meta { color: var(--muted); font-size: 13px; }
770
+ main { height: calc(100vh - 56px); display: grid; grid-template-columns: minmax(360px, 42%) 1fr; }
771
+ #panels { overflow: auto; padding: 14px; display: grid; gap: 12px; align-content: start; }
772
+ #map { min-width: 0; border-left: 1px solid var(--line); display: grid; grid-template-rows: auto 1fr;
773
+ background: #eef2f6; }
774
+ .metric-row { display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 8px; }
775
+ .metric, .panel, #details { background: var(--panel); border: 1px solid var(--line); border-radius: 8px; }
776
+ .metric { padding: 10px 12px; min-width: 0; }
777
+ .metric strong { display: block; font-size: 18px; line-height: 1.1; }
778
+ .metric span { display: block; color: var(--muted); font-size: 12px; margin-top: 3px; overflow-wrap: anywhere; }
779
+ .panel h2 { margin: 0; padding: 12px 12px 4px; font-size: 14px; color: var(--ink); }
780
+ .panel ol, .panel ul { list-style: none; margin: 0; padding: 0 8px 8px; }
781
+ .item { width: 100%; border: 0; background: transparent; text-align: left; padding: 8px;
782
+ border-radius: 6px; cursor: pointer; display: grid; gap: 3px; color: var(--fg); }
783
+ .item:hover, .item.selected { background: #edf7f5; }
784
+ .item-title { font-size: 13px; font-weight: 700; overflow-wrap: anywhere; }
785
+ .item-meta { color: var(--muted); font-size: 12px; overflow-wrap: anywhere; }
786
+ .score { color: var(--accent); font-variant-numeric: tabular-nums; }
787
+ .weak .score { color: var(--red); }
788
+ .toolbar { min-height: 48px; padding: 10px 12px; display: flex; align-items: center; justify-content: space-between;
789
+ gap: 12px; border-bottom: 1px solid var(--line); background: #fff; }
790
+ .toolbar strong { font-size: 14px; }
791
+ .toolbar span { color: var(--muted); font-size: 12px; }
792
+ #stage { position: relative; min-height: 0; }
793
+ svg { width: 100%; height: 100%; display: block; }
794
+ .edge { stroke: #96a1b2; stroke-width: 1.2; stroke-opacity: 0.46; }
795
+ .edge.weak { stroke: var(--red); stroke-dasharray: 5 4; stroke-opacity: 0.7; }
796
+ .node circle { stroke: #fff; stroke-width: 1.5; }
797
+ .node text { fill: #1f2937; font-size: 11px; paint-order: stroke; stroke: #f8fafc; stroke-width: 4px;
798
+ stroke-linecap: round; stroke-linejoin: round; pointer-events: none; }
799
+ .node.dim { opacity: 0.35; }
800
+ .node.selected circle { stroke: var(--accent); stroke-width: 4; }
801
+ #details { position: absolute; left: 12px; bottom: 12px; width: min(460px, calc(100% - 24px));
802
+ max-height: 40%; overflow: auto; padding: 12px; box-shadow: 0 16px 44px rgba(17,24,39,.14); }
803
+ #details h3 { margin: 0 0 4px; font-size: 15px; }
804
+ #details .body, #details li { font-size: 12px; line-height: 1.45; }
805
+ #details .body { margin: 8px 0; }
806
+ #details ul { margin: 6px 0 0; padding-left: 18px; }
807
+ code { color: var(--muted); overflow-wrap: anywhere; }
808
+ .pill { display: inline-flex; align-items: center; min-height: 18px; padding: 1px 6px; border-radius: 999px;
809
+ background: #eef2f6; color: var(--muted); font-size: 11px; }
810
+ .pill.low { color: var(--red); background: #fee4e2; }
811
+ .pill.medium { color: var(--amber); background: #fef0c7; }
812
+ @media (max-width: 900px) {
813
+ body { overflow: auto; }
814
+ main { height: auto; display: block; }
815
+ #panels { overflow: visible; }
816
+ #map { height: 720px; border-left: 0; border-top: 1px solid var(--line); }
817
+ .metric-row { grid-template-columns: repeat(2, minmax(0, 1fr)); }
818
+ }
819
+ </style>
820
+ <body>
821
+ <header>
822
+ <h1>Memory Audit</h1>
823
+ <div class="meta" id="generated"></div>
824
+ </header>
825
+ <main>
826
+ <section id="panels" aria-label="Ranked audit panels"></section>
827
+ <section id="map" aria-label="Graph canvas">
828
+ <div class="toolbar">
829
+ <strong>Graph Canvas</strong>
830
+ <span>Important and bridge nodes are labeled first. Select a panel row or node.</span>
831
+ </div>
832
+ <div id="stage">
833
+ <svg id="graph" viewBox="0 0 1200 760" role="img" aria-label="Memory audit graph"></svg>
834
+ <aside id="details"></aside>
835
+ </div>
836
+ </section>
837
+ </main>
838
+ <script>
839
+ const AUDIT = __AUDIT_JSON__;
840
+ const GRAPH = __GRAPH_JSON__;
841
+ const nodes = Object.values(GRAPH.nodes || {});
842
+ const edges = GRAPH.edges || [];
843
+ const nodeById = new Map(nodes.map(n => [n.id, n]));
844
+ const importantIds = new Set((AUDIT.ranked.important_concepts || []).slice(0, 10).map(n => n.id));
845
+ const bridgeIds = new Set((AUDIT.ranked.bridge_ideas || []).slice(0, 10).map(n => n.id));
846
+ const selected = { id: null };
847
+ const colors = {
848
+ person: "#dc2626", topic: "#2563eb", idea: "#0f766e", project: "#7c3aed",
849
+ goal: "#16a34a", question: "#a16207", decision: "#0891b2", reference: "#c026d3",
850
+ source: "#64748b"
851
+ };
852
+
853
+ function esc(value) {
854
+ return String(value || "").replace(/[&<>"]/g, c => ({
855
+ "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;"
856
+ }[c]));
857
+ }
858
+
859
+ function fmt(value, digits = 3) {
860
+ if (typeof value !== "number") return "";
861
+ return value.toFixed(digits);
862
+ }
863
+
864
+ function confidencePill(value) {
865
+ return `<span class="pill ${esc(value || "")}">${esc(value || "?")}</span>`;
866
+ }
867
+
868
+ function nodeLine(record) {
869
+ const score = typeof record.score === "number" ? `<span class="score">${fmt(record.score)}</span> ` : "";
870
+ return `${score}${esc(record.type)} · <code>${esc(record.id)}</code> ${confidencePill(record.confidence)}`;
871
+ }
872
+
873
+ function claimTitle(claim) {
874
+ if (claim.id) return claim.label || claim.id;
875
+ return `${claim.src || "?"} -> ${claim.dst || "?"}`;
876
+ }
877
+
878
+ function claimMeta(claim) {
879
+ if (claim.kind === "edge_confidence" || claim.kind === "missing_edge_source_id") {
880
+ return `${claim.kind} · ${claim.type || "edge"} · ${claim.confidence || "unknown"}`;
881
+ }
882
+ return `${claim.kind} · ${claim.type || "node"} · ${claim.confidence || "unknown"}`;
883
+ }
884
+
885
+ function renderPanels() {
886
+ document.getElementById("generated").textContent =
887
+ `${AUDIT.stats.nodes} nodes · ${AUDIT.stats.edges} edges · generated ${AUDIT.generated_at}`;
888
+ const coverage = AUDIT.provenance_coverage || {};
889
+ const panels = document.getElementById("panels");
890
+ panels.innerHTML = `
891
+ <div class="metric-row">
892
+ <div class="metric"><strong>${AUDIT.stats.semantic_nodes}</strong><span>semantic nodes</span></div>
893
+ <div class="metric"><strong>${AUDIT.stats.communities}</strong><span>communities</span></div>
894
+ <div class="metric"><strong>${AUDIT.stats.max_core_number}</strong><span>max k-core</span></div>
895
+ <div class="metric"><strong>${Math.round((coverage.node_coverage || 0) * 100)}%</strong><span>provenance coverage</span></div>
896
+ </div>
897
+ ${rankedPanel("Important Concepts", AUDIT.ranked.important_concepts || [], "PageRank over semantic edges")}
898
+ ${rankedPanel("Bridge Ideas", AUDIT.ranked.bridge_ideas || [], "Betweenness centrality")}
899
+ ${flowPanel("Idea Attractors", AUDIT.ranked.idea_attractors || [], "High semantic in-degree, low out-degree")}
900
+ ${flowPanel("Idea Generators", AUDIT.ranked.idea_generators || [], "High semantic out-degree, low in-degree")}
901
+ ${weakPanel("Weak Claim Queue", AUDIT.ranked.weak_claim_queue || AUDIT.ranked.weak_claims || [])}
902
+ ${proofPanel("Proof Trail", AUDIT.ranked.proof_trail || [])}
903
+ `;
904
+ panels.querySelectorAll("[data-node-id]").forEach(el => {
905
+ el.addEventListener("click", () => selectNode(el.dataset.nodeId));
906
+ });
907
+ }
908
+
909
+ function rankedPanel(title, records, subtitle) {
910
+ const rows = records.slice(0, 12).map(record => `
911
+ <li><button class="item" data-node-id="${esc(record.id)}">
912
+ <span class="item-title">${esc(record.label || record.id)}</span>
913
+ <span class="item-meta">${nodeLine(record)} · degree ${record.degree || 0} · core ${record.core_number || 0}</span>
914
+ </button></li>`).join("");
915
+ return `<section class="panel"><h2>${esc(title)}</h2><ol>${rows || `<li class="item-meta" style="padding:8px">None</li>`}</ol></section>`;
916
+ }
917
+
918
+ function flowPanel(title, records, subtitle) {
919
+ const rows = records.slice(0, 12).map(record => `
920
+ <li><button class="item" data-node-id="${esc(record.id)}">
921
+ <span class="item-title">${esc(record.label || record.id)}</span>
922
+ <span class="item-meta">${esc(subtitle)} · in ${record.in_degree || 0} · out ${record.out_degree || 0} · balance ${record.flow_balance || 0}</span>
923
+ <span class="item-meta">${esc(record.prompt || "")}</span>
924
+ </button></li>`).join("");
925
+ return `<section class="panel"><h2>${esc(title)}</h2><ol>${rows || `<li class="item-meta" style="padding:8px">None</li>`}</ol></section>`;
926
+ }
927
+
928
+ function weakPanel(title, claims) {
929
+ const rows = claims.slice(0, 14).map(claim => {
930
+ const nodeId = claim.id || claim.src || claim.dst || "";
931
+ return `<li><button class="item weak" data-node-id="${esc(nodeId)}">
932
+ <span class="item-title">${esc(claimTitle(claim))}</span>
933
+ <span class="item-meta"><span class="score">${esc(claim.confidence || "missing")}</span> ${esc(claimMeta(claim))}</span>
934
+ <span class="item-meta">${esc(claim.prompt || "Choose a review action before this becomes durable memory.")}</span>
935
+ </button></li>`;
936
+ }).join("");
937
+ return `<section class="panel"><h2>${esc(title)}</h2><ul>${rows || `<li class="item-meta" style="padding:8px">No weak claims found</li>`}</ul></section>`;
938
+ }
939
+
940
+ function proofPanel(title, records) {
941
+ const rows = records.slice(0, 10).map(record => {
942
+ const first = (record.provenance || [])[0] || {};
943
+ return `<li><button class="item" data-node-id="${esc(record.id)}">
944
+ <span class="item-title">${esc(record.label || record.id)}</span>
945
+ <span class="item-meta">${esc(first.source_id || "no source")} ${first.excerpt ? "- " + esc(first.excerpt).slice(0, 120) : ""}</span>
946
+ </button></li>`;
947
+ }).join("");
948
+ return `<section class="panel"><h2>${esc(title)}</h2><ul>${rows || `<li class="item-meta" style="padding:8px">No proof trails found</li>`}</ul></section>`;
949
+ }
950
+
951
+ function layoutNodes() {
952
+ const communities = new Map();
953
+ for (const community of AUDIT.communities || []) {
954
+ for (const id of community.members || []) communities.set(id, community.id);
955
+ }
956
+ const cx = 600, cy = 380;
957
+ const semantic = nodes.filter(n => n.type !== "source");
958
+ const sources = nodes.filter(n => n.type === "source");
959
+ semantic.forEach((node, index) => {
960
+ const community = communities.get(node.id) || 0;
961
+ const ring = 120 + (community % 5) * 72;
962
+ const angle = (Math.PI * 2 * index / Math.max(1, semantic.length)) + community * 0.63;
963
+ node.x = cx + Math.cos(angle) * ring;
964
+ node.y = cy + Math.sin(angle) * ring;
965
+ });
966
+ sources.forEach((node, index) => {
967
+ const angle = Math.PI * 2 * index / Math.max(1, sources.length);
968
+ node.x = cx + Math.cos(angle) * 340;
969
+ node.y = cy + Math.sin(angle) * 260;
970
+ });
971
+ }
972
+
973
+ function make(tag, attrs, parent) {
974
+ const el = document.createElementNS("http://www.w3.org/2000/svg", tag);
975
+ for (const [key, value] of Object.entries(attrs || {})) el.setAttribute(key, value);
976
+ parent.appendChild(el);
977
+ return el;
978
+ }
979
+
980
+ function renderGraph() {
981
+ layoutNodes();
982
+ const svg = document.getElementById("graph");
983
+ svg.innerHTML = "";
984
+ const root = make("g", {}, svg);
985
+ for (const edge of edges) {
986
+ const src = nodeById.get(edge.src), dst = nodeById.get(edge.dst);
987
+ if (!src || !dst) continue;
988
+ make("line", {
989
+ class: `edge ${(edge.confidence || "high") === "high" ? "" : "weak"}`,
990
+ x1: src.x, y1: src.y, x2: dst.x, y2: dst.y
991
+ }, root);
992
+ }
993
+ for (const node of nodes) {
994
+ const group = make("g", { class: "node", transform: `translate(${node.x},${node.y})`, "data-id": node.id }, root);
995
+ const radius = importantIds.has(node.id) ? 11 : bridgeIds.has(node.id) ? 10 : node.type === "source" ? 5 : 7;
996
+ make("circle", { r: radius, fill: colors[node.type] || "#475569" }, group);
997
+ if (importantIds.has(node.id) || bridgeIds.has(node.id)) {
998
+ make("text", { x: radius + 5, y: 4 }, group).textContent = node.label || node.id;
999
+ }
1000
+ group.addEventListener("click", ev => { ev.stopPropagation(); selectNode(node.id); });
1001
+ }
1002
+ svg.addEventListener("click", () => selectNode(null));
1003
+ }
1004
+
1005
+ function selectNode(id) {
1006
+ selected.id = id;
1007
+ document.querySelectorAll("[data-node-id]").forEach(el => el.classList.toggle("selected", el.dataset.nodeId === id));
1008
+ document.querySelectorAll(".node").forEach(el => {
1009
+ const isSelected = id && el.dataset.id === id;
1010
+ el.classList.toggle("selected", isSelected);
1011
+ el.classList.toggle("dim", Boolean(id) && !isSelected);
1012
+ });
1013
+ renderDetails(id);
1014
+ }
1015
+
1016
+ function renderDetails(id) {
1017
+ const details = document.getElementById("details");
1018
+ if (!id || !nodeById.has(id)) {
1019
+ details.innerHTML = `<h3>Select a memory</h3><div class="body">Ranked panels are the primary audit view. The canvas is for orientation.</div>`;
1020
+ return;
1021
+ }
1022
+ const node = nodeById.get(id);
1023
+ const rel = edges.filter(e => e.src === id || e.dst === id);
1024
+ const proof = rel.filter(e => e.type === "MENTIONED_IN" || e.type === "MADE_AT");
1025
+ details.innerHTML = `
1026
+ <h3>${esc(node.label || node.id)}</h3>
1027
+ <div class="item-meta">${esc(node.type)} · <code>${esc(node.id)}</code> ${confidencePill(node.confidence)}</div>
1028
+ ${node.body ? `<div class="body">${esc(node.body)}</div>` : ""}
1029
+ <div class="item-meta">Proof trail</div>
1030
+ <ul>${proof.map(e => `<li><code>${esc(e.src === id ? e.dst : e.src)}</code>${e.excerpt ? `: ${esc(e.excerpt)}` : ""}</li>`).join("") || "<li>No provenance edge found.</li>"}</ul>
1031
+ `;
1032
+ }
1033
+
1034
+ renderPanels();
1035
+ renderGraph();
1036
+ renderDetails(null);
1037
+ </script>
1038
+ </body>
1039
+ </html>
1040
+ """
1041
+
1042
+
1043
def render_memory_audit_html(g: Graph, analytics: dict, out_path: Path) -> Path:
    """Render the standalone Memory Audit page and write it to ``out_path``.

    The analytics dict and the graph payload are serialized to JSON and
    substituted for the ``__AUDIT_JSON__`` and ``__GRAPH_JSON__`` placeholders
    of ``HTML_TEMPLATE``.

    Args:
        g: Graph whose payload (built by ``_graph_payload``) is embedded.
        analytics: JSON-serializable audit result to embed.
        out_path: Destination file; missing parent directories are created.

    Returns:
        ``out_path``, after the page has been written as UTF-8.
    """
    import re  # local import: only this function needs it

    def _embed(payload: object) -> str:
        """Serialize ``payload`` so it is safe inside an inline <script>."""
        text = json.dumps(payload, ensure_ascii=False)
        # In JSON "<" can only occur inside strings, so escaping every "<"
        # keeps the parsed data identical while preventing "</script>" in any
        # letter case (and "<!--") from being seen by the HTML parser.
        # U+2028/U+2029 are escaped because older JavaScript engines treat
        # them as line terminators inside string literals.
        return (
            text.replace("<", "\\u003c")
            .replace("\u2028", "\\u2028")
            .replace("\u2029", "\\u2029")
        )

    payloads = {
        "__AUDIT_JSON__": _embed(analytics),
        "__GRAPH_JSON__": _embed(_graph_payload(g)),
    }
    # Substitute both placeholders in a single pass over the template. Chained
    # str.replace calls would rescan the already-inserted audit JSON, so user
    # text containing "__GRAPH_JSON__" would be overwritten with graph data.
    # A callable replacement also avoids backslash processing of the payload.
    html = re.sub(
        "__AUDIT_JSON__|__GRAPH_JSON__",
        lambda match: payloads[match.group(0)],
        HTML_TEMPLATE,
    )
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(html, encoding="utf-8")
    return out_path
1051
+
1052
+
1053
+ def _write_json(data: dict, out: str) -> Path | None:
1054
+ if out == "-":
1055
+ print(json.dumps(data, indent=2, sort_keys=True))
1056
+ return None
1057
+ out_path = Path(out).expanduser().resolve()
1058
+ out_path.parent.mkdir(parents=True, exist_ok=True)
1059
+ out_path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8")
1060
+ return out_path
1061
+
1062
+
1063
def run_audit(args: list[str]) -> int:
    """Run the ``mykg audit`` command.

    Loads the graph, builds the audit analytics, writes the analytics JSON
    (to a file or stdout) and, when ``--html`` is given, the standalone HTML
    page, then prints a one-line summary.

    Args:
        args: Command-line arguments, without the program name.

    Returns:
        ``0`` on success. Argument errors exit through argparse.
    """
    parser = argparse.ArgumentParser(prog="mykg audit")
    parser.add_argument("--graph", default=None, help="Graph JSON path. Defaults to MYGRAPH_PATH or local graph.")
    parser.add_argument("--out", default="analytics.json", help="Analytics JSON path, or '-' for stdout.")
    parser.add_argument("--html", default=None, help="Optional standalone Memory Audit HTML path.")
    parser.add_argument("--max-items", type=int, default=25, help="Ranked records per panel.")
    parser.add_argument("--max-communities", type=int, default=12, help="Maximum communities to derive.")
    parsed = parser.parse_args(args)

    # With "--out -" stdout carries the analytics JSON, so every status line
    # (not just the final summary) must go to stderr to keep stdout parseable.
    status_stream = sys.stderr if parsed.out == "-" else sys.stdout

    g = Graph.load(parsed.graph)
    analytics = build_memory_audit(g, limit=parsed.max_items, max_communities=parsed.max_communities)
    written_json = _write_json(analytics, parsed.out)
    if written_json:
        print(f"audit: wrote {written_json}", file=status_stream)
    if parsed.html:
        html_path = Path(parsed.html).expanduser().resolve()
        render_memory_audit_html(g, analytics, html_path)
        print(f"audit: wrote {html_path}", file=status_stream)
    coverage = analytics["provenance_coverage"]
    print(
        "audit: "
        f"{analytics['stats']['semantic_nodes']} semantic nodes, "
        f"{analytics['stats']['communities']} communities, "
        f"{round(coverage['node_coverage'] * 100)}% provenance coverage",
        file=status_stream,
    )
    return 0
1091
+
1092
+
1093
if __name__ == "__main__":
    # Script entry point: forward the CLI arguments (minus the program name)
    # and use the value returned by run_audit as the process exit status.
    raise SystemExit(run_audit(sys.argv[1:]))