knowledge-worker 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mygraph/mygraph.py ADDED
@@ -0,0 +1,773 @@
1
+ """
2
+ mygraph.py — personal knowledge graph (v0 schema, v1 ingest/check/export/viz).
3
+
4
+ Single-file core, stdlib-only. Read SPEC.md and DESIGN.md.
5
+
6
+ Usage:
7
+ mykg seed # populate fictional demo graph
8
+ mykg summary # stats overview
9
+ mykg query "provenance" # search + neighbors + provenance
10
+ mykg path goal:my-goal project:knowledge-worker
11
+ mykg dump # raw JSON
12
+ mykg reset # delete graph file
13
+
14
+ mykg ingest <path/to/file.md> # v1 M1: 5-stage extractor pipeline
15
+ mykg check [--provenance|--stale-edges|--pairs N|--source-candidates DIR]
16
+ mykg export --ttl # v1 M3: emit Turtle
17
+ mykg context # LLM-ready context snapshot
18
+ mykg viz # v1 M4: write offline HTML viewer
19
+ mykg audit # memory audit analytics + optional HTML
20
+ mykg discover # derived-edge proposals + second-order analytics
21
+
22
+ Graph file: mygraph.json next to this module by default, or MYGRAPH_PATH=/path/to/graph.json ("~" and relative paths are resolved).
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import json
28
+ import os
29
+ import re
30
+ import sys
31
+ from dataclasses import dataclass, asdict, field
32
+ from datetime import datetime, timezone
33
+ from typing import Any, Optional
34
+
35
# Directory that contains this module file.
HERE = os.path.dirname(os.path.abspath(__file__))

# Fallback graph location: used by resolve_graph_path() when neither an
# explicit path nor the MYGRAPH_PATH environment variable is provided.
DEFAULT_GRAPH_PATH = os.path.join(HERE, "mygraph.json")
37
+
38
+
39
def resolve_graph_path(path: Optional[str] = None) -> str:
    """Return the absolute graph-file path to use.

    Precedence: the explicit ``path`` argument, then the ``MYGRAPH_PATH``
    environment variable, then ``DEFAULT_GRAPH_PATH``. Empty values fall
    through to the next candidate. ``~`` is expanded before the path is made
    absolute.
    """
    chosen = path
    if not chosen:
        chosen = os.environ.get("MYGRAPH_PATH")
    if not chosen:
        chosen = DEFAULT_GRAPH_PATH
    expanded = os.path.expanduser(chosen)
    return os.path.abspath(expanded)
42
+
43
+
44
# Graph path resolved once, at import time (MYGRAPH_PATH is read here).
# NOTE(review): the functions in this part of the file call
# resolve_graph_path() again instead of reading this snapshot - confirm
# whether anything still depends on it.
GRAPH_PATH = resolve_graph_path()
45
+
46
+ # ---------- node + edge types -------------------------------------------------
47
+
48
# Public node-type vocabulary. list_nodes() uses it to tell a known-but-empty
# type apart from an unknown one; Graph.add_node() does not enforce it.
NODE_TYPES = {
    "person", "topic", "idea", "project", "goal",
    "question", "decision", "reference", "source",
}

# Public edge-type vocabulary. Graph.add_edge() does not enforce it;
# Graph.provenance() gives MENTIONED_IN and MADE_AT special meaning.
EDGE_TYPES = {
    "HAS_IDEA", "RELATES_TO", "SUPPORTED_BY", "CHALLENGES",
    "SERVES", "INVOLVES", "ABOUT", "MENTIONED_IN", "MADE_AT",
}

# Confidence levels; conf_tag() renders a warning for anything but "high".
CONFIDENCE = {"high", "medium", "low"}
59
+
60
+
61
@dataclass
class Node:
    """A vertex of the knowledge graph.

    Graph.add_node() treats ``id`` as the identity: re-adding an existing id
    updates label/body/confidence on the stored node and keeps its
    ``created_at``.
    """
    id: str  # unique key in Graph.nodes; the nid() helper builds "<type>:<slug>" ids
    type: str  # free-form on load; see NODE_TYPES for the public vocabulary
    label: str  # short human-readable name
    body: str = ""  # optional longer description
    confidence: str = "high"  # see CONFIDENCE; non-"high" values are flagged by conf_tag()
    # ISO-8601 UTC timestamp, taken when the instance is created unless supplied.
    created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())

    # Graph files are external input. The ingest validator keeps new extracted
    # nodes on the public schema, but loading must tolerate legacy/private types.
72
+
73
+
74
@dataclass
class Edge:
    """A directed, typed relationship between two nodes, tied to a source.

    Graph.add_edge() dedupes on ``(src, dst, type, source_id)`` and requires
    both endpoints to exist in the graph.
    """
    src: str  # id of the node the edge starts at
    dst: str  # id of the node the edge points to
    type: str  # relationship name; see EDGE_TYPES for the public vocabulary
    source_id: str  # which Source node this edge was extracted from
    excerpt: str = ""  # literal quote if available
    confidence: str = "high"  # see CONFIDENCE
    # ISO-8601 UTC timestamps, taken at construction unless supplied.
    created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    # Graph.load() seeds this from created_at for stored edges that lack it.
    last_seen: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
84
+
85
+
86
+ # ---------- store -------------------------------------------------------------
87
+
88
class Graph:
    """In-memory node/edge store with JSON persistence.

    ``nodes`` maps node id -> Node and ``edges`` is a flat list of Edge
    records. Graph files are external input, so read-side helpers must cope
    with edges whose endpoints are not present in ``nodes``.
    """

    def __init__(self, nodes: Optional[dict[str, Node]] = None,
                 edges: Optional[list[Edge]] = None):
        self.nodes: dict[str, Node] = nodes or {}
        self.edges: list[Edge] = edges or []

    @classmethod
    def load(cls, path: Optional[str] = None) -> "Graph":
        """Read a graph from JSON; return an empty graph if the file is absent.

        ``path`` is resolved through resolve_graph_path(). Unknown keys in the
        stored records are dropped; missing optional keys take the dataclass
        defaults.
        """
        path = resolve_graph_path(path)
        if not os.path.exists(path):
            return cls()
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        # forward-compat: drop unknown fields, default missing optional fields
        node_fields = {fld.name for fld in Node.__dataclass_fields__.values()}
        edge_fields = {fld.name for fld in Edge.__dataclass_fields__.values()}
        nodes = {
            node_id: Node(**{k: v for k, v in n.items() if k in node_fields})
            for node_id, n in data.get("nodes", {}).items()
        }
        edges = []
        for e in data.get("edges", []):
            kw = {k: v for k, v in e.items() if k in edge_fields}
            # back-compat: pre-v1 edges lack last_seen -> seed with created_at
            if "last_seen" not in kw and "created_at" in kw:
                kw["last_seen"] = kw["created_at"]
            edges.append(Edge(**kw))
        return cls(nodes=nodes, edges=edges)

    def save(self, path: Optional[str] = None) -> None:
        """Write the graph as indented, key-sorted JSON.

        The parent directory is created if needed. ``path`` is resolved
        through resolve_graph_path().
        """
        path = resolve_graph_path(path)
        data = {
            "nodes": {node_id: asdict(n) for node_id, n in self.nodes.items()},
            "edges": [asdict(e) for e in self.edges],
        }
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, sort_keys=True)

    # --- mutation -------------------------------------------------------------

    def add_node(self, node: Node) -> Node:
        """Insert ``node`` or update the stored node with the same id.

        On update the label and confidence are overwritten, the body is only
        replaced by a non-empty body, and ``created_at`` is kept. Returns the
        node object that is stored in the graph.
        """
        if node.id in self.nodes:
            existing = self.nodes[node.id]
            existing.label = node.label
            existing.body = node.body or existing.body
            existing.confidence = node.confidence
            return existing
        self.nodes[node.id] = node
        return node

    def add_edge(self, edge: Edge) -> Edge:
        """Append ``edge`` unless an equivalent one is already stored.

        Edges are deduplicated on ``(src, dst, type, source_id)``; the stored
        edge is returned unchanged in that case.

        Raises:
            ValueError: if either endpoint id is not a node of this graph.
        """
        key = (edge.src, edge.dst, edge.type, edge.source_id)
        for e in self.edges:
            if (e.src, e.dst, e.type, e.source_id) == key:
                return e
        # require both endpoints to exist
        if edge.src not in self.nodes:
            raise ValueError(f"edge src missing: {edge.src}")
        if edge.dst not in self.nodes:
            raise ValueError(f"edge dst missing: {edge.dst}")
        self.edges.append(edge)
        return edge

    # --- introspection --------------------------------------------------------

    def neighbors(self, node_id: str) -> list[tuple[Edge, Node, str]]:
        """Return (edge, other_node, direction) tuples for a given node.

        ``direction`` is "out" when ``node_id`` is the edge source and "in"
        when it is the destination. Edges whose other endpoint is not in the
        graph are skipped.
        """
        out = []
        for e in self.edges:
            if e.src == node_id and e.dst in self.nodes:
                out.append((e, self.nodes[e.dst], "out"))
            elif e.dst == node_id and e.src in self.nodes:
                out.append((e, self.nodes[e.src], "in"))
        return out

    def search(self, needle: str) -> list[Node]:
        """Case-insensitive substring search over node id, label and body.

        Results are sorted by (type, label).
        """
        n = needle.lower().strip()
        hits = [
            node for node in self.nodes.values()
            if n in node.id.lower()
            or n in node.label.lower()
            or n in node.body.lower()
        ]
        return sorted(hits, key=lambda x: (x.type, x.label))

    def shortest_path(self, src_id: str, dst_id: str) -> Optional[list[str]]:
        """BFS over the undirected projection.

        Returns the list of node ids from ``src_id`` to ``dst_id`` inclusive,
        or None when either id is unknown or no path exists. Edges that
        reference a node missing from the graph are ignored, and neighbours
        are visited in sorted order so that ties between equally short paths
        are broken the same way on every run.
        """
        if src_id not in self.nodes or dst_id not in self.nodes:
            return None
        adj: dict[str, set[str]] = {node_id: set() for node_id in self.nodes}
        for e in self.edges:
            # Loaded graphs may contain dangling edges; they cannot be part
            # of a path between two existing nodes.
            if e.src not in adj or e.dst not in adj:
                continue
            adj[e.src].add(e.dst)
            adj[e.dst].add(e.src)
        from collections import deque
        # parent doubles as the visited set; the start node has no parent.
        parent: dict[str, Optional[str]] = {src_id: None}
        q = deque([src_id])
        while q:
            cur = q.popleft()
            if cur == dst_id:
                path = []
                step: Optional[str] = cur
                while step is not None:
                    path.append(step)
                    step = parent[step]
                path.reverse()
                return path
            for nxt in sorted(adj[cur]):
                if nxt in parent:
                    continue
                parent[nxt] = cur
                q.append(nxt)
        return None

    def provenance(self, node_id: str) -> list[tuple[str, str]]:
        """Return [(source_id, excerpt)] for everything that ties this node back to a source.

        A MENTIONED_IN edge counts in either direction (the other endpoint is
        reported); a MADE_AT edge counts only when it starts at ``node_id``.
        """
        out = []
        for e in self.edges:
            if e.type == "MENTIONED_IN" and node_id in (e.src, e.dst):
                source_id = e.dst if e.src == node_id else e.src
                out.append((source_id, e.excerpt))
            elif e.type == "MADE_AT" and e.src == node_id:
                out.append((e.dst, e.excerpt))
        return out
208
+
209
+
210
+ # ---------- helpers -----------------------------------------------------------
211
+
212
def slug(s: str) -> str:
    """Lower-case ``s`` and collapse every run of characters outside
    ``a-z0-9`` into a single dash, without leading or trailing dashes."""
    lowered = s.lower()
    dashed = re.sub(r"[^a-z0-9]+", "-", lowered)
    return dashed.strip("-")
214
+
215
+
216
def nid(type_: str, label: str) -> str:
    """Build a node id of the form ``<type>:<slug-of-label>``."""
    return "{}:{}".format(type_, slug(label))
218
+
219
+
220
def conf_tag(c: str) -> str:
    """Return the warning suffix shown next to a confidence level.

    "high" yields an empty string so that fully trusted items stay
    uncluttered; "medium" and "low" have fixed wording; any other value is
    echoed after a generic WARN marker.
    """
    fixed_tags = (
        ("high", ""),
        ("medium", " WARN medium (paraphrase)"),
        ("low", " WARN LOW - UNVERIFIED"),
    )
    for level, tag in fixed_tags:
        if c == level:
            return tag
    return f" WARN {c}"
229
+
230
+
231
+ # ---------- seed --------------------------------------------------------------
232
+
233
def seed() -> Graph:
    """Write a fictional demo graph to the active graph path.

    The seed intentionally avoids private-owner facts so generated public
    examples can be committed safely. It spans three fictional work eras
    (a hardware build, a newsletter, and the knowledge-worker toolkit) with
    fixed timestamps, so audit/discover output over the demo graph is
    deterministic and shows multiple communities, bridge ideas, stale
    memories, and low-confidence candidate edges.

    The graph is built from scratch in memory (any existing file is not
    read), saved with ``Graph.save()`` to the resolved graph path, and
    returned to the caller.
    """
    g = Graph()

    # Three eras: (created_at, last_seen). Era 1 is old enough to go stale.
    ERA1 = ("2026-03-08T10:00:00+00:00", "2026-03-30T10:00:00+00:00")
    ERA2 = ("2026-05-02T10:00:00+00:00", "2026-05-12T10:00:00+00:00")
    ERA3 = ("2026-05-28T10:00:00+00:00", "2026-06-08T10:00:00+00:00")

    # Local shorthand: add a node whose created_at is the era's first timestamp.
    def node(id: str, type_: str, label: str, body: str,
             era: tuple[str, str] = ERA3, confidence: str = "high") -> None:
        g.add_node(Node(id=id, type=type_, label=label, body=body,
                        confidence=confidence, created_at=era[0]))

    # --- era 3: knowledge-worker (the original demo core, unchanged ids) ---
    node("source:demo-notes", "source", "demo-notes.md",
         "Fictional project notes used to demonstrate provenance-backed memory.")
    node("source:architecture-note", "source", "architecture-note.md",
         "Fictional architecture note for the public demo graph.")
    node("person:demo-owner", "person", "Demo Owner",
         "A fictional graph owner used only for public examples.")
    node("project:knowledge-worker", "project", "knowledge-worker",
         "A local-first toolkit for source-backed AI memory.")
    node("idea:context-memory", "idea", "Context memory",
         "AI sessions improve when durable context is stored as concepts instead of loose transcript chunks.")
    node("idea:provenance-first", "idea", "Provenance first",
         "Every durable claim should point back to source evidence.")
    node("idea:scoped-exports", "idea", "Scoped exports",
         "Share a task-sized slice of the graph with an AI, never the whole thing.")
    node("idea:promotion-queue", "idea", "Promotion queue",
         "AI proposes candidate memories; a human review queue decides what is promoted.")
    node("idea:single-owner-tools", "idea", "Single-owner tools",
         "Tools built for exactly one user can cut every scope corner that team software cannot.")
    node("goal:trusted-ai-assistance", "goal", "Trusted AI assistance",
         "Make assistant responses easier to verify and continue across sessions.")
    node("question:storage-backend", "question", "When should storage move beyond JSON?",
         "Open question: keep JSON until size or concurrency makes it awkward.")
    node("question:when-to-automate-ingest", "question", "When should ingest become automatic?",
         "Open question: manual review catches errors, but does not scale past a few notes a week.")
    node("decision:json-first", "decision", "Use JSON first",
         "Start with a simple JSON store before introducing a database.")
    node("decision:markdown-sources", "decision", "Ingest Markdown sources only",
         "Markdown keeps source notes diffable and human-reviewable.")
    node("decision:public-demo-data-only", "decision", "Only fictional data in public examples",
         "The public repo ships fictional demo data; real graphs stay local.")
    node("topic:knowledge-graphs", "topic", "Knowledge graphs",
         "Structured concepts and relationships for durable context.")
    node("topic:local-first", "topic", "Local-first software",
         "Software that keeps user data local unless the owner chooses otherwise.")
    node("topic:provenance", "topic", "Provenance",
         "Where a claim came from and whether it can be traced to evidence.")
    node("reference:coggrag", "reference", "CogGRAG",
         "A public reference about cognition-inspired graph retrieval.", confidence="medium")
    node("reference:graphrag-survey", "reference", "GraphRAG survey",
         "A survey of graph-based retrieval-augmented generation systems.", confidence="medium")
    node("reference:local-first-paper", "reference", "Local-first software paper",
         "The essay defining local-first software principles.")

    # --- era 1: garden-sensors (an older fictional hardware build) ---------
    node("source:greenhouse-journal", "source", "greenhouse-journal.md",
         "Fictional build journal for a small greenhouse sensor project.", ERA1)
    node("source:hardware-retro", "source", "hardware-retro.md",
         "Fictional retrospective on the greenhouse sensor hardware.", ERA1)
    node("project:garden-sensors", "project", "garden-sensors",
         "A fictional ESP32 sensor network for a backyard greenhouse.", ERA1)
    node("goal:automated-greenhouse", "goal", "Automated greenhouse",
         "Keep greenhouse plants alive with minimal manual checking.", ERA1)
    node("idea:drip-irrigation-loop", "idea", "Drip irrigation loop",
         "Close the loop: soil moisture readings drive the drip valve directly.", ERA1)
    node("idea:low-power-mesh", "idea", "Low-power sensor mesh",
         "Deep-sleep ESP32 nodes could run a mesh for months on small batteries.", ERA1)
    node("idea:sensor-data-as-memory", "idea", "Sensor data as memory",
         "Sensor logs are provenance too: a reading is a source excerpt about the world.", ERA1)
    node("decision:esp32-over-rpi", "decision", "Use ESP32 boards instead of a Raspberry Pi",
         "Microcontrollers beat a full computer for battery-powered sensing.", ERA1)
    node("decision:solar-power-budget", "decision", "Size the solar panel to winter output",
         "Plan the power budget around the worst month, not the average.", ERA1, confidence="medium")
    node("question:battery-life-winter", "question", "Will batteries survive winter?",
         "Open question: cold halves battery capacity; the mesh may not last the season.", ERA1)
    node("topic:embedded-systems", "topic", "Embedded systems",
         "Small computers with hard power and memory constraints.", ERA1)
    node("topic:sensor-networks", "topic", "Sensor networks",
         "Many small devices reporting measurements over a shared protocol.", ERA1)
    node("reference:esp32-deep-sleep-guide", "reference", "ESP32 deep-sleep guide",
         "A public guide to microcontroller deep-sleep power budgets.", ERA1)

    # --- era 2: field-notes (a fictional newsletter about the projects) ----
    node("source:newsletter-plan", "source", "newsletter-plan.md",
         "Fictional planning note for a monthly build-log newsletter.", ERA2)
    node("source:quarterly-review", "source", "quarterly-review.md",
         "Fictional quarterly review of all side projects.", ERA2)
    node("project:field-notes", "project", "field-notes",
         "A fictional monthly newsletter documenting the owner's builds.", ERA2)
    node("goal:publish-monthly", "goal", "Publish monthly",
         "Ship one newsletter issue every month without heroics.", ERA2)
    node("goal:sustainable-side-projects", "goal", "Sustainable side projects",
         "Keep hobby projects fun and finished instead of abandoned.", ERA2)
    node("idea:write-what-you-build", "idea", "Write what you build",
         "Each project becomes newsletter material; writing pressure improves the build.", ERA2)
    node("idea:show-the-graph", "idea", "Show the graph",
         "Publish the project knowledge graph itself as the newsletter artifact.", ERA2)
    node("idea:weekly-shipping-log", "idea", "Weekly shipping log",
         "A small weekly log makes the monthly issue almost write itself.", ERA2)
    node("idea:boring-tech-default", "idea", "Boring tech by default",
         "Choose boring technology unless the project IS the experiment.", ERA2)
    node("decision:monthly-cadence", "decision", "Commit to a monthly cadence",
         "Monthly is slow enough to sustain and fast enough to matter.", ERA2)
    node("decision:plain-text-newsletter", "decision", "Plain-text newsletter format",
         "No templates, no images: plain text ships on time.", ERA2)
    node("decision:one-project-per-quarter", "decision", "One active project per quarter",
         "Serialize side projects instead of running them in parallel.", ERA2)
    node("question:audience-growth", "question", "Does the newsletter need an audience?",
         "Open question: writing for zero readers may not stay motivating.", ERA2)
    node("topic:technical-writing", "topic", "Technical writing",
         "Explaining systems in prose, for readers and future self.", ERA2)
    node("reference:digital-garden-essay", "reference", "Digital garden essay",
         "A public essay on publishing evolving notes instead of finished posts.",
         ERA2, confidence="medium")

    # Local shorthand: add an edge stamped with the era's (created_at, last_seen).
    # Graph.add_edge raises ValueError if an endpoint was not created above.
    def edge(src: str, dst: str, type_: str, source_id: str,
             excerpt: str = "", confidence: str = "high",
             era: tuple[str, str] = ERA3) -> None:
        g.add_edge(Edge(src=src, dst=dst, type=type_, source_id=source_id,
                        excerpt=excerpt, confidence=confidence,
                        created_at=era[0], last_seen=era[1]))

    src_demo = "source:demo-notes"
    src_arch = "source:architecture-note"
    src_journal = "source:greenhouse-journal"
    src_retro = "source:hardware-retro"
    src_plan = "source:newsletter-plan"
    src_review = "source:quarterly-review"

    # --- provenance: every non-source node is MENTIONED_IN a source --------
    # Each MENTIONED_IN edge runs node -> source and names that same source
    # as its source_id. A node may be mentioned in more than one source.
    for node_id, excerpt in [
        ("person:demo-owner", "The demo owner wants assistant memory that survives across sessions."),
        ("project:knowledge-worker", "Build a local-first toolkit for source-backed AI memory."),
        ("idea:context-memory", "Store durable concepts instead of loose transcript chunks."),
        ("idea:provenance-first", "Every durable claim needs source evidence."),
        ("idea:scoped-exports", "Export a task-sized slice, never the whole graph."),
        ("idea:single-owner-tools", "Software for one user can cut every corner."),
        ("goal:trusted-ai-assistance", "Make assistant responses easier to verify and continue."),
        ("decision:public-demo-data-only", "Only fictional data ships in public examples."),
        ("topic:knowledge-graphs", "Use a graph of concepts and relationships."),
        ("topic:local-first", "Keep owner data local unless explicitly exported."),
        ("reference:local-first-paper", "The local-first paper defines the storage principles."),
    ]:
        edge(node_id, src_demo, "MENTIONED_IN", src_demo, excerpt)

    for node_id, excerpt in [
        ("question:storage-backend", "When should storage move beyond JSON?"),
        ("question:when-to-automate-ingest", "Manual review is the bottleneck; when does ingest become automatic?"),
        ("decision:json-first", "Use JSON first; add a database only when needed."),
        ("decision:markdown-sources", "Ingest Markdown only; it stays diffable and reviewable."),
        ("idea:promotion-queue", "Candidates wait in a review queue until the owner promotes them."),
        ("topic:provenance", "Track where every claim came from."),
        ("reference:coggrag", "CogGRAG is a public reference for graph retrieval."),
        ("reference:graphrag-survey", "The GraphRAG survey maps the retrieval landscape."),
        ("idea:sensor-data-as-memory", "Sensor readings could enter the graph as provenance-backed observations."),
    ]:
        edge(node_id, src_arch, "MENTIONED_IN", src_arch, excerpt)

    for node_id, excerpt in [
        ("project:garden-sensors", "Garden sensors: a small ESP32 build for the greenhouse."),
        ("goal:automated-greenhouse", "The greenhouse should mostly look after itself."),
        ("idea:drip-irrigation-loop", "Let soil moisture drive the drip valve directly."),
        ("idea:sensor-data-as-memory", "A sensor log line is an excerpt about the world."),
        ("decision:solar-power-budget", "Size the panel for December, not for June."),
        ("topic:sensor-networks", "A handful of nodes reporting over one protocol."),
        ("topic:provenance", "Sensor logs are evidence: timestamped, source-attributed readings."),
    ]:
        edge(node_id, src_journal, "MENTIONED_IN", src_journal, excerpt, era=ERA1)

    for node_id, excerpt in [
        ("idea:low-power-mesh", "Deep-sleep nodes might run for months on small cells."),
        ("decision:esp32-over-rpi", "The Pi drew too much power; ESP32 boards won."),
        ("question:battery-life-winter", "Cold weather halves capacity; winter is the real test."),
        ("topic:embedded-systems", "Hard power budgets change every design choice."),
        ("reference:esp32-deep-sleep-guide", "The deep-sleep guide documents real-world power draw."),
    ]:
        edge(node_id, src_retro, "MENTIONED_IN", src_retro, excerpt, era=ERA1)

    for node_id, excerpt in [
        ("project:field-notes", "Field notes: a monthly build-log newsletter."),
        ("project:garden-sensors", "The greenhouse build is the first newsletter arc."),
        ("goal:publish-monthly", "One issue a month, no heroics."),
        ("idea:write-what-you-build", "Every project doubles as newsletter material."),
        ("idea:show-the-graph", "Publish the knowledge graph as the artifact."),
        ("idea:weekly-shipping-log", "A weekly log makes the monthly issue write itself."),
        ("decision:monthly-cadence", "Monthly is sustainable; weekly was not."),
        ("decision:plain-text-newsletter", "Plain text ships on time."),
        ("topic:technical-writing", "Writing about systems clarifies them."),
        ("reference:digital-garden-essay", "The digital-garden essay argues for evolving notes."),
    ]:
        edge(node_id, src_plan, "MENTIONED_IN", src_plan, excerpt, era=ERA2)

    for node_id, excerpt in [
        ("goal:sustainable-side-projects", "Projects should end finished, not abandoned."),
        ("idea:write-what-you-build", "Writing pressure kept the sensor build honest."),
        ("idea:boring-tech-default", "Boring tech unless the project is the experiment."),
        ("decision:one-project-per-quarter", "Serialize projects; parallel ones all stall."),
        ("project:garden-sensors", "The sensor project consumed the whole quarter."),
        ("question:audience-growth", "Is writing for zero readers sustainable?"),
    ]:
        edge(node_id, src_review, "MENTIONED_IN", src_review, excerpt, era=ERA2)

    # --- semantic edges: era 3 (knowledge-worker) ---------------------------
    edge("person:demo-owner", "idea:context-memory", "HAS_IDEA", src_demo)
    edge("person:demo-owner", "idea:provenance-first", "HAS_IDEA", src_demo)
    edge("project:knowledge-worker", "goal:trusted-ai-assistance", "SERVES", src_demo)
    edge("project:knowledge-worker", "topic:knowledge-graphs", "INVOLVES", src_demo)
    edge("project:knowledge-worker", "topic:local-first", "INVOLVES", src_demo)
    edge("idea:context-memory", "topic:knowledge-graphs", "RELATES_TO", src_demo)
    edge("idea:provenance-first", "goal:trusted-ai-assistance", "SERVES", src_demo)
    edge("decision:json-first", "question:storage-backend", "ABOUT", src_arch)
    edge("idea:context-memory", "reference:coggrag", "SUPPORTED_BY", src_arch,
         excerpt="CogGRAG is a public reference for graph retrieval.", confidence="medium")
    edge("idea:scoped-exports", "topic:local-first", "RELATES_TO", src_demo)
    edge("idea:scoped-exports", "goal:trusted-ai-assistance", "SERVES", src_demo)
    edge("idea:scoped-exports", "reference:local-first-paper", "SUPPORTED_BY", src_demo)
    edge("idea:promotion-queue", "topic:provenance", "RELATES_TO", src_arch)
    edge("idea:promotion-queue", "reference:graphrag-survey", "SUPPORTED_BY", src_arch,
         confidence="medium")
    edge("idea:single-owner-tools", "topic:local-first", "RELATES_TO", src_demo)
    edge("idea:provenance-first", "topic:provenance", "RELATES_TO", src_arch)
    edge("decision:markdown-sources", "topic:provenance", "ABOUT", src_arch)
    edge("decision:public-demo-data-only", "goal:trusted-ai-assistance", "SERVES", src_demo)
    edge("decision:public-demo-data-only", "topic:local-first", "RELATES_TO", src_demo)

    # --- semantic edges: era 1 (garden-sensors) -----------------------------
    edge("project:garden-sensors", "goal:automated-greenhouse", "SERVES", src_journal, era=ERA1)
    edge("project:garden-sensors", "topic:embedded-systems", "INVOLVES", src_retro, era=ERA1)
    edge("project:garden-sensors", "topic:sensor-networks", "INVOLVES", src_journal, era=ERA1)
    edge("person:demo-owner", "idea:drip-irrigation-loop", "HAS_IDEA", src_journal, era=ERA1)
    edge("idea:drip-irrigation-loop", "goal:automated-greenhouse", "SERVES", src_journal, era=ERA1)
    edge("idea:low-power-mesh", "topic:sensor-networks", "RELATES_TO", src_retro,
         confidence="low", era=ERA1)
    edge("idea:low-power-mesh", "reference:esp32-deep-sleep-guide", "SUPPORTED_BY",
         src_retro, excerpt="The deep-sleep guide documents real-world power draw.", era=ERA1)
    edge("question:battery-life-winter", "idea:low-power-mesh", "CHALLENGES", src_retro, era=ERA1)
    edge("decision:esp32-over-rpi", "topic:embedded-systems", "ABOUT", src_retro, era=ERA1)
    edge("decision:solar-power-budget", "goal:automated-greenhouse", "SERVES", src_journal,
         confidence="medium", era=ERA1)
    # bridge: the hardware project reaches into the memory toolkit's domain
    # (the first bridge edge carries the default era-3 timestamps)
    edge("idea:sensor-data-as-memory", "topic:knowledge-graphs", "RELATES_TO", src_arch)
    edge("idea:sensor-data-as-memory", "topic:sensor-networks", "RELATES_TO", src_journal, era=ERA1)

    # --- semantic edges: era 2 (field-notes) --------------------------------
    edge("project:field-notes", "goal:publish-monthly", "SERVES", src_plan, era=ERA2)
    edge("project:field-notes", "topic:technical-writing", "INVOLVES", src_plan, era=ERA2)
    edge("person:demo-owner", "idea:write-what-you-build", "HAS_IDEA", src_plan, era=ERA2)
    edge("idea:weekly-shipping-log", "goal:publish-monthly", "SERVES", src_plan, era=ERA2)
    edge("idea:weekly-shipping-log", "topic:technical-writing", "RELATES_TO", src_plan, era=ERA2)
    edge("idea:show-the-graph", "reference:digital-garden-essay", "SUPPORTED_BY",
         src_plan, confidence="low", era=ERA2)
    # bridge: the newsletter idea reaches into the memory toolkit's domain
    edge("idea:show-the-graph", "topic:knowledge-graphs", "RELATES_TO", src_plan, era=ERA2)
    edge("idea:boring-tech-default", "goal:sustainable-side-projects", "SERVES", src_review, era=ERA2)
    edge("decision:monthly-cadence", "goal:publish-monthly", "SERVES", src_plan, era=ERA2)
    edge("decision:plain-text-newsletter", "topic:technical-writing", "ABOUT", src_plan, era=ERA2)
    edge("decision:one-project-per-quarter", "goal:sustainable-side-projects", "SERVES",
         src_review, era=ERA2)
    edge("question:audience-growth", "goal:publish-monthly", "CHALLENGES", src_review, era=ERA2)

    # Persist to the resolved graph path (overwrites the file if it exists).
    g.save()
    return g
507
+
508
+
509
+ # ---------- summary / query ---------------------------------------------------
510
+
511
def summary() -> None:
    """Print node and edge totals for the active graph, then a per-type
    breakdown of each, with types listed alphabetically."""
    graph = Graph.load()

    def tally(items) -> dict[str, int]:
        # Count how many items carry each ``type`` value.
        counts: dict[str, int] = {}
        for item in items:
            counts.setdefault(item.type, 0)
            counts[item.type] += 1
        return counts

    node_counts = tally(graph.nodes.values())
    edge_counts = tally(graph.edges)

    print(f"mygraph - {resolve_graph_path()}")
    print(f" {len(graph.nodes)} nodes, {len(graph.edges)} edges")

    print()
    print(" Nodes by type:")
    for kind, count in sorted(node_counts.items()):
        print(f" {kind:<12} {count}")

    print()
    print(" Edges by type:")
    for kind, count in sorted(edge_counts.items()):
        print(f" {kind:<14} {count}")
530
+
531
+
532
def query(needle: str) -> None:
    """Print every node matching ``needle`` with its edges and provenance.

    Matching is delegated to ``Graph.search``. Anything that is not
    high-confidence is flagged: once in a summary line above the results,
    on the node header, on each affected edge, and again below the node.
    """
    graph = Graph.load()
    matches = graph.search(needle)
    if not matches:
        print(f"No nodes match '{needle}'.")
        return

    # Up-front summary of how many matches need caution.
    flagged = sum(1 for m in matches if m.confidence != "high")
    if flagged:
        print(f"WARN {flagged} of {len(matches)} matched node(s) are NOT high-confidence - see flags below.")
    print(f"Matches for '{needle}':\n")

    for hit in matches:
        # Header line carries the node's own confidence flag.
        print(f" [{hit.type}] {hit.id}{conf_tag(hit.confidence)}")
        print(f" label: {hit.label}")
        if hit.body:
            print(f" body : {hit.body}")

        # Edges: flag both the edge and the node on its far end.
        links = graph.neighbors(hit.id)
        if links:
            print(" edges:")
        for link, other, direction in links:
            if direction == "out":
                arrow = "->"
            else:
                arrow = "<-"
            quote = f" // \"{link.excerpt}\"" if link.excerpt else ""
            edge_flag = conf_tag(link.confidence)
            target_flag = conf_tag(other.confidence)
            print(f" {arrow} {link.type:<13} {other.id}{target_flag}{quote}{edge_flag}")

        # Provenance: sources tied to this node, with excerpts when present.
        origins = graph.provenance(hit.id)
        if origins:
            print(" provenance:")
        for source_id, excerpt in origins:
            suffix = f' "{excerpt}"' if excerpt else ""
            print(f" <- {source_id}{suffix}")

        # Repeat the caution after the details for non-high nodes.
        if hit.confidence != "high":
            print(f" WARN Treat content as confidence={hit.confidence}; do not quote as verbatim source.")
        print()
572
+
573
+
574
def path(a: str, b: str) -> None:
    """Print the shortest path between node ids ``a`` and ``b`` found by
    ``Graph.shortest_path``, one node per line, or a notice if there is none."""
    graph = Graph.load()
    route = graph.shortest_path(a, b)
    if route:
        print(f"Path from {a} to {b}:")
        for step in route:
            hop = graph.nodes[step]
            print(f" [{hop.type}] {hop.label} ({hop.id})")
    else:
        print(f"No path between {a} and {b}.")
584
+
585
+
586
def dump() -> None:
    """Print the raw contents of the active graph file.

    The file is echoed as stored, without parsing. A missing file raises
    the exception from ``open``.
    """
    target = resolve_graph_path()
    with open(target, encoding="utf-8") as handle:
        raw = handle.read()
        print(raw)
589
+
590
+
591
def reset() -> None:
    """Delete the active graph file if it exists and report what happened."""
    path = resolve_graph_path()
    if not os.path.exists(path):
        print("No graph file to delete.")
        return
    os.remove(path)
    print(f"Deleted {path}")
598
+
599
+
600
def list_nodes(type_: str) -> None:
    """Print ALL nodes of a given type, so type listings are always complete
    (search-style retrieval can silently miss members of a category).

    ``type_`` is matched case-insensitively and may be given in the plural
    ("decisions" -> "decision"). An exact match against a known or observed
    type wins, so types that themselves end in "s" (graph files may contain
    legacy/private types) are not mangled; otherwise a single trailing "s"
    is dropped if that yields a known type. Only when neither matches are
    all trailing "s" characters stripped, as before.

    Output is sorted by label; bodies of 200 characters or more are cut to
    200 characters plus "...".
    """
    g = Graph.load()
    requested = type_.lower()
    known = NODE_TYPES | {n.type for n in g.nodes.values()}
    if requested in known:
        # Exact hit: never strip letters from a real type name.
        t = requested
    elif requested.endswith("s") and requested[:-1] in known:
        # Accept plural ("decisions" -> "decision")
        t = requested[:-1]
    else:
        # Unknown either way: keep the historical normalisation.
        t = requested.rstrip("s")

    matches = [n for n in g.nodes.values() if n.type == t]
    if not matches:
        if t not in NODE_TYPES:
            observed = sorted(known)
            print(f"No nodes of type '{t}'. Known/observed: {', '.join(observed)}")
            return
        print(f"No nodes of type '{t}'.")
        return

    non_high = [n for n in matches if n.confidence != "high"]
    if non_high:
        print(f"WARN {len(non_high)} of {len(matches)} are NOT high-confidence.")
    print(f"All {t}s ({len(matches)}):\n")
    for n in sorted(matches, key=lambda x: x.label):
        print(f" [{n.type}] {n.id}{conf_tag(n.confidence)}")
        print(f" {n.label}")
        if n.body:
            body = n.body if len(n.body) < 200 else n.body[:200] + "..."
            print(f" {body}")
        print()
625
+
626
+
627
def state(entry: str) -> None:
    """Append a manual mood/state entry to state_log.jsonl and echo it.

    This is a sidecar log stored next to this module: one JSON object per
    line with a UTC timestamp ("ts") and the text ("entry"). The main graph
    is NOT touched (per SPEC §5 sidecar track).
    """
    log = os.path.join(HERE, "state_log.jsonl")
    stamp = datetime.now(timezone.utc).isoformat()
    record = {"ts": stamp, "entry": entry}
    with open(log, "a", encoding="utf-8") as sink:
        line = json.dumps(record)
        sink.write(line + "\n")
    print(f"Logged state -> {log}")
    print(f" {record['ts']}: {entry}")
639
+
640
+
641
+ # ---------- CLI ---------------------------------------------------------------
642
+
643
# Command-line help text. main() prints it when no command is given, for
# -h/--help/help, and as the fallback for an unrecognised command.
# NOTE(review): main() also accepts "export_context" as an alias of "context";
# the alias is not listed here — confirm whether that is intentional.
USAGE = """\
Usage:
mykg seed
mykg summary
mykg query <string>
mykg list <type> # all nodes of a type (decision, goal, idea, ...)
mykg path <node_id> <node_id>
mykg state "<entry>" # append mood/state to state_log.jsonl (sidecar)
mykg dump
mykg reset
mykg ingest <path/to/file.md> [--non-interactive] [--auto-accept-high]
[--candidates-file <path>]
[--backend claude|openai|ollama] [--model <name>]
mykg check [--provenance] [--stale-edges] [--pairs N]
[--source-candidates <dir>]
mykg export --ttl [--out <path>]
mykg context [--out <path>] [--max-ideas N]
mykg viz [--graph <path>] [--out <path>] [--no-open]
mykg audit [--graph <path>] [--out analytics.json] [--html memory_audit.html]
mykg discover [--graph <path>] [--out discovery.json]
[--candidates <path>] [--limit N] [--stale-days N]
"""
665
+
666
+
667
# Commands implemented in sibling modules: command -> (module, entry point).
# The module is imported only when its command is actually invoked, so the
# core commands keep working even if an optional module cannot be imported.
_SUBMODULE_COMMANDS = {
    "ingest": ("ingest", "run_ingest"),
    "check": ("check", "run_check"),
    "export": ("owl_io", "run_export"),
    "context": ("export_context", "run_export_context"),
    "export_context": ("export_context", "run_export_context"),
    "viz": ("viz", "run_viz"),
    "audit": ("memory_audit", "run_audit"),
    "discover": ("discover", "run_discover"),
}


def _run_submodule_command(module_name: str, runner_name: str, args: list[str]) -> int:
    """Lazily import a sibling module and call its entry point with ``args``."""
    import importlib

    if __package__:
        # Imported as part of the package: resolve relative to it.
        module = importlib.import_module(f".{module_name}", __package__)
    else:
        # Run as a plain script: siblings are importable as top-level modules.
        module = importlib.import_module(module_name)
    return getattr(module, runner_name)(args)


def main(argv: Optional[list[str]] = None) -> int:
    """Dispatch one ``mykg`` command and return a process exit code.

    Args:
        argv: full argument vector including the program name at index 0
            (``sys.argv`` layout). Defaults to ``sys.argv``.

    Returns:
        0 on success; 1 when no command is given, a required argument is
        missing, or the command is not recognised. Commands implemented in
        sibling modules return whatever their entry point returns.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print(USAGE)
        return 1
    cmd, args = argv[1], argv[2:]
    if cmd in {"-h", "--help", "help"}:
        print(USAGE)
        return 0
    if cmd == "seed":
        g = seed()
        print(f"Seeded. {len(g.nodes)} nodes, {len(g.edges)} edges -> {resolve_graph_path()}")
        return 0
    if cmd == "summary":
        summary()
        return 0
    if cmd == "query":
        if not args:
            print("Need a query string.")
            return 1
        # Unquoted multi-word queries arrive as several args; rejoin them.
        query(" ".join(args))
        return 0
    if cmd == "path":
        if len(args) < 2:
            print("Need two node ids.")
            return 1
        path(args[0], args[1])
        return 0
    if cmd == "dump":
        dump()
        return 0
    if cmd == "reset":
        reset()
        return 0
    if cmd == "list":
        if not args:
            print("Need a node type. Valid: " + ", ".join(sorted(NODE_TYPES)))
            return 1
        list_nodes(args[0])
        return 0
    if cmd == "state":
        if not args:
            print("Need a state entry. Example: mykg state \"focused, 10:30am, coffee\"")
            return 1
        state(" ".join(args))
        return 0
    if cmd in _SUBMODULE_COMMANDS:
        module_name, runner_name = _SUBMODULE_COMMANDS[cmd]
        return _run_submodule_command(module_name, runner_name, args)
    # Name the offending command on stderr so a typo is obvious; stdout still
    # carries only the usage text, as before.
    print(f"Unknown command: {cmd}", file=sys.stderr)
    print(USAGE)
    return 1
757
+
758
+
759
def cli() -> int:
    """Console entry point: run ``main(sys.argv)`` and tolerate a closed pipe.

    stdout and stderr are switched to the "replace" error handler (when the
    stream supports ``reconfigure``) so characters the stream cannot encode
    are substituted instead of raising.

    Returns:
        The exit code from ``main``, or 0 when the reader of a pipe went away
        before all output was written.
    """
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(errors="replace")
    try:
        exit_code = main(sys.argv)
        # Flush inside the try: piped output is buffered, so without this the
        # BrokenPipeError would only surface at interpreter shutdown, outside
        # this handler, as an "Exception ignored" message.
        if sys.stdout is not None:
            sys.stdout.flush()
        return exit_code
    except BrokenPipeError:
        # Piped output closed early (e.g. `mykg query x | head`); exit quietly.
        # Point stdout at devnull so the flush at shutdown cannot fail again.
        devnull = os.open(os.devnull, os.O_WRONLY)
        try:
            os.dup2(devnull, sys.stdout.fileno())
        finally:
            # dup2 made its own copy; release the temporary descriptor.
            os.close(devnull)
        return 0
770
+
771
+
772
# Script entry point: run the CLI and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(cli())