@0dai-dev/cli 4.3.5 → 4.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/README.md +12 -11
  2. package/bin/0dai.js +214 -40
  3. package/lib/ai/manifest/mcp-exposure-contract.json +121 -0
  4. package/lib/ai/meta/manifest/mcp-tool-tiers.json +435 -0
  5. package/lib/ai/registry/mcp-catalog.json +98 -0
  6. package/lib/commands/auth.js +55 -1
  7. package/lib/commands/compliance.js +1 -1
  8. package/lib/commands/detect.js +10 -4
  9. package/lib/commands/doctor.js +545 -26
  10. package/lib/commands/experience.js +40 -5
  11. package/lib/commands/export.js +73 -0
  12. package/lib/commands/feedback.js +157 -15
  13. package/lib/commands/gh.js +26 -0
  14. package/lib/commands/graph.js +9 -4
  15. package/lib/commands/heatmap.js +1 -1
  16. package/lib/commands/init.js +222 -30
  17. package/lib/commands/mcp.js +129 -21
  18. package/lib/commands/models.js +138 -41
  19. package/lib/commands/provider.js +30 -59
  20. package/lib/commands/quota.js +1 -1
  21. package/lib/commands/receipt.js +1 -1
  22. package/lib/commands/run.js +18 -7
  23. package/lib/commands/runner.js +31 -1
  24. package/lib/commands/status.js +44 -11
  25. package/lib/commands/swarm.js +130 -12
  26. package/lib/commands/trust.js +286 -0
  27. package/lib/commands/update.js +184 -38
  28. package/lib/commands/usage.js +1 -1
  29. package/lib/commands/validate.js +32 -3
  30. package/lib/commands/vault.js +46 -9
  31. package/lib/python/__init__.py +0 -0
  32. package/lib/python/agent_quotas.py +525 -0
  33. package/lib/python/anomaly_alert.py +397 -0
  34. package/lib/python/anti_pattern_detector.py +799 -0
  35. package/lib/python/auth.py +443 -0
  36. package/lib/python/capi_profile_guard.py +477 -0
  37. package/lib/python/compliance_report.py +581 -0
  38. package/lib/python/drift_detector.py +388 -0
  39. package/lib/python/experience_pipeline.py +1130 -0
  40. package/lib/python/graph.py +19 -0
  41. package/lib/python/graph_core.py +293 -0
  42. package/lib/python/graph_io.py +179 -0
  43. package/lib/python/graph_legacy.py +2052 -0
  44. package/lib/python/graph_legacy_helpers.py +221 -0
  45. package/lib/python/graph_outcomes_core.py +85 -0
  46. package/lib/python/graph_queries.py +171 -0
  47. package/lib/python/graph_slice.py +198 -0
  48. package/lib/python/graph_slicer.py +576 -0
  49. package/lib/python/graph_slicer_cli.py +60 -0
  50. package/lib/python/graph_validation.py +64 -0
  51. package/lib/python/heatmap.py +934 -0
  52. package/lib/python/json_utils.py +193 -0
  53. package/lib/python/mcp_exposure_check.py +247 -0
  54. package/lib/python/model_router.py +1434 -0
  55. package/lib/python/project_manager.py +621 -0
  56. package/lib/python/provider_profiles.py +1618 -0
  57. package/lib/python/provider_registry.py +1211 -0
  58. package/lib/python/provider_registry_cli.py +125 -0
  59. package/lib/python/receipt_png.py +727 -0
  60. package/lib/python/structural_memory.py +325 -0
  61. package/lib/python/swarm_cost.py +177 -0
  62. package/lib/python/usage_ledger.py +569 -0
  63. package/lib/scripts/mcp_tier_config.py +240 -0
  64. package/lib/shared.js +97 -14
  65. package/lib/tui/index.mjs +35174 -0
  66. package/lib/utils/activation_telemetry.js +230 -11
  67. package/lib/utils/constants.js +7 -1
  68. package/lib/utils/export-bundler.js +285 -0
  69. package/lib/utils/identity.js +198 -1
  70. package/lib/utils/mcp-auth.js +81 -15
  71. package/lib/utils/plan.js +1 -1
  72. package/lib/vault/index.js +19 -3
  73. package/lib/vault/storage.js +21 -2
  74. package/lib/wizard.js +5 -2
  75. package/package.json +9 -3
  76. package/scripts/build-python-bundle.js +106 -0
  77. package/scripts/build-tui.js +14 -1
  78. package/scripts/harvest_experience.py +523 -0
  79. package/scripts/postinstall.js +15 -9
@@ -0,0 +1,2052 @@
1
+ #!/usr/bin/env python3
2
+ # pragma: loc-waiver — #1069 split: pure helpers extracted to graph_legacy_helpers.py (2245->2052); residual surplus is stateful graph-mutation + constraint/outcome/deliberation + CLI logic, deferred to a follow-up
3
+ """0dai Project Context Graph — typed knowledge graph for project state.
4
+
5
+ Implements the schema from docs/project-context-graph.md (derived from
6
+ `user_submitted/project context graph schema.pdf`):
7
+
8
+ - 10 node types (§1): Component, Technology, Decision, Requirement, Risk,
9
+ TestPlan, Endpoint, DesignArtifact, MarketEntity, Session, Deliberation
10
+ - 20+ edge types (§2): architecture, decision, quality, design, market,
11
+ ownership, session
12
+ - JSON adjacency list format (§3): `project_graph.json`
13
+ - Traversal algorithm (§4): anchor extraction → BFS 2-hop expand →
14
+ role-filter → serialize ≤400 tokens
15
+ - Query patterns (§8): decisions_for, tech_context, open_risks, impact,
16
+ stale_tech, unsatisfied_reqs
17
+
18
+ Distinct from `scripts/generate_project_layer.py` (which generates flat
19
+ YAML) — this module manages the TYPED graph that deliberations mutate.
20
+
21
+ Bootstrap from project-layer.yaml is in `scripts/generate_project_graph.py`.
22
+ Integration into working_group._build_context_slice is M14 work.
23
+
24
+ Design notes:
25
+ - Flat JSON adjacency list until graph exceeds 500 nodes (per PDF §6.3).
26
+ Migration path to Kuzu/SurrealDB/Neo4j is a future concern.
27
+ - No PyYAML dependency. Stdlib only (constraint_no_go in project-layer).
28
+ - Node IDs follow `<type_prefix>_<slug>` convention for grep-friendly
29
+ category filtering without reading node.type field.
30
+ - Thread-safe for single-writer (working_group orchestrator) only.
31
+ Concurrent mutation from multiple processes is not supported.
32
+ """
33
+ from __future__ import annotations
34
+
35
+ import datetime as _dt
36
+ import fnmatch # noqa: F401 retained for back-compat re-export (graph facade star-imports graph_legacy)
37
+ import json
38
+ import logging
39
+ import pathlib
40
+ import re
41
+ from typing import Any, Iterable, Optional
42
+
43
+ from graph_legacy_helpers import ( # noqa: F401 re-export for back-compat (#757 #1069)
44
+ _extract_keywords,
45
+ _extract_violation_pattern,
46
+ _line_matches_constraint_rule,
47
+ _parse_constraints_yaml,
48
+ _parse_diff_lines,
49
+ _path_patterns_overlap,
50
+ matches_constraint_diff,
51
+ )
52
+
53
+ log = logging.getLogger("0dai.graph")
54
+
55
+ SCHEMA_VERSION = 1
56
+
57
+ # ---------------------------------------------------------------------------
58
+ # Node + edge type registries (PDF §1-§2)
59
+ # ---------------------------------------------------------------------------
60
+
61
+ # Node types — the 10 taxonomic categories plus meta categories.
62
+ # Each node's `type` field MUST be a member of this set, or validation fails.
63
+ #
64
+ # `Outcome` added in M13 P2 for EP-02 (Outcome Tracking & Error Memory).
65
+ # An Outcome node records what actually happened after a Decision was
66
+ # applied in practice — status, lessons learned, tags for semantic search.
67
+ NODE_TYPES = frozenset({
68
+ "Component", # product parts: services, modules, pages
69
+ "Technology", # external tech: frameworks, languages, libraries
70
+ "Decision", # architectural/product decisions from deliberations
71
+ "Requirement", # functional + non-functional requirements
72
+ "Risk", # identified concerns + open risks
73
+ "TestPlan", # test strategy nodes
74
+ "Endpoint", # API endpoints + entry points
75
+ "DesignArtifact", # UX mockups, flows, brand artifacts
76
+ "MarketEntity", # competitors, target segments, market forces
77
+ "Session", # meta: a work session
78
+ "Deliberation", # meta: a working-group deliberation
79
+ "Outcome", # M13 P2 EP-02: retrospective evaluation of a Decision
80
+ "Artifact", # M19 P0: release artifacts (versions, tags)
81
+ "Event", # M19 P0: session/meta events for timeline queries
82
+ "Constraint", # #479: architecture constraints — hard rules from decisions
83
+ })
84
+
85
+ # Prefix convention for node IDs. Helps filter/grep by category without
86
+ # loading the full node object.
87
+ NODE_ID_PREFIXES: dict[str, str] = {
88
+ "Component": "comp",
89
+ "Technology": "tech",
90
+ "Decision": "dec",
91
+ "Requirement": "req",
92
+ "Risk": "risk",
93
+ "TestPlan": "test",
94
+ "Endpoint": "ep",
95
+ "DesignArtifact": "design",
96
+ "MarketEntity": "mkt",
97
+ "Session": "session",
98
+ "Deliberation": "delib",
99
+ "Outcome": "outcome", # M13 P2 EP-02
100
+ "Artifact": "artifact", # M19 P0: release artifacts
101
+ "Event": "event", # M19 P0: session/meta events
102
+ "Constraint": "cstr", # #479: architecture constraints
103
+ }
104
+
105
+ # Edge types — the 20+ relation categories. Each edge's `type` field MUST
106
+ # be a member of this set. See docs/project-context-graph.md §2 for
107
+ # direction semantics + allowed source/target type pairs.
108
+ EDGE_TYPES = frozenset({
109
+ # Architecture
110
+ "uses", # Component -> Technology
111
+ "depends_on", # Component -> Component
112
+ "exposes", # Component -> Endpoint
113
+ "part_of", # Component -> Component (sub-module relation)
114
+ # Decision
115
+ "affects", # Decision -> Component | Technology | Requirement
116
+ "chose", # Decision -> Technology (picked this over alt)
117
+ "satisfies", # Decision -> Requirement
118
+ "supersedes", # Decision -> Decision (new replaces old)
119
+ "introduces", # Decision -> Risk (new risk from this choice)
120
+ "mitigates", # Decision -> Risk (resolves existing risk)
121
+ "decided_in", # Decision -> Deliberation
122
+ # Quality
123
+ "covers", # TestPlan -> Component | Requirement
124
+ "tests", # TestPlan -> Component
125
+ "blocks", # Risk -> Component | Decision
126
+ "violates", # Component -> Requirement (currently failing)
127
+ # Design
128
+ "designs", # DesignArtifact -> Component
129
+ "follows", # DesignArtifact -> DesignArtifact (style guide)
130
+ # Market
131
+ "targets", # Component | Decision -> MarketEntity
132
+ "competes_with", # MarketEntity -> MarketEntity
133
+ # Ownership
134
+ "owned_by", # Component -> Session (last-touched bookkeeping)
135
+ "identified_by", # Risk -> Session | Deliberation
136
+ "created_by", # Decision -> Deliberation
137
+ "approved_by", # Decision -> Session
138
+ # Session
139
+ "produced", # Session -> Decision | Component
140
+ "updated", # Session -> Component
141
+ # M13 P2 EP-02: Outcome Tracking
142
+ "evaluates", # Outcome -> Decision (retrospective evaluation)
143
+ "decision_outcome", # Decision -> Outcome (task-result feedback loop)
144
+ # EP: Decision Ancestry
145
+ "decision_ancestry", # Decision -> Decision (child influenced by parent)
146
+ # M19 P0: Graph dogfood
147
+ "released_as", # Decision -> Artifact (decision shipped in version)
148
+ "contains", # Artifact -> Decision (version contains decisions)
149
+ "triggered_by", # Event -> Session (event caused by session)
150
+ "observed_in", # Event -> Artifact (event observed in version)
151
+ # #479: Architecture Constraints
152
+ "declared_by", # Constraint -> Decision (constraint derived from this decision)
153
+ "implies", # Constraint -> Constraint (constraint A implies constraint B)
154
+ "constrains", # Constraint -> Component | Technology (scope of constraint)
155
+ "forbids", # Constraint -> Component | Technology (anti-pattern scope)
156
+ })
157
+
158
+ # Valid status values for Outcome nodes per PDF enhancement pack EP-02.
159
+ # `confirmed`: decision played out as predicted
160
+ # `revised`: decision needed adjustment but core idea was sound
161
+ # `reverted`: decision was rolled back, lesson learned
162
+ # `partially_applied`: decision implemented partially, rest is still pending
163
+ OUTCOME_STATUSES = frozenset({
164
+ "confirmed",
165
+ "revised",
166
+ "reverted",
167
+ "partially_applied",
168
+ })
169
+
170
+ # Stage-based review threshold for outcomes (PDF EP-02).
171
+ # Decisions older than this threshold without an Outcome get flagged.
172
+ # Matches project-layer.yaml `stage` values; fallback is 30 days.
173
+ STAGE_OUTCOME_THRESHOLDS_DAYS: dict[str, int] = {
174
+ "idea": 14,
175
+ "mvp": 14,
176
+ "growth": 30,
177
+ "scale": 60,
178
+ }
179
+ DEFAULT_STAGE_THRESHOLD_DAYS = 30
180
+
181
+ # Role type-interest map (PDF §4 step 3). When building a context slice
182
+ # for a given role, only include nodes whose type is in the role's
183
+ # interest set. Unknown roles fall back to ALL_TYPES (no filter).
184
+ ROLE_TYPE_INTERESTS: dict[str, frozenset[str]] = {
185
+ "cto": frozenset({"Decision", "Technology", "Risk", "Component", "Requirement", "Constraint"}),
186
+ "arch": frozenset({"Component", "Technology", "Endpoint", "Requirement", "Risk", "Constraint"}),
187
+ "designer": frozenset({"DesignArtifact", "Component", "Requirement"}),
188
+ "art_director": frozenset({"DesignArtifact", "Component", "MarketEntity"}),
189
+ "qa": frozenset({"TestPlan", "Component", "Risk", "Requirement", "Constraint"}),
190
+ "security": frozenset({"Risk", "Requirement", "Component", "Technology", "Constraint"}),
191
+ "sre": frozenset({"Component", "Technology", "Risk", "Endpoint", "Constraint"}),
192
+ "cmo": frozenset({"MarketEntity", "Requirement", "Component", "Decision"}),
193
+ }
194
+
195
+ # Default edge weight when caller does not supply one. Traversal uses
196
+ # weight to decide whether hop-2 edges are worth following (threshold
197
+ # in expand_bfs).
198
+ DEFAULT_EDGE_WEIGHT = 1.0
199
+ HOP2_WEIGHT_THRESHOLD = 0.5 # hop-2 edges below this are pruned
200
+
201
+ # Serialization token budget for context slice (PDF §4 step 4).
202
+ # 4 chars per token is a coarse but conservative estimate (matches OpenAI
203
+ # tokenizer for English text).
204
+ CHARS_PER_TOKEN = 4
205
+ DEFAULT_SLICE_TOKEN_BUDGET = 400
206
+
207
+ # ---------------------------------------------------------------------------
208
+ # M14: Provenance — "what 0dai KNOWS vs what 0dai THINKS" (Torvalds critique)
209
+ # ---------------------------------------------------------------------------
210
+ #
211
+ # Every node carries a `source_type` field indicating how the data was
212
+ # derived. Deterministic sources (file_parse, git_diff, operator) are
213
+ # "KNOWS" — they can be trusted as facts. LLM-derived sources
214
+ # (scout_ai, deliberation_ai) are "THINKS" — they're opinions that may
215
+ # hallucinate. CLI output must visually distinguish the two so users
216
+ # don't treat LLM opinions as facts.
217
+ #
218
+ # Per Torvalds (2026-04-08 01:06 UTC):
219
+ # "Draw a clear line: this is what 0dai KNOWS (from the graph, from files, from API headers).
220
+ # This is what 0dai THINKS (from LLM inference). Never mix the two."
221
+
222
+ DETERMINISTIC_SOURCES = frozenset({
223
+ "bootstrap", # generate_project_graph.py from project-layer.yaml
224
+ "file_parse", # detected from package.json, pyproject.toml, etc.
225
+ "git_diff", # parsed from git diff (regex, not LLM)
226
+ "operator", # manually entered via 0dai graph add / 0dai learn
227
+ "tool_output", # npm_audit, eslint, lighthouse, etc. (EP-03)
228
+ })
229
+
230
+ AI_DERIVED_SOURCES = frozenset({
231
+ "scout_ai", # scout CLI web search findings
232
+ "deliberation_ai", # working-group synthesis extraction
233
+ "hard_block", # parsed from deliberation hard block strings (LLM-derived)
234
+ "red_team_ai", # EP-04 adversarial review
235
+ "forecast_ai", # EP-14 temporal simulation
236
+ "pattern_ai", # EP-09 cross-project pattern matching
237
+ })
238
+
239
+ DEFAULT_SOURCE = "operator"
240
+
241
+
242
def is_deterministic_source(source_type: str) -> bool:
    """Tell whether *source_type* is a 'KNOWS' origin rather than a 'THINKS' one.

    The answer is plain membership in the module-level DETERMINISTIC_SOURCES set.
    """
    deterministic = source_type in DETERMINISTIC_SOURCES
    return deterministic
245
+
246
+
247
def source_marker(source_type: str) -> str:
    """Pick the one-character CLI marker for a node's provenance.

    Returns "✓" for deterministic sources, "~" for sources listed in
    AI_DERIVED_SOURCES, and "?" for anything in neither set.
    """
    if is_deterministic_source(source_type):
        marker = "✓"
    elif source_type in AI_DERIVED_SOURCES:
        marker = "~"
    else:
        marker = "?"
    return marker
254
+
255
+
256
+ # ---------------------------------------------------------------------------
257
+ # Graph construction helpers
258
+ # ---------------------------------------------------------------------------
259
+
260
def empty_graph() -> dict:
    """Build a brand-new graph skeleton with its metadata filled in.

    The result always has a `nodes` dict, an `edges` list and a `meta` dict
    (schema version, matching created/updated timestamps, zeroed counters),
    so prefer it over assembling the structure by hand.
    """
    stamp = _now_iso()
    meta = {
        "schema_version": SCHEMA_VERSION,
        "created_at": stamp,
        "updated_at": stamp,
        "node_count": 0,
        "edge_count": 0,
    }
    return {"nodes": {}, "edges": [], "meta": meta}
279
+
280
+
281
def _now_iso() -> str:
    """Format the current UTC time as `YYYY-MM-DDTHH:MM:SSZ` (second precision)."""
    utc_now = _dt.datetime.now(_dt.timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")
284
+
285
+
286
def _slug(text: str) -> str:
    """Turn arbitrary text into a lowercase, underscore-separated id fragment.

    Every run of characters outside [a-zA-Z0-9] collapses to a single "_";
    leading/trailing underscores are trimmed. If nothing is left, the
    placeholder "unnamed" is returned.
    """
    lowered = text.strip().lower()
    fragment = re.sub(r"[^a-zA-Z0-9]+", "_", lowered).strip("_")
    if not fragment:
        return "unnamed"
    return fragment
290
+
291
+
292
def make_node_id(node_type: str, name: str) -> str:
    """Derive the canonical `<prefix>_<slug>` id for a node.

    The prefix comes from NODE_ID_PREFIXES and the slug from `_slug(name)`.

    Example:
        make_node_id("Technology", "Next.js 14") -> "tech_next_js_14"

    Raises:
        ValueError: if `node_type` has no registered (non-empty) prefix.
    """
    prefix = NODE_ID_PREFIXES.get(node_type)
    if prefix:
        return f"{prefix}_{_slug(name)}"
    raise ValueError(f"unknown node type: {node_type}")
302
+
303
+
304
def add_node(
    graph: dict,
    node_id: str,
    node_type: str,
    name: str,
    *,
    status: str = "active",
    description: str = "",
    source_type: str = DEFAULT_SOURCE,
    extra: Optional[dict[str, Any]] = None,
) -> dict:
    """Insert a node, or refresh it in place when the id already exists.

    The call is an upsert: a second call with the same id rewrites the
    node's type/name/status/description and bumps `updated_at` instead of
    raising. `extra`, when given, is merged on top of the node dict.

    Provenance rule on update (`source_type`):
      - incoming source is deterministic  -> stored source is replaced
      - incoming is not deterministic and the stored one is not either
                                          -> stored source is replaced
      - incoming is not deterministic but the stored one is
                                          -> stored source is kept

    NOTE(review): in the last case only `source_type` is protected; name,
    status and description are still overwritten by the incoming values.
    Confirm that is the intended meaning of "protect facts".

    Returns:
        The node dict that now lives in `graph["nodes"]`.

    Raises:
        ValueError: for a type outside NODE_TYPES or an empty `node_id`.
    """
    if node_type not in NODE_TYPES:
        raise ValueError(f"unknown node type: {node_type!r}")
    if not node_id:
        raise ValueError("node id must be non-empty")

    stamp = _now_iso()
    nodes = graph["nodes"]
    current = nodes.get(node_id)

    if not current:
        # Fresh insert: build the full record, then refresh the counters.
        fresh = {
            "id": node_id,
            "type": node_type,
            "name": name,
            "status": status,
            "description": description,
            "source_type": source_type,
            "created_at": stamp,
            "updated_at": stamp,
        }
        if extra:
            fresh.update(extra)
        nodes[node_id] = fresh
        graph["meta"]["node_count"] = len(nodes)
        graph["meta"]["updated_at"] = stamp
        return fresh

    # Update path: descriptive fields are always rewritten.
    current.update(
        type=node_type,
        name=name,
        status=status,
        description=description,
        updated_at=stamp,
    )
    stored_source = current.get("source_type", DEFAULT_SOURCE)
    incoming_is_fact = is_deterministic_source(source_type)
    if incoming_is_fact or not is_deterministic_source(stored_source):
        current["source_type"] = source_type
    # Otherwise a non-deterministic source met a deterministic record:
    # the stored source_type is left untouched.
    if extra:
        current.update(extra)
    graph["meta"]["updated_at"] = stamp
    return current
371
+
372
+
373
def add_edge(
    graph: dict,
    source: str,
    target: str,
    edge_type: str,
    *,
    weight: float = DEFAULT_EDGE_WEIGHT,
    extra: Optional[dict[str, Any]] = None,
) -> dict:
    """Append a directed edge `source -> target` to the graph.

    Duplicates are not detected here; use `find_edge` first when upsert
    semantics are wanted.

    Stored shape:
        {"from": source, "to": target, "type": edge_type,
         "weight": weight, "created_at": iso}
    plus whatever keys `extra` contributes.

    Returns:
        The edge dict that was appended.

    Raises:
        ValueError: if `edge_type` is not in EDGE_TYPES, or either endpoint
            is not a key of `graph["nodes"]`.
    """
    if edge_type not in EDGE_TYPES:
        raise ValueError(f"unknown edge type: {edge_type!r}")
    known_nodes = graph["nodes"]
    if source not in known_nodes:
        raise ValueError(f"edge source {source!r} not in graph")
    if target not in known_nodes:
        raise ValueError(f"edge target {target!r} not in graph")

    created = _now_iso()
    record = {
        "from": source,
        "to": target,
        "type": edge_type,
        "weight": float(weight),
        "created_at": created,
    }
    if extra:
        record.update(extra)

    edges = graph["edges"]
    edges.append(record)
    meta = graph["meta"]
    meta["edge_count"] = len(edges)
    # Read back from the record: `extra` may have replaced created_at.
    meta["updated_at"] = record["created_at"]
    return record
415
+
416
+
417
def find_edge(
    graph: dict,
    source: str,
    target: str,
    edge_type: Optional[str] = None,
) -> Optional[dict]:
    """Look up the first edge going from `source` to `target`.

    When `edge_type` is given, the edge's `type` must match as well.
    Edges are scanned in list order; None is returned when nothing matches.
    """
    candidates = (
        edge
        for edge in graph["edges"]
        if edge["from"] == source
        and edge["to"] == target
        and (edge_type is None or edge["type"] == edge_type)
    )
    return next(candidates, None)
429
+
430
+
431
def outgoing_edges(graph: dict, node_id: str) -> list[dict]:
    """Collect, in list order, every edge whose `from` endpoint is `node_id`."""
    leaving = []
    for edge in graph["edges"]:
        if edge["from"] == node_id:
            leaving.append(edge)
    return leaving
434
+
435
+
436
def incoming_edges(graph: dict, node_id: str) -> list[dict]:
    """Collect, in list order, every edge whose `to` endpoint is `node_id`."""
    arriving = []
    for edge in graph["edges"]:
        if edge["to"] == node_id:
            arriving.append(edge)
    return arriving
439
+
440
+
441
def nodes_by_type(graph: dict, node_type: str) -> list[dict]:
    """List the nodes whose `type` equals `node_type`, ordered by their `id`.

    Sorting by id keeps the output stable regardless of dict insertion order.
    """
    selected = [
        node for node in graph["nodes"].values() if node.get("type") == node_type
    ]
    selected.sort(key=lambda node: node["id"])
    return selected
447
+
448
+
449
def _usage_path(path: pathlib.Path) -> pathlib.Path:
    """Map a graph file path to its usage-counter sidecar path.

    The sidecar sits in the same directory and inserts `_usage` between the
    file stem and its suffix (`graph.json` -> `graph_usage.json`).
    """
    graph_file = pathlib.Path(path)
    sidecar_name = f"{graph_file.stem}_usage{graph_file.suffix}"
    return graph_file.with_name(sidecar_name)
453
+
454
+
455
def _default_usage() -> dict:
    """Build an all-zero usage payload stamped with the current time.

    A new dict (with new nested containers) is created on every call, so
    callers may mutate the result freely.
    """
    zeroed_totals = {"loads": 0, "queries": 0, "updates": 0, "saves": 0}
    return {
        "schema_version": 1,
        "updated_at": _now_iso(),
        "totals": zeroed_totals,
        "operations": {},
        "recent": [],
    }
469
+
470
+
471
def load_graph_usage(path: pathlib.Path) -> dict:
    """Load the usage counters that belong to the graph stored at `path`.

    The sidecar file is located with `_usage_path(path)`. The function never
    raises for a missing, unreadable or malformed sidecar: in those cases it
    returns (or patches in) the defaults from `_default_usage()`.

    Normalisation applied to a successfully parsed payload:
      - `totals`: replaced by the defaults when not a dict; otherwise each
        default counter key is coerced to int (non-numeric values become 0).
      - `operations`: reset to {} when not a dict; otherwise every value is
        coerced to int (non-numeric values become 0) and keys are stringified.
      - `recent`: reset to [] when not a list.
      - `schema_version`: coerced to int, falling back to 1.
      - `updated_at`: coerced to str, falling back to the current timestamp.

    Returns:
        A usage payload dict that always has the keys listed above.
    """

    def _as_int(value: Any, fallback: int) -> int:
        # A hand-edited or corrupt sidecar must degrade to the fallback,
        # exactly like unparseable JSON does, instead of raising.
        try:
            return int(value or fallback)
        except (TypeError, ValueError, OverflowError):
            return fallback

    usage_path = _usage_path(path)
    try:
        # EAFP: a missing file surfaces as OSError, so no separate exists()
        # probe is needed. ValueError covers both JSONDecodeError and
        # UnicodeDecodeError.
        payload = json.loads(usage_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return _default_usage()

    default = _default_usage()
    if not isinstance(payload, dict):
        return default

    totals = payload.get("totals")
    if isinstance(totals, dict):
        for key in default["totals"]:
            totals[key] = _as_int(totals.get(key), 0)
    else:
        payload["totals"] = default["totals"]

    operations = payload.get("operations")
    if isinstance(operations, dict):
        payload["operations"] = {
            str(name): _as_int(value, 0) for name, value in operations.items()
        }
    else:
        payload["operations"] = {}

    if not isinstance(payload.get("recent"), list):
        payload["recent"] = []

    payload["schema_version"] = _as_int(payload.get("schema_version"), 1)
    payload["updated_at"] = str(payload.get("updated_at") or default["updated_at"])
    return payload
503
+
504
+
505
def save_graph_usage(path: pathlib.Path, usage: dict) -> None:
    """Write `usage` as pretty-printed JSON to the sidecar of the graph at `path`.

    Missing parent directories are created first; the file ends with a
    trailing newline and is encoded as UTF-8.
    """
    sidecar = _usage_path(path)
    sidecar.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(usage, indent=2, ensure_ascii=False)
    sidecar.write_text(serialized + "\n", encoding="utf-8")
510
+
511
+
512
def record_graph_usage(
    graph: dict,
    operation: str,
    *,
    kind: str = "queries",
    count: int = 1,
) -> None:
    """Bump the usage counters for one graph-backed operation (best effort).

    The graph's private `_usage_path` entry (set by `load_graph` /
    `save_graph`) names the graph file whose sidecar is updated. Graphs
    without that entry are ignored.

    Bookkeeping performed:
      - `totals[kind]` grows by `count`, but only when `kind` is already a
        key of the totals dict;
      - `operations[operation]` grows by `count`;
      - a `{operation, kind, count, at}` entry is pushed to the front of
        `recent`, which is capped at 20 entries.

    Usage tracking must never break the operation being tracked, so any
    failure to read, interpret or write the sidecar makes this a no-op.
    """
    usage_path_raw = graph.get("_usage_path")
    if not usage_path_raw:
        return

    def _as_int(value: Any) -> int:
        # Non-numeric counters in a damaged sidecar restart from zero.
        try:
            return int(value or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    try:
        graph_path = pathlib.Path(str(usage_path_raw))
        payload = load_graph_usage(graph_path)
    except (TypeError, ValueError, OSError):
        # OSError covers e.g. a permission error while probing the sidecar.
        return

    totals = payload.setdefault("totals", _default_usage()["totals"])
    if kind in totals:
        totals[kind] = _as_int(totals.get(kind)) + count
    operations = payload.setdefault("operations", {})
    operations[operation] = _as_int(operations.get(operation)) + count
    payload["updated_at"] = _now_iso()
    recent = payload.setdefault("recent", [])
    recent.insert(0, {
        "operation": operation,
        "kind": kind,
        "count": count,
        "at": payload["updated_at"],
    })
    del recent[20:]  # keep only the 20 most recent entries

    try:
        save_graph_usage(graph_path, payload)
    except OSError:
        return
554
+
555
+
556
def summarize_graph_usage(path: pathlib.Path) -> dict:
    """Return the usage ledger for `path` plus a `top_operations` summary.

    `top_operations` holds at most 8 `{"operation", "count"}` entries,
    ordered by descending count and then by operation name. It is an empty
    list when the ledger's `operations` value is not a dict.
    """
    usage = load_graph_usage(path)
    operations = usage.get("operations", {})
    if not isinstance(operations, dict):
        usage["top_operations"] = []
        return usage

    tallies = [(str(name), int(count or 0)) for name, count in operations.items()]
    tallies.sort(key=lambda item: (-item[1], item[0]))
    usage["top_operations"] = [
        {"operation": name, "count": count} for name, count in tallies[:8]
    ]
    return usage
571
+
572
+
573
+ # ---------------------------------------------------------------------------
574
+ # Validation (for round-trip + bootstrap safety)
575
+ # ---------------------------------------------------------------------------
576
+
577
class GraphValidationError(ValueError):
    """Signals that a graph did not pass structural validation.

    Subclasses ValueError, so existing `except ValueError` handlers catch it.
    """
579
+
580
+
581
def validate_graph(graph: dict) -> list[str]:
    """Return a list of validation errors (empty list == valid).

    The function reports problems through its return value; it does not
    raise for malformed input such as a non-dict `meta`, node or edge.

    Soft-checks (logged as warnings, NOT added to the error list):
      - dangling edges (endpoint is not a known node id)
      - duplicate edges (same from / to / type triple)

    Hard-checks (appended to the error list):
      - graph is not a dict, or lacks the `nodes`, `edges`, `meta` keys
      - `nodes` / `edges` / `meta` have the wrong container type
      - schema_version mismatch
      - node is not a dict, its `id` differs from its key, its type is not
        in NODE_TYPES, or it lacks name/status/created_at/updated_at
      - edge is not a dict, lacks from/to/type, has unhashable field
        values, or its type is not in EDGE_TYPES
    """
    if not isinstance(graph, dict):
        return ["graph must be a dict"]

    errors: list[str] = [
        f"missing top-level key: {key}"
        for key in ("nodes", "edges", "meta")
        if key not in graph
    ]
    if errors:
        return errors

    if not isinstance(graph["nodes"], dict):
        errors.append("nodes must be a dict")
    if not isinstance(graph["edges"], list):
        errors.append("edges must be a list")
    if not isinstance(graph["meta"], dict):
        errors.append("meta must be a dict")
    if errors:
        return errors

    nodes = graph["nodes"]

    meta_version = graph["meta"].get("schema_version")
    if meta_version != SCHEMA_VERSION:
        errors.append(
            f"schema_version mismatch: expected {SCHEMA_VERSION}, got {meta_version}"
        )

    for node_id, node in nodes.items():
        if not isinstance(node, dict):
            errors.append(f"node {node_id} must be a dict")
            continue
        if node.get("id") != node_id:
            errors.append(f"node id mismatch: key={node_id}, node.id={node.get('id')}")
        node_type = node.get("type")
        if node_type not in NODE_TYPES:
            errors.append(f"node {node_id} has unknown type: {node_type!r}")
        for field in ("name", "status", "created_at", "updated_at"):
            if field not in node:
                errors.append(f"node {node_id} missing required field: {field}")

    seen_edges: set[tuple] = set()
    for i, edge in enumerate(graph["edges"]):
        if not isinstance(edge, dict):
            errors.append(f"edge[{i}] must be a dict")
            continue

        missing = [field for field in ("from", "to", "type") if field not in edge]
        if missing:
            errors.extend(f"edge[{i}] missing field: {field}" for field in missing)
            # Skip the whole edge: the remaining checks would only produce
            # follow-up noise ("unknown type: None", dangling None endpoints).
            continue

        src, tgt, edge_type = edge["from"], edge["to"], edge["type"]
        key = (src, tgt, edge_type)
        try:
            hash(key)
        except TypeError:
            # Lists/dicts here would break the set and dict lookups below.
            errors.append(f"edge[{i}] has unhashable from/to/type value")
            continue

        if edge_type not in EDGE_TYPES:
            errors.append(f"edge[{i}] has unknown type: {edge_type!r}")
        if src not in nodes:
            log.warning("edge[%d] dangling source: %s", i, src)
        if tgt not in nodes:
            log.warning("edge[%d] dangling target: %s", i, tgt)
        if key in seen_edges:
            log.warning("edge[%d] duplicate: %s -> %s [%s]", i, src, tgt, edge_type)
        seen_edges.add(key)

    return errors
646
+
647
+
648
+ # ---------------------------------------------------------------------------
649
+ # JSON I/O
650
+ # ---------------------------------------------------------------------------
651
+
652
def load_graph(path: pathlib.Path) -> dict:
    """Load a graph from a JSON file, or return `empty_graph()` if it is absent.

    A missing file is not an error; callers that need "must exist"
    semantics should check `path.exists()` themselves first.

    In both cases the returned dict carries a private `_usage_path` entry
    (the graph path as a string) and a "load_graph" usage event is recorded
    via `record_graph_usage`.

    Raises:
        GraphValidationError: if the file's top-level JSON value is not an
            object, or if `validate_graph` reports any error.
        json.JSONDecodeError / OSError: propagated unchanged when the file
            exists but cannot be read or parsed.
    """
    path = pathlib.Path(path)
    if not path.exists():
        log.info("graph file %s does not exist; returning empty graph", path)
        graph = empty_graph()
        graph["_usage_path"] = str(path)
        record_graph_usage(graph, "load_graph", kind="loads")
        return graph

    with path.open("r", encoding="utf-8") as f:
        data = json.load(f)

    # Valid JSON need not be an object; reject arrays/scalars with the
    # documented error type before the dict-style accesses below.
    if not isinstance(data, dict):
        raise GraphValidationError(
            f"graph at {path} failed validation:\n "
            f"top-level JSON value must be an object, got {type(data).__name__}"
        )

    data["_usage_path"] = str(path)
    record_graph_usage(data, "load_graph", kind="loads")

    errors = validate_graph(data)
    if errors:
        raise GraphValidationError(
            f"graph at {path} failed validation:\n " + "\n ".join(errors)
        )
    return data
678
+
679
+
680
def save_graph(path: pathlib.Path, graph: dict, *, validate: bool = True) -> None:
    """Serialise `graph` to `path` as JSON with a deterministic layout.

    Nodes are written sorted by id and edges sorted by (from, to, type) so
    that successive saves produce small, readable diffs. Only the `meta`,
    `nodes` and `edges` keys are written.

    Side effects on `graph`: `meta.updated_at`, `meta.node_count` and
    `meta.edge_count` are refreshed, `_usage_path` is set to the target
    path, and a "save_graph" usage event is recorded.

    Raises:
        GraphValidationError: when `validate` is true and `validate_graph`
            reports errors; nothing is written in that case.
    """
    target = pathlib.Path(path)

    if validate:
        problems = validate_graph(graph)
        if problems:
            raise GraphValidationError(
                "refusing to save invalid graph:\n " + "\n ".join(problems)
            )

    meta = graph["meta"]
    meta["updated_at"] = _now_iso()
    meta["node_count"] = len(graph["nodes"])
    meta["edge_count"] = len(graph["edges"])

    def _edge_order(edge: dict) -> tuple:
        return (edge.get("from", ""), edge.get("to", ""), edge.get("type", ""))

    ordered_nodes = dict(sorted(graph["nodes"].items()))
    ordered_edges = sorted(graph["edges"], key=_edge_order)
    stable = {"meta": meta, "nodes": ordered_nodes, "edges": ordered_edges}

    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        json.dump(stable, handle, indent=2, ensure_ascii=False)
        handle.write("\n")

    graph["_usage_path"] = str(target)
    record_graph_usage(graph, "save_graph", kind="saves")
715
+
716
+
717
+ # ---------------------------------------------------------------------------
718
+ # Traversal: anchor extraction → BFS expand → role filter → serialize (§4)
719
+ # ---------------------------------------------------------------------------
720
+
721
def extract_anchors(graph: dict, task_text: str, max_anchors: int = 8) -> list[str]:
    """Pick the node ids that the task text most plausibly refers to (§4 step 1).

    Each node gets at most one score, taken from the first rule that hits:

    * 3 — the node id occurs verbatim (case-sensitive) in ``task_text``
    * 2 — the node's ``name`` occurs in the text, ignoring case
    * 1 — the slugified name (at least 3 characters) occurs in the
      lower-cased text, e.g. "Next.js" -> "next_js"

    Args:
        graph: graph dict with a ``nodes`` mapping.
        task_text: free-form task description.
        max_anchors: upper bound on the number of ids returned.

    Returns:
        Up to ``max_anchors`` ids ordered by score (highest first), ties
        broken by id. Blank ``task_text`` yields ``[]``; the caller is
        expected to fall back to ``tech_context()`` in that case.
    """
    record_graph_usage(graph, "extract_anchors", kind="queries")
    if not task_text.strip():
        return []

    haystack = task_text.lower()
    hits: dict[str, int] = {}

    for nid, node in graph["nodes"].items():
        if nid in task_text:
            rank = 3
        else:
            label = node.get("name", "")
            lowered = label.lower()
            if lowered and lowered in haystack:
                rank = 2
            else:
                slug = _slug(label)
                rank = 1 if (slug and len(slug) >= 3 and slug in haystack) else 0
        if rank:
            hits[nid] = rank

    best_first = sorted(hits, key=lambda nid: (-hits[nid], nid))
    return best_first[:max_anchors]
760
+
761
+
762
def expand_bfs(
    graph: dict,
    anchors: Iterable[str],
    *,
    hops: int = 2,
    hop2_weight_threshold: float = HOP2_WEIGHT_THRESHOLD,
) -> set[str]:
    """Grow a node set outward from ``anchors`` by breadth-first search (§4 step 2).

    Edges are treated as undirected: a node's neighbours are reached over
    both its outgoing and incoming edges. The first hop follows every
    edge; from the second hop onward an edge is followed only if its
    ``weight`` (``DEFAULT_EDGE_WEIGHT`` when absent) is at least
    ``hop2_weight_threshold``, which keeps dense graphs from exploding.

    Args:
        graph: graph dict with ``nodes`` and ``edges``.
        anchors: starting ids; ids not present in the graph are ignored.
        hops: maximum number of expansion rounds (>= 1).
        hop2_weight_threshold: minimum edge weight for hop 2 and beyond.

    Returns:
        The anchors found in the graph plus every node reached.

    Raises:
        ValueError: ``hops`` is less than 1.
    """
    record_graph_usage(graph, "expand_bfs", kind="queries")
    if hops < 1:
        raise ValueError(f"hops must be >= 1, got {hops}")

    seen: set[str] = {anchor for anchor in anchors if anchor in graph["nodes"]}
    wave: set[str] = set(seen)

    for depth in range(1, hops + 1):
        if not wave:
            break
        weight_gated = depth >= 2
        discovered: set[str] = set()
        for current in wave:
            for edge in graph["edges"]:
                if edge["from"] == current:
                    other = edge["to"]
                elif edge["to"] == current:
                    other = edge["from"]
                else:
                    continue
                if weight_gated and edge.get("weight", DEFAULT_EDGE_WEIGHT) < hop2_weight_threshold:
                    continue
                if other in seen or other not in graph["nodes"]:
                    continue
                discovered.add(other)
                seen.add(other)
        wave = discovered

    return seen
808
+
809
+
810
def filter_by_role(
    graph: dict,
    node_ids: Iterable[str],
    role: str,
) -> set[str]:
    """Restrict ``node_ids`` to the node types a role cares about (§4 step 3).

    The per-role type sets come from ``ROLE_TYPE_INTERESTS``. A role that
    has no entry there is not filtered at all: over-including context for
    an unfamiliar role is preferred to silently dropping it.

    Args:
        graph: graph dict with a ``nodes`` mapping.
        node_ids: candidate ids; ids missing from the graph are dropped
            (for known roles).
        role: role name used to look up the interest set.

    Returns:
        The subset of ids whose node ``type`` is in the role's interest
        set, or all of ``node_ids`` as a set when the role is unknown.
    """
    record_graph_usage(graph, "filter_by_role", kind="queries")
    wanted_types = ROLE_TYPE_INTERESTS.get(role)
    if wanted_types is None:
        return set(node_ids)

    return {
        nid
        for nid in node_ids
        if (node := graph["nodes"].get(nid)) and node.get("type") in wanted_types
    }
832
+
833
+
834
def serialize_slice(
    graph: dict,
    node_ids: Iterable[str],
    *,
    token_budget: int = DEFAULT_SLICE_TOKEN_BUDGET,
) -> str:
    """Render the selected nodes as terse text for prompt injection (§4 step 4).

    Layout: one ``[<type>]`` header per node type (types in sorted order),
    followed by one line per node of that type (sorted by id) holding the
    id, name, any non-"active" status, the first 100 characters of the
    description and up to three outgoing edges to other selected nodes,
    written as ``[<edge type>-><target id>, ...]``.

    The character budget is ``token_budget * CHARS_PER_TOKEN`` (a coarse
    estimate). If the full text exceeds it, lines are kept from the top
    until the next one would not fit, and a ``... (N more)`` line is
    appended where N is the number of lines left out.

    Args:
        graph: graph dict with ``nodes`` and ``edges``.
        node_ids: ids to render; ids missing from the graph are skipped.
        token_budget: approximate token allowance for the output.

    Returns:
        The rendered text, or ``""`` when no requested id is in the graph.
    """
    record_graph_usage(graph, "serialize_slice", kind="queries")
    chosen = [graph["nodes"][nid] for nid in node_ids if nid in graph["nodes"]]
    if not chosen:
        return ""

    max_chars = token_budget * CHARS_PER_TOKEN

    groups: dict[str, list[dict]] = {}
    for node in chosen:
        groups.setdefault(node["type"], []).append(node)

    # Only edges with both endpoints inside the slice are worth showing.
    chosen_ids = {node["id"] for node in chosen}
    outgoing: dict[str, list[str]] = {}
    for edge in graph["edges"]:
        source, target = edge["from"], edge["to"]
        if source in chosen_ids and target in chosen_ids:
            outgoing.setdefault(source, []).append(f"{edge['type']}->{target}")

    def _render(node: dict) -> str:
        pieces = [f" {node['id']}: {node.get('name', '')}"]
        state = node.get("status", "")
        if state and state != "active":
            pieces.append(f"({state})")
        summary = node.get("description", "")
        if summary:
            pieces.append(f"— {summary[:100]}")
        links = outgoing.get(node["id"], [])
        if links:
            pieces.append(f"[{', '.join(links[:3])}]")
        return " ".join(pieces)

    rendered: list[str] = []
    for type_name in sorted(groups):
        rendered.append(f"[{type_name}]")
        rendered.extend(
            _render(node) for node in sorted(groups[type_name], key=lambda n: n["id"])
        )

    full_text = "\n".join(rendered)
    if len(full_text) <= max_chars:
        return full_text

    kept: list[str] = []
    used = 0
    for line in rendered:
        cost = len(line) + 1  # +1 for the joining newline
        if used + cost > max_chars:
            kept.append(f"... ({len(rendered) - len(kept)} more)")
            break
        kept.append(line)
        used += cost
    return "\n".join(kept)
901
+
902
+
903
def build_context_slice(
    graph: dict,
    task_text: str,
    role: str,
    *,
    hops: int = 2,
    token_budget: int = DEFAULT_SLICE_TOKEN_BUDGET,
) -> str:
    """Run the whole §4 pipeline: anchor -> expand -> filter -> serialize.

    Intended entry point for M14's ``working_group._build_context_slice``.

    Args:
        graph: graph dict with ``nodes`` and ``edges``.
        task_text: task description used to find anchor nodes.
        role: role name used to filter node types.
        hops: BFS depth passed to ``expand_bfs``.
        token_budget: output budget passed to ``serialize_slice``.

    Returns:
        The serialized slice, or ``""`` when no anchor matches (which
        includes an empty graph) — the caller should then fall back to
        flat-YAML context.
    """
    record_graph_usage(graph, "build_context_slice", kind="queries")

    seeds = extract_anchors(graph, task_text)
    if not seeds:
        return ""

    reachable = expand_bfs(graph, seeds, hops=hops)
    relevant = filter_by_role(graph, reachable, role)
    return serialize_slice(graph, relevant, token_budget=token_budget)
924
+
925
+
926
+ # ---------------------------------------------------------------------------
927
+ # Query patterns (PDF §8)
928
+ # ---------------------------------------------------------------------------
929
+
930
def decisions_for(graph: dict, node_id: str) -> list[dict]:
    """List the Decision nodes that bear on ``node_id``.

    A decision qualifies when it is the source of an ``affects``,
    ``satisfies`` or ``chose`` edge pointing at ``node_id``. Handy for
    "why did we choose X for this component?" questions. Decisions are
    returned whatever their ``status``, so superseded ones stay visible
    as history.

    Args:
        graph: graph dict with ``nodes`` and ``edges``.
        node_id: id of the node being asked about.

    Returns:
        Matching Decision node dicts ordered by ``created_at`` (missing
        values sort first). A decision linked by several qualifying edges
        appears once per edge.
    """
    record_graph_usage(graph, "decisions_for", kind="queries")
    qualifying = ("affects", "satisfies", "chose")

    found: list[dict] = []
    for edge in graph["edges"]:
        if edge["to"] == node_id and edge["type"] in qualifying:
            source = graph["nodes"].get(edge["from"])
            if source and source.get("type") == "Decision":
                found.append(source)

    found.sort(key=lambda node: node.get("created_at", ""))
    return found
948
+
949
+
950
def ancestors_of(graph: dict, node_id: str, max_depth: int = 10) -> list[dict]:
    """Walk backward through decision history starting at ``node_id``.

    From each visited node, follows outgoing ``decision_ancestry`` edges
    (from=child -> to=parent) and outgoing ``supersedes`` edges
    (from=newer -> to=older), breadth-first. Only Decision nodes are
    reported and expanded further; each node is considered once.

    Args:
        graph: graph dict with ``nodes`` and ``edges``.
        node_id: id to start from (not included in the result).
        max_depth: nodes at this depth are not expanded any further.

    Returns:
        One dict per ancestor, in discovery order, with keys ``node``,
        ``depth`` (1 for direct ancestors), ``edge_type`` and
        ``edge_reason`` (the edge's ``reason``, or ``""``).
    """
    record_graph_usage(graph, "ancestors_of", kind="queries")
    followed = ("decision_ancestry", "supersedes")

    seen: set[str] = {node_id}
    found: list[dict] = []
    pending: list[tuple[str, int]] = [(node_id, 0)]
    cursor = 0  # index of the next queue entry to expand (FIFO order)

    while cursor < len(pending):
        current_id, depth = pending[cursor]
        cursor += 1
        if depth >= max_depth:
            continue
        for edge in graph["edges"]:
            if edge["from"] != current_id:
                continue
            kind = edge["type"]
            if kind not in followed:
                continue
            older_id = edge["to"]
            if older_id in seen:
                continue
            # Marked as seen even when it turns out not to be a Decision.
            seen.add(older_id)
            older = graph["nodes"].get(older_id)
            if older and older.get("type") == "Decision":
                found.append({
                    "node": older,
                    "depth": depth + 1,
                    "edge_type": kind,
                    "edge_reason": edge.get("reason", ""),
                })
                pending.append((older_id, depth + 1))

    return found
997
+
998
+
999
def descendants_of(graph: dict, node_id: str, max_depth: int = 10) -> list[dict]:
    """Walk forward through decision history starting at ``node_id``.

    From each visited node, follows incoming ``decision_ancestry`` edges
    (the edge's ``from`` is a child of the current node) and incoming
    ``supersedes`` edges (the edge's ``from`` is the newer decision),
    breadth-first. Only Decision nodes are reported and expanded further;
    each node is considered once.

    Args:
        graph: graph dict with ``nodes`` and ``edges``.
        node_id: id to start from (not included in the result).
        max_depth: nodes at this depth are not expanded any further.

    Returns:
        One dict per descendant, in discovery order, with keys ``node``,
        ``depth`` (1 for direct descendants), ``edge_type`` and
        ``edge_reason`` (the edge's ``reason``, or ``""``).
    """
    record_graph_usage(graph, "descendants_of", kind="queries")
    followed = ("decision_ancestry", "supersedes")

    seen: set[str] = {node_id}
    found: list[dict] = []
    pending: list[tuple[str, int]] = [(node_id, 0)]
    cursor = 0  # index of the next queue entry to expand (FIFO order)

    while cursor < len(pending):
        current_id, depth = pending[cursor]
        cursor += 1
        if depth >= max_depth:
            continue
        for edge in graph["edges"]:
            if edge["to"] != current_id:
                continue
            kind = edge["type"]
            if kind not in followed:
                continue
            newer_id = edge["from"]
            if newer_id in seen:
                continue
            # Marked as seen even when it turns out not to be a Decision.
            seen.add(newer_id)
            newer = graph["nodes"].get(newer_id)
            if newer and newer.get("type") == "Decision":
                found.append({
                    "node": newer,
                    "depth": depth + 1,
                    "edge_type": kind,
                    "edge_reason": edge.get("reason", ""),
                })
                pending.append((newer_id, depth + 1))

    return found
1045
+
1046
+
1047
def tech_context(graph: dict) -> list[dict]:
    """Return every Technology node in the graph.

    Ordering is whatever ``nodes_by_type`` produces (documented by the
    original author as sorted by id).
    """
    record_graph_usage(graph, "tech_context", kind="queries")
    technologies = nodes_by_type(graph, "Technology")
    return technologies
1051
+
1052
+
1053
def open_risks(graph: dict) -> list[dict]:
    """Return the Risk nodes that still need attention.

    A risk is left out when it is the target of any ``mitigates`` edge
    (the source node's type is not checked) or when its ``status`` is
    "resolved". A risk without a ``status`` is treated as "active".
    """
    record_graph_usage(graph, "open_risks", kind="queries")
    covered: set[str] = {
        edge["to"] for edge in graph["edges"] if edge["type"] == "mitigates"
    }

    still_open: list[dict] = []
    for risk in nodes_by_type(graph, "Risk"):
        if risk["id"] in covered:
            continue
        if risk.get("status", "active") == "resolved":
            continue
        still_open.append(risk)
    return still_open
1069
+
1070
+
1071
def impact(graph: dict, node_id: str) -> dict[str, list[str]]:
    """Answer "what depends on this node?" by grouping its incoming edges.

    Returns:
        Mapping of edge type to the ids of the nodes those edges come
        from, in edge order, for example::

            {
                "uses": ["comp_api", "comp_worker"],
                "depends_on": ["comp_admin"],
                "blocks": ["risk_042"],
            }

        Empty when nothing points at ``node_id``.
    """
    record_graph_usage(graph, "impact", kind="queries")
    dependents: dict[str, list[str]] = {}
    for edge in graph["edges"]:
        if edge["to"] == node_id:
            dependents.setdefault(edge["type"], []).append(edge["from"])
    return dependents
1088
+
1089
+
1090
def stale_tech(graph: dict, max_age_days: int = 7) -> list[dict]:
    """Return Technology nodes whose scout data is missing or too old.

    Used by the scout integration (PDF §6.1 STEP 0) so fresh nodes do not
    trigger redundant web searches. A node counts as stale when its
    ``scout_checked_at`` is absent/empty, cannot be parsed as
    ``YYYY-MM-DDTHH:MM:SS`` plus a UTC offset (``Z`` accepted), or lies
    more than ``max_age_days`` before the current UTC time.

    Args:
        graph: graph dict.
        max_age_days: maximum acceptable age of a scout check, in days.

    Returns:
        The stale Technology node dicts, in ``nodes_by_type`` order.
    """
    record_graph_usage(graph, "stale_tech", kind="queries")
    oldest_fresh = _dt.datetime.now(_dt.timezone.utc) - _dt.timedelta(days=max_age_days)

    def _needs_check(node: dict) -> bool:
        stamp = node.get("scout_checked_at")
        if not stamp:
            return True  # never checked
        try:
            parsed = _dt.datetime.strptime(
                stamp.replace("Z", "+0000"),
                "%Y-%m-%dT%H:%M:%S%z",
            )
        except (ValueError, AttributeError):
            # Unreadable or non-string timestamp: treat as never checked.
            return True
        return parsed < oldest_fresh

    return [node for node in nodes_by_type(graph, "Technology") if _needs_check(node)]
1116
+
1117
+
1118
def unsatisfied_reqs(graph: dict) -> list[dict]:
    """Return Requirement nodes that are unmet or actively violated.

    A requirement is reported when no ``satisfies`` edge points at it, or
    when at least one ``violates`` edge points at it (a violation wins
    even if a ``satisfies`` edge also exists). Edge source types are not
    checked.
    """
    record_graph_usage(graph, "unsatisfied_reqs", kind="queries")
    edges = graph["edges"]
    met: set[str] = {edge["to"] for edge in edges if edge["type"] == "satisfies"}
    broken: set[str] = {edge["to"] for edge in edges if edge["type"] == "violates"}

    pending: list[dict] = []
    for requirement in nodes_by_type(graph, "Requirement"):
        req_id = requirement["id"]
        if req_id in broken or req_id not in met:
            pending.append(requirement)
    return pending
1139
+
1140
+
1141
+ # ---------------------------------------------------------------------------
1142
+ # M13 P2 EP-02: Outcome Tracking & Error Memory
1143
+ # ---------------------------------------------------------------------------
1144
+
1145
def record_outcome(
    graph: dict,
    decision_id: str,
    status: str,
    actual_result: str,
    lessons_learned: str = "",
    tags: Optional[list[str]] = None,
    recorded_by: str = "operator",
) -> dict:
    """Attach (or refresh) the Outcome node of an existing Decision.

    The Outcome lives under the id ``outcome_{decision_id}``. On first
    creation an ``evaluates`` edge Outcome -> Decision is added; recording
    again for the same decision goes through ``add_node`` once more and
    leaves the existing edge alone (§5.3: duplicate id -> update).

    Args:
        graph: graph dict; mutated in place.
        decision_id: id of the Decision being evaluated.
        status: one of ``OUTCOME_STATUSES``.
        actual_result: free-form account of what happened (stripped).
        lessons_learned: optional guidance for similar future decisions.
        tags: optional semantic tags used by ``find_similar_outcomes``.
        recorded_by: who recorded the outcome.

    Returns:
        The Outcome node dict as stored in the graph.

    Raises:
        ValueError: ``status`` is not valid, ``decision_id`` is not in the
            graph, or the referenced node is not a Decision.
    """
    record_graph_usage(graph, "record_outcome", kind="updates")

    if status not in OUTCOME_STATUSES:
        raise ValueError(
            f"invalid outcome status {status!r}; "
            f"must be one of {sorted(OUTCOME_STATUSES)}"
        )

    decision = graph["nodes"].get(decision_id)
    if decision is None:
        raise ValueError(f"decision {decision_id!r} not in graph")
    if decision.get("type") != "Decision":
        raise ValueError(
            f"node {decision_id!r} is type {decision.get('type')!r}, "
            f"expected Decision"
        )

    outcome_id = f"outcome_{decision_id}"
    title = f"Outcome of {decision.get('name', decision_id)[:60]}"
    result_text = actual_result.strip()

    # Must be captured before add_node, which creates the node.
    is_new = outcome_id not in graph["nodes"]

    details = {
        "decision_id": decision_id,
        "outcome_status": status,
        "actual_result": actual_result.strip(),
        "lessons_learned": lessons_learned.strip(),
        "tags": list(tags or []),
        "recorded_by": recorded_by,
    }
    add_node(
        graph,
        outcome_id,
        "Outcome",
        name=title,
        description=result_text,
        # M14: operator-recorded outcomes are ground truth ("KNOWS").
        # Once outcomes are extracted automatically from git history,
        # the source type should be passed in explicitly.
        source_type="operator",
        extra=details,
    )

    if is_new:
        add_edge(graph, outcome_id, decision_id, "evaluates")

    return graph["nodes"][outcome_id]
1224
+
1225
+
1226
def outcome_for(graph: dict, decision_id: str) -> Optional[dict]:
    """Look up the node stored under ``outcome_{decision_id}``.

    Returns:
        That node dict, or ``None`` when the graph has no such id.
    """
    outcome_key = f"outcome_{decision_id}"
    return graph["nodes"].get(outcome_key)
1229
+
1230
+
1231
def find_similar_outcomes(
    graph: dict,
    task_text: str,
    limit: int = 3,
) -> list[dict]:
    """Find Outcome nodes relevant to the current task (PDF EP-02 STEP 2.5).

    Ranking strategy:
      1. Extract keywords from ``task_text`` via ``_extract_keywords``.
      2. For each Outcome node whose ``outcome_status`` is not "confirmed":
         - ``tag_overlap``: number of the node's (lower-cased) tags that
           are task keywords
         - ``decision_match``: number of task keywords contained in the
           lower-cased name of the evaluated decision
         - ``score = tag_overlap * 2 + decision_match``
      3. Keep nodes with a positive score and rank them by score
         (highest first); ties go to the most recently updated outcome
         (``updated_at`` compared as a string, newest first).
      4. Return the top ``limit`` nodes.

    Confirmed outcomes are skipped because "things that went well" are
    less actionable than reverted/revised/partially applied ones.

    Args:
        graph: the graph dict.
        task_text: the current deliberation goal/task string.
        limit: maximum number of outcomes to return.

    Returns:
        Outcome node dicts ranked by relevance. Empty when the text is
        blank, yields no keywords, or nothing scores above zero.
    """
    if not task_text.strip():
        return []

    keywords = _extract_keywords(task_text)
    if not keywords:
        return []

    scored: list[tuple[int, str, dict]] = []
    for node in nodes_by_type(graph, "Outcome"):
        # Skip confirmed outcomes — they're less useful as cautionary tales.
        if node.get("outcome_status") == "confirmed":
            continue

        # Tag overlap counts double; tolerate a missing/None tag list.
        node_tags = {str(tag).lower() for tag in (node.get("tags") or [])}
        tag_overlap = len(node_tags.intersection(keywords))

        # Keyword hits in the name of the decision this outcome evaluates.
        decision = graph["nodes"].get(node.get("decision_id", ""), {})
        decision_name = (decision.get("name") or "").lower()
        decision_match = sum(1 for kw in keywords if kw in decision_name)

        score = tag_overlap * 2 + decision_match
        if score > 0:
            scored.append((score, node.get("updated_at") or "", node))

    # Highest score first, then newest timestamp first. The key excludes
    # the node dict so ties never fall through to comparing dicts, and
    # reverse=True keeps the sort stable for fully tied entries.
    scored.sort(key=lambda item: (item[0], item[1]), reverse=True)

    return [outcome for _score, _ts, outcome in scored[:limit]]
1293
+
1294
+
1295
def decisions_without_outcome(
    graph: dict,
    stage: str = "",
    now: Optional[_dt.datetime] = None,
) -> list[dict]:
    """Return Decision nodes older than the stage threshold that lack an Outcome.

    Used by the EP-02 operator review workflow: "Hey, this decision from
    14 days ago never got an Outcome. What actually happened?"

    The age threshold is looked up in ``STAGE_OUTCOME_THRESHOLDS_DAYS`` by
    ``stage`` and falls back to ``DEFAULT_STAGE_THRESHOLD_DAYS`` for an
    unknown or empty stage. Per PDF EP-02 the intended values are
    idea/mvp: 14 days, growth: 30 days, scale: 60 days, unknown: 30 days.

    Decisions whose ``created_at`` is missing or not in the form
    ``YYYY-MM-DDTHH:MM:SS`` plus a UTC offset (``Z`` accepted) are skipped
    rather than raising.

    Args:
        graph: the graph dict.
        stage: project stage from project-layer.yaml (optional).
        now: timestamp to compare against; defaults to the current UTC
            time (parameterized for deterministic testing). A naive
            datetime is interpreted as UTC.

    Returns:
        Decision node dicts, oldest first by actual creation instant.
    """
    threshold_days = STAGE_OUTCOME_THRESHOLDS_DAYS.get(stage, DEFAULT_STAGE_THRESHOLD_DAYS)

    if now is None:
        now = _dt.datetime.now(_dt.timezone.utc)
    elif now.tzinfo is None:
        # Parsed timestamps are always offset-aware; comparing them with a
        # naive datetime would raise TypeError.
        now = now.replace(tzinfo=_dt.timezone.utc)
    cutoff = now - _dt.timedelta(days=threshold_days)

    overdue: list[tuple[_dt.datetime, dict]] = []
    for decision in nodes_by_type(graph, "Decision"):
        created_at_raw = decision.get("created_at", "")
        try:
            created_at = _dt.datetime.strptime(
                created_at_raw.replace("Z", "+0000"),
                "%Y-%m-%dT%H:%M:%S%z",
            )
        except (ValueError, AttributeError):
            # Unparseable or non-string timestamp — skip rather than crash.
            continue

        if created_at > cutoff:
            continue  # still too recent to chase

        if outcome_for(graph, decision["id"]) is not None:
            continue  # already evaluated

        overdue.append((created_at, decision))

    # Order by the parsed instant, not the raw string: strings carrying
    # different UTC offsets do not sort chronologically.
    overdue.sort(key=lambda item: item[0])
    return [decision for _created, decision in overdue]
1348
+
1349
+
1350
def format_lessons_block(outcomes: list[dict], graph: dict) -> str:
    """Format Outcome nodes as a ``<lessons_learned>`` block for prompt injection.

    Follows the PDF EP-02 STEP 2.5 layout: per outcome a statement line,
    then optional ``Reason:`` (``actual_result``) and ``Lesson:``
    (``lessons_learned``) lines; entries are separated by one blank line.

    Example output:
        <lessons_learned>
        Similar past decision "dec_002: Use Kafka for ingestion" was reverted.
        Reason: Kafka ops overhead too high for 2-person backend team.
        Lesson: Managed queue SQS/CloudTasks better for teams < 5.
        </lessons_learned>

    Args:
        outcomes: Outcome node dicts to render.
        graph: graph dict, used to look up each decision's name (the
            decision id is shown instead when the name is unavailable).

    Returns:
        The block as a single string, or ``""`` for an empty list.
    """
    if not outcomes:
        return ""

    entries: list[str] = []
    for outcome in outcomes:
        dec_id = outcome.get("decision_id", "")
        dec_name = graph["nodes"].get(dec_id, {}).get("name", dec_id)
        verdict = outcome.get("outcome_status", "unknown")

        entry = [f'Similar past decision "{dec_id}: {dec_name}" was {verdict}.']
        reason = outcome.get("actual_result", "").strip()
        if reason:
            entry.append(f"Reason: {reason}")
        takeaway = outcome.get("lessons_learned", "").strip()
        if takeaway:
            entry.append(f"Lesson: {takeaway}")
        entries.append("\n".join(entry))

    # A blank line between entries, none before the closing tag.
    body = "\n\n".join(entries)
    return "\n".join(["<lessons_learned>", body, "</lessons_learned>"])
1389
+
1390
+
1391
+ # ---------------------------------------------------------------------------
1392
+ # CLI shim (optional — main entry is generate_project_graph.py)
1393
+ # ---------------------------------------------------------------------------
1394
+
1395
def _summary(graph: dict) -> str:
    """Build a short plain-text overview of the graph for the CLI.

    Shows the schema version, timestamps and node/edge counts recorded in
    ``meta`` (not recomputed), followed by per-type tallies of the actual
    nodes and edges, each listed in sorted type order.
    """

    def _tally(items) -> dict[str, int]:
        counts: dict[str, int] = {}
        for item in items:
            kind = item["type"]
            counts[kind] = counts.get(kind, 0) + 1
        return counts

    meta = graph["meta"]
    node_counts = _tally(graph["nodes"].values())
    edge_counts = _tally(graph["edges"])

    report = [
        f"Project Context Graph (schema v{meta.get('schema_version')})",
        f" created_at: {meta.get('created_at')}",
        f" updated_at: {meta.get('updated_at')}",
        f" nodes: {meta.get('node_count', 0)}",
        f" edges: {meta.get('edge_count', 0)}",
        "",
        "Node types:",
    ]
    report.extend(f" {kind}: {node_counts[kind]}" for kind in sorted(node_counts))
    report.append("")
    report.append("Edge types:")
    report.extend(f" {kind}: {edge_counts[kind]}" for kind in sorted(edge_counts))
    return "\n".join(report)
1421
+
1422
+
1423
def main(argv: Optional[list[str]] = None) -> int:
    """Minimal CLI: `python3 scripts/graph.py <path>` prints a summary.

    Args:
        argv: argument list for ``argparse``; ``None`` means use the
            process arguments.

    Returns:
        Exit code: 0 on success, 1 when ``--validate`` reports errors,
        2 when loading the graph raises ``GraphValidationError``.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Inspect a 0dai project_graph.json file.",
    )
    cli.add_argument(
        "path",
        nargs="?",
        default="ai/manifest/project_graph.json",
        help="Path to project_graph.json (default: ai/manifest/project_graph.json)",
    )
    cli.add_argument(
        "--validate",
        action="store_true",
        help="Run full validation and report any errors",
    )
    options = cli.parse_args(argv)

    try:
        graph = load_graph(pathlib.Path(options.path))
    except GraphValidationError as exc:
        print(f"error: {exc}")
        return 2

    print(_summary(graph))

    if not options.validate:
        return 0

    problems = validate_graph(graph)
    if not problems:
        print("\nvalidation: OK")
        return 0

    print("\nvalidation errors:")
    for problem in problems:
        print(f" - {problem}")
    return 1
1462
+
1463
+
1464
+ # ---------------------------------------------------------------------------
1465
+ # M19 P0: Graph dogfood — Artifact + Event helpers
1466
+ # ---------------------------------------------------------------------------
1467
+
1468
def record_artifact(
    graph: dict,
    version: str,
    *,
    changelog: str = "",
    commit_sha: str = "",
    released_at: str = "",
) -> dict:
    """Record a release as an Artifact node.

    The node id is ``artifact_v`` followed by the version with dots turned
    into underscores (``1.2.3`` -> ``artifact_v1_2_3``). After the node is
    added, a ``contains`` edge (weight 0.3) is added from it to every
    Decision node currently in the graph. Calling again with the same
    version reuses the same node id.

    Args:
        graph: graph dict; mutated in place.
        version: release version string, without a leading "v".
        changelog: optional notes; the first 500 characters are appended
            to the node description.
        commit_sha: commit the release was cut from.
        released_at: release timestamp; stored only when non-empty.

    Returns:
        The node dict returned by ``add_node``.
    """
    record_graph_usage(graph, "record_artifact", kind="updates")

    artifact_id = "artifact_v" + version.replace(".", "_")
    summary = f"Release v{version}" + (f"\n\n{changelog[:500]}" if changelog else "")

    details: dict[str, Any] = {"version": version, "commit_sha": commit_sha}
    if released_at:
        details["released_at"] = released_at

    artifact = add_node(
        graph, artifact_id, "Artifact", f"v{version}",
        status="active",
        description=summary,
        source_type="KNOWS",
        extra=details,
    )

    # A release is linked to every decision known at recording time.
    for decision in nodes_by_type(graph, "Decision"):
        add_edge(graph, artifact_id, decision["id"], "contains", weight=0.3)

    return artifact
1510
+
1511
+
1512
def record_event(
    graph: dict,
    event_type: str,
    name: str,
    *,
    description: str = "",
    extra: Optional[dict[str, Any]] = None,
) -> dict:
    """Record a lifecycle Event node (session start, migration, ...).

    Events cover significant project moments that are neither a Decision
    nor a Deliberation, so they can feed timeline queries later. The node
    id has the form ``event_{event_type}_{slug}_{timestamp}``, where the
    slug is the first 40 characters of ``_slug(name)`` and the timestamp
    is the first 19 characters of ``_now_iso()`` with ``:`` replaced by
    ``-`` and ``T`` by ``_``.

    Args:
        graph: graph dict; mutated in place.
        event_type: category of the event; also stored on the node.
        name: human-readable event name.
        description: optional longer text.
        extra: optional additional fields; a caller-supplied
            ``event_type`` key takes precedence over the argument.

    Returns:
        The node dict returned by ``add_node``.
    """
    record_graph_usage(graph, "record_event", kind="updates")

    name_part = _slug(name)[:40]
    stamp = _now_iso().translate(str.maketrans({":": "-", "T": "_"}))[:19]
    event_id = f"event_{event_type}_{name_part}_{stamp}"

    fields: dict[str, Any] = {"event_type": event_type}
    fields.update(extra or {})

    return add_node(
        graph, event_id, "Event", name,
        status="active",
        description=description,
        source_type="KNOWS",
        extra=fields,
    )
1541
+
1542
+
1543
def record_deliberation_outcome(
    graph: dict,
    deliberation_id: str,
    verdict: str,
    goal: str,
    *,
    synthesis: str = "",
    resources: Optional[dict] = None,
) -> dict:
    """Record a Deliberation node + Outcome from a working-group deliberation.

    Creates a Deliberation node (``delib_{deliberation_id}``) and an
    Outcome node (``outcome_{deliberation_id}``) whose status is derived
    from the verdict, then links them with an ``evaluates`` edge
    Outcome -> Deliberation.

    Verdict mapping: APPROVED -> confirmed, CONDITIONAL ->
    partially_applied, NEEDS_WORK -> revised, REJECTED -> reverted; any
    other verdict is recorded as revised. A REJECTED deliberation is
    stored with status "closed", all others as "active".

    The derived status is stored both as the node ``status`` and as the
    ``outcome_status`` field, which is the field ``find_similar_outcomes``
    and ``format_lessons_block`` read (and ``record_outcome`` writes).

    Args:
        graph: graph dict; mutated in place.
        deliberation_id: identifier of the deliberation.
        verdict: working-group verdict string.
        goal: deliberation goal; truncated for node names.
        synthesis: optional summary text; truncated for descriptions.
        resources: optional resource-usage dict stored on the
            Deliberation node.

    Returns:
        The Outcome node dict returned by ``add_node``.
    """
    record_graph_usage(graph, "record_deliberation_outcome", kind="updates")

    # Deliberation node
    delib_id = f"delib_{deliberation_id}"
    add_node(
        graph, delib_id, "Deliberation", goal[:100],
        status="closed" if verdict == "REJECTED" else "active",
        description=synthesis[:300] if synthesis else goal,
        source_type="KNOWS",
        extra={
            "deliberation_id": deliberation_id,
            "verdict": verdict,
            "resources": resources or {},
        },
    )

    # Outcome node — maps verdict to outcome status
    outcome_status_map = {
        "APPROVED": "confirmed",
        "CONDITIONAL": "partially_applied",
        "NEEDS_WORK": "revised",
        "REJECTED": "reverted",
    }
    outcome_status = outcome_status_map.get(verdict, "revised")

    outcome_id = f"outcome_{deliberation_id}"
    outcome = add_node(
        graph, outcome_id, "Outcome", f"Outcome: {goal[:80]}",
        status=outcome_status,
        description=f"Deliberation verdict: {verdict}\n\n{synthesis[:200]}",
        source_type="KNOWS",
        extra={
            "deliberation_id": deliberation_id,
            "verdict": verdict,
            # Same field name the Outcome readers in this module use;
            # without it an approved deliberation is never seen as
            # "confirmed" and renders as "unknown".
            "outcome_status": outcome_status,
            "auto_generated": True,
        },
    )

    # Link: Outcome evaluates Deliberation
    add_edge(graph, outcome_id, delib_id, "evaluates")

    return outcome
1600
+
1601
+
1602
# Script entry point. This was a second `def main(...)` whose body called
# `main()` itself: it shadowed the CLI `main` defined above and recursed
# without end when invoked. Restored as the usual entry guard.
if __name__ == "__main__":
    raise SystemExit(main())
1604
+
1605
+
1606
+ # ---------------------------------------------------------------------------
1607
+ # #479: Architecture Constraints — first-class constraint nodes
1608
+ # ---------------------------------------------------------------------------
1609
+
1610
# Enforcement levels a Constraint node may carry:
#   hard      - must never be violated (build/lint blocks)
#   soft      - should be followed (warnings only)
#   guideline - best-effort suggestion
CONSTRAINT_ENFORCEMENTS = frozenset(("hard", "soft", "guideline"))
1616
+
1617
# Keyword -> constraint templates used for auto-derivation.
# A keyword is matched case-insensitively as a substring of a Decision
# node's name and description (see auto_derive_constraints). Every template
# under a matching keyword becomes a Constraint node. `diff_rules` is
# optional and only present on some templates.
CONSTRAINT_DERIVATION_RULES: dict[str, list[dict[str, Any]]] = {
    "docker": [
        dict(
            constraint_id="deployment_mode_containers",
            name="Container-based deployment",
            diff_rules=["no-localhost-on-service-bound"],
            implies=[
                "All services accessed by container name, not localhost",
                "Environment variables via compose args, not hardcoded",
            ],
            forbids=[
                "localhost in connection strings",
                "hardcoded file paths in app code",
                "host-style port mapping in app logic",
            ],
        ),
    ],
    "serverless": [
        dict(
            constraint_id="deployment_mode_serverless",
            name="Serverless deployment",
            implies=[
                "Stateless function handlers only",
                "Cold-start optimization required",
            ],
            forbids=[
                "Local filesystem for persistent state",
                "Long-running connections",
                "In-process caching across invocations",
            ],
        ),
    ],
    "monorepo": [
        dict(
            constraint_id="repo_structure_monorepo",
            name="Monorepo structure",
            implies=[
                "Shared dependency versions across packages",
                "Cross-package imports via workspace protocol",
            ],
            forbids=[
                "Duplicated dependencies across packages",
                "Relative imports crossing package boundaries",
            ],
        ),
    ],
    "kubernetes": [
        dict(
            constraint_id="deployment_mode_k8s",
            name="Kubernetes deployment",
            implies=[
                "Health check endpoints required for all services",
                "Configuration via ConfigMap and Secret resources",
            ],
            forbids=[
                "Hardcoded service addresses",
                "Writing to container filesystem",
            ],
        ),
    ],
    "postgresql": [
        dict(
            constraint_id="database_relational_postgres",
            name="PostgreSQL as primary database",
            diff_rules=["connection-string-consistency"],
            implies=[
                "SQL migrations managed by tooling (Alembic, Prisma, etc.)",
                "Connection pooling required for production",
            ],
            forbids=[
                "Raw DDL in application code",
                "Unparameterized SQL queries",
            ],
        ),
    ],
    "redis": [
        dict(
            constraint_id="cache_redis",
            name="Redis for caching/queue",
            implies=[
                "Cache invalidation strategy required",
                "TTL on all cache keys",
            ],
            forbids=[
                "Using Redis as primary data store",
                "Unbounded key growth without eviction policy",
            ],
        ),
    ],
}
1715
+
1716
+
1717
def add_constraint(
    graph: dict,
    constraint_id: str,
    name: str,
    *,
    enforcement: str = "hard",
    implies: Optional[list[str]] = None,
    forbids: Optional[list[str]] = None,
    declared_by: Optional[str] = None,
    constrains: Optional[list[str]] = None,
    description: str = "",
    source_type: str = DEFAULT_SOURCE,
) -> dict:
    """Add an Architecture Constraint node to the graph.

    Constraints encode hard rules that follow from architectural decisions.
    Unlike context (soft suggestion), constraints are injected into agent
    prompts as checklists that must be satisfied.

    All arguments are validated before the graph is touched, so a
    ValueError leaves the graph without a half-created constraint (no node
    and no partial set of edges).

    Args:
        graph: the graph dict (will be mutated)
        constraint_id: short slug for the constraint (e.g. "deployment_mode_containers")
        name: human-readable name
        enforcement: "hard" (blocks), "soft" (warns), or "guideline" (suggests)
        implies: list of rules this constraint requires
        forbids: list of anti-patterns this constraint prohibits
        declared_by: node id of the Decision that produced this constraint
        constrains: list of Component/Technology node ids this constraint scopes to
        description: free-form description
        source_type: provenance marker

    Returns:
        The Constraint node dict.

    Raises:
        ValueError: if enforcement is not in CONSTRAINT_ENFORCEMENTS, if
            declared_by is missing from the graph or is not a Decision node,
            or if any constrains target is missing from the graph.
    """
    record_graph_usage(graph, "add_constraint", kind="updates")
    if enforcement not in CONSTRAINT_ENFORCEMENTS:
        raise ValueError(
            f"invalid enforcement {enforcement!r}; "
            f"must be one of {sorted(CONSTRAINT_ENFORCEMENTS)}"
        )

    node_id = make_node_id("Constraint", constraint_id)
    # Materialise once so a one-shot iterable survives both passes below.
    scope_targets = list(constrains or [])

    # --- validation pass: nothing is written to the graph until this passes ---
    if declared_by:
        if declared_by not in graph["nodes"]:
            raise ValueError(f"declared_by node {declared_by!r} not in graph")
        target = graph["nodes"][declared_by]
        if target.get("type") != "Decision":
            raise ValueError(
                f"declared_by node {declared_by!r} is type "
                f"{target.get('type')!r}, expected Decision"
            )
    for target_id in scope_targets:
        # The constraint's own id is accepted: the node is created just below
        # (presumably registered under node_id by add_node), which is what
        # made a self-reference pass when validation ran after creation.
        if target_id != node_id and target_id not in graph["nodes"]:
            raise ValueError(f"constrains target {target_id!r} not in graph")

    # --- mutation pass ---
    node = add_node(
        graph, node_id, "Constraint", name,
        status="active",
        description=description,
        source_type=source_type,
        extra={
            "constraint_id": constraint_id,
            "enforcement": enforcement,
            # Copy so later changes to the caller's lists do not leak in.
            "implies": list(implies or []),
            "forbids": list(forbids or []),
        },
    )

    # Edge: Constraint -> Decision (declared_by); skipped if already present.
    if declared_by and not find_edge(graph, node_id, declared_by, "declared_by"):
        add_edge(graph, node_id, declared_by, "declared_by")

    # Edges: Constraint -> Component|Technology (constrains); skipped if present.
    for target_id in scope_targets:
        if not find_edge(graph, node_id, target_id, "constrains"):
            add_edge(graph, node_id, target_id, "constrains")

    return node
1798
+
1799
+
1800
def auto_derive_constraints(graph: dict) -> list[dict]:
    """Auto-derive Constraint nodes from existing Decision nodes.

    For every Decision node, the lower-cased name and description are
    searched for each keyword in CONSTRAINT_DERIVATION_RULES (docker,
    serverless, monorepo, ...). Each template under a matching keyword is
    passed to add_constraint with enforcement "hard", source_type
    "operator" and the Decision as `declared_by`. A template's optional
    `diff_rules` list is copied onto the resulting node.

    A missing or None name/description is treated as empty text instead of
    raising.

    Re-running is intended to be idempotent (an existing constraint is
    updated rather than duplicated) -- this relies on add_node/add_edge
    handling in add_constraint; confirm against add_node.

    Returns:
        One node dict per (decision, template) match, in iteration order.
        A constraint derived from several decisions therefore appears once
        per matching decision.
    """
    record_graph_usage(graph, "auto_derive_constraints", kind="updates")
    results: list[dict] = []

    for decision in nodes_by_type(graph, "Decision"):
        # `.get(key, "")` still yields None when the key is stored with a
        # null value; `or ""` plus str() keeps .lower() from raising.
        dec_name = str(decision.get("name") or "").lower()
        dec_desc = str(decision.get("description") or "").lower()
        dec_text = f"{dec_name} {dec_desc}"

        for keyword, templates in CONSTRAINT_DERIVATION_RULES.items():
            if keyword.lower() not in dec_text:
                continue

            for tmpl in templates:
                constraint = add_constraint(
                    graph,
                    tmpl["constraint_id"],
                    tmpl["name"],
                    enforcement="hard",
                    implies=tmpl.get("implies"),
                    forbids=tmpl.get("forbids"),
                    declared_by=decision["id"],
                    source_type="operator",
                )
                if tmpl.get("diff_rules"):
                    # Stored directly on the node; copied so the template
                    # list is never shared with graph data.
                    constraint["diff_rules"] = list(tmpl["diff_rules"])
                results.append(constraint)

    return results
1842
+
1843
+
1844
def load_constraints_yaml(
    graph: dict,
    yaml_path: pathlib.Path,
) -> list[dict]:
    """Register the manual constraints listed in an ai/constraints.yaml file.

    The file is expected to hold a top-level `constraints` list. Each entry
    supplies:
      - constraint_id (required)
      - name (required)
      - enforcement (optional, defaults to "hard")
      - implies (optional list of strings)
      - forbids (optional list of strings)
      - declared_by (optional Decision node id)
      - constrains (optional list of Component/Technology node ids)
      - description (optional)

    Parsing is delegated to _parse_constraints_yaml (no PyYAML dependency).
    Every parsed entry is forwarded to add_constraint with source_type
    "operator".

    Returns:
        The Constraint node dicts returned by add_constraint, in file order.

    Raises:
        FileNotFoundError: if yaml_path does not exist
    """
    record_graph_usage(graph, "load_constraints_yaml", kind="updates")
    if not yaml_path.exists():
        raise FileNotFoundError(f"constraints file not found: {yaml_path}")

    parsed = _parse_constraints_yaml(yaml_path.read_text(encoding="utf-8"))

    return [
        add_constraint(
            graph,
            item["constraint_id"],
            item["name"],
            enforcement=item.get("enforcement", "hard"),
            implies=item.get("implies"),
            forbids=item.get("forbids"),
            declared_by=item.get("declared_by"),
            constrains=item.get("constrains"),
            description=item.get("description", ""),
            source_type="operator",
        )
        for item in parsed
    ]
1894
+
1895
+
1896
def get_architecture_constraints(graph: dict) -> list[dict]:
    """List every Constraint node, ordered by enforcement then constraint_id.

    Order is hard, soft, guideline; any other enforcement value sorts last.
    A node without an `enforcement` field is ranked as "hard", and ties
    within a level are broken by `constraint_id` (empty string if absent).
    """
    record_graph_usage(graph, "get_architecture_constraints", kind="queries")
    rank = {"hard": 0, "soft": 1, "guideline": 2}

    def _sort_key(node: dict) -> tuple:
        level = node.get("enforcement", "hard")
        return rank.get(level, 99), node.get("constraint_id", "")

    return sorted(nodes_by_type(graph, "Constraint"), key=_sort_key)
1913
+
1914
+
1915
def _constraint_scope_patterns(graph: dict, constraint_id: str) -> list[str]:
    """Collect `path_patterns` from a constraint's outgoing `constrains` edges.

    `constraint_id` is handed to outgoing_edges as-is, so it must be the
    node id of the constraint. A string `path_patterns` value is taken
    verbatim; a list contributes its truthy items converted to str; any
    other value is ignored.
    """
    collected: list[str] = []
    scoping_edges = (
        edge
        for edge in outgoing_edges(graph, constraint_id)
        if edge.get("type") == "constrains"
    )
    for edge in scoping_edges:
        value = edge.get("path_patterns")
        if isinstance(value, list):
            collected += [str(item) for item in value if item]
        elif isinstance(value, str):
            collected.append(value)
    return collected
1926
+
1927
+
1928
def get_active_constraints(
    target: pathlib.Path,
    path_patterns: Optional[list[str]] = None,
) -> list[dict]:
    """Return the constraints of a repo that apply to the given path patterns.

    Loads `<target>/ai/manifest/project_graph.json`, runs
    auto_derive_constraints on the loaded graph, then keeps each constraint
    for which _path_patterns_overlap accepts the requested patterns against
    the `path_patterns` found on the constraint's `constrains` edges.
    When `path_patterns` is empty or None, ["*"] is requested.

    Constraints without any scoped patterns are meant to count as globally
    active -- that depends on how _path_patterns_overlap treats an empty
    scope list; confirm there.
    """
    graph_file = pathlib.Path(target) / "ai" / "manifest" / "project_graph.json"
    graph = load_graph(graph_file)
    auto_derive_constraints(graph)
    record_graph_usage(graph, "get_active_constraints", kind="queries")

    wanted = list(path_patterns or ["*"])
    return [
        node
        for node in get_architecture_constraints(graph)
        if _path_patterns_overlap(
            wanted,
            _constraint_scope_patterns(graph, str(node.get("id") or "")),
        )
    ]
1954
+
1955
+
1956
def format_constraints_checklist(
    graph: dict,
    *,
    constraints: Optional[list[dict]] = None,
) -> str:
    """Render hard and soft architecture constraints as a prompt checklist.

    Constraints are rendered as a checklist (not context) so agents see
    them as rules that must be satisfied, not soft suggestions. Only
    constraints whose enforcement is "hard" or "soft" are rendered;
    "guideline" constraints are left out. Each `forbids` rule is prefixed
    with "No ", each `implies` rule is printed as-is, and only non-hard
    lines carry an enforcement suffix.

    Format:
        ARCHITECTURE CONSTRAINTS (must be satisfied):
        ☐ No localhost in connection strings (deployment_mode_containers)
        ☐ All env vars follow FOO_BAR pattern (env_naming_convention) [soft]
        [before returning code]: self-check against this list

    Args:
        graph: the graph dict; used for usage recording and, when
            `constraints` is None, as the source of constraints.
        constraints: optional explicit list of constraint dicts to render
            instead of everything in the graph.

    Returns:
        The checklist text, or an empty string if no hard/soft constraint
        is available.
    """
    record_graph_usage(graph, "format_constraints_checklist", kind="queries")
    pool = list(constraints) if constraints is not None else get_architecture_constraints(graph)
    selected = [c for c in pool if c.get("enforcement", "hard") in {"hard", "soft"}]
    if not selected:
        return ""

    lines: list[str] = ["ARCHITECTURE CONSTRAINTS (must be satisfied):"]

    for c in selected:
        enforcement = c.get("enforcement", "hard")
        # Nested .get: a plain c["id"] default would be evaluated eagerly and
        # raise KeyError for dicts without "id" even when constraint_id is set.
        cid = c.get("constraint_id", c.get("id", ""))
        bracket = f"[{enforcement}]" if enforcement != "hard" else ""

        # `or []` tolerates a field that is present but None.
        for rule in c.get("forbids") or []:
            # rstrip drops the trailing space left when bracket is empty.
            lines.append(f"☐ No {rule} ({cid}) {bracket}".rstrip())

        for rule in c.get("implies") or []:
            lines.append(f"☐ {rule} ({cid}) {bracket}".rstrip())

    lines.append("[before returning code]: self-check against this list")
    return "\n".join(lines)
1997
+
1998
+
1999
def check_constraint_violations(
    graph: dict,
    code_artifacts: dict[str, str],
) -> list[dict[str, Any]]:
    """Check code artifacts against constraint rules and return violations.

    For every `forbids` rule of every constraint in the graph, a search
    pattern is obtained from _extract_violation_pattern(rule). Rules that
    yield no pattern are skipped. The pattern is then looked up as a
    case-insensitive substring in each artifact's content.

    Args:
        graph: the graph dict
        code_artifacts: dict of {filename: content} to check

    Returns:
        List of violation dicts (ordered by constraint, then rule, then
        artifact), each with:
          - constraint_id: the constraint that was violated
          - rule: the specific forbids rule that matched
          - file: the file containing the violation
          - enforcement: the constraint's enforcement level

    Example:
        violations = check_constraint_violations(g, {
            "db.py": "DB_HOST = 'localhost:5432'",
            "app.py": "redis://my-redis:6379",
        })
        # Expected to flag "localhost in connection strings" in db.py,
        # assuming _extract_violation_pattern reduces that rule to
        # "localhost" -- confirm against the helper.
    """
    record_graph_usage(graph, "check_constraint_violations", kind="queries")
    constraints = get_architecture_constraints(graph)

    # Lower-case every artifact once instead of once per (rule, file) pair.
    lowered = [
        (filename, content.lower())
        for filename, content in code_artifacts.items()
    ]

    violations: list[dict[str, Any]] = []
    for constraint in constraints:
        # Nested .get avoids the eager constraint["id"] lookup (and KeyError)
        # when constraint_id is already present.
        cid = constraint.get("constraint_id", constraint.get("id", ""))
        enforcement = constraint.get("enforcement", "hard")

        # `or []` tolerates a forbids field that is present but None.
        for rule in constraint.get("forbids") or []:
            pattern = _extract_violation_pattern(rule)
            if not pattern:
                continue

            needle = pattern.lower()
            violations.extend(
                {
                    "constraint_id": cid,
                    "rule": rule,
                    "file": filename,
                    "enforcement": enforcement,
                }
                for filename, haystack in lowered
                if needle in haystack
            )

    return violations