@0dai-dev/cli 4.3.5 → 4.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -11
- package/bin/0dai.js +214 -40
- package/lib/ai/manifest/mcp-exposure-contract.json +121 -0
- package/lib/ai/meta/manifest/mcp-tool-tiers.json +435 -0
- package/lib/ai/registry/mcp-catalog.json +98 -0
- package/lib/commands/auth.js +55 -1
- package/lib/commands/compliance.js +1 -1
- package/lib/commands/detect.js +10 -4
- package/lib/commands/doctor.js +545 -26
- package/lib/commands/experience.js +40 -5
- package/lib/commands/export.js +73 -0
- package/lib/commands/feedback.js +157 -15
- package/lib/commands/gh.js +26 -0
- package/lib/commands/graph.js +9 -4
- package/lib/commands/heatmap.js +1 -1
- package/lib/commands/init.js +222 -30
- package/lib/commands/mcp.js +129 -21
- package/lib/commands/models.js +138 -41
- package/lib/commands/provider.js +30 -59
- package/lib/commands/quota.js +1 -1
- package/lib/commands/receipt.js +1 -1
- package/lib/commands/run.js +18 -7
- package/lib/commands/runner.js +31 -1
- package/lib/commands/status.js +44 -11
- package/lib/commands/swarm.js +130 -12
- package/lib/commands/trust.js +286 -0
- package/lib/commands/update.js +184 -38
- package/lib/commands/usage.js +1 -1
- package/lib/commands/validate.js +32 -3
- package/lib/commands/vault.js +46 -9
- package/lib/python/__init__.py +0 -0
- package/lib/python/agent_quotas.py +525 -0
- package/lib/python/anomaly_alert.py +397 -0
- package/lib/python/anti_pattern_detector.py +799 -0
- package/lib/python/auth.py +443 -0
- package/lib/python/capi_profile_guard.py +477 -0
- package/lib/python/compliance_report.py +581 -0
- package/lib/python/drift_detector.py +388 -0
- package/lib/python/experience_pipeline.py +1130 -0
- package/lib/python/graph.py +19 -0
- package/lib/python/graph_core.py +293 -0
- package/lib/python/graph_io.py +179 -0
- package/lib/python/graph_legacy.py +2052 -0
- package/lib/python/graph_legacy_helpers.py +221 -0
- package/lib/python/graph_outcomes_core.py +85 -0
- package/lib/python/graph_queries.py +171 -0
- package/lib/python/graph_slice.py +198 -0
- package/lib/python/graph_slicer.py +576 -0
- package/lib/python/graph_slicer_cli.py +60 -0
- package/lib/python/graph_validation.py +64 -0
- package/lib/python/heatmap.py +934 -0
- package/lib/python/json_utils.py +193 -0
- package/lib/python/mcp_exposure_check.py +247 -0
- package/lib/python/model_router.py +1434 -0
- package/lib/python/project_manager.py +621 -0
- package/lib/python/provider_profiles.py +1618 -0
- package/lib/python/provider_registry.py +1211 -0
- package/lib/python/provider_registry_cli.py +125 -0
- package/lib/python/receipt_png.py +727 -0
- package/lib/python/structural_memory.py +325 -0
- package/lib/python/swarm_cost.py +177 -0
- package/lib/python/usage_ledger.py +569 -0
- package/lib/scripts/mcp_tier_config.py +240 -0
- package/lib/shared.js +97 -14
- package/lib/tui/index.mjs +35174 -0
- package/lib/utils/activation_telemetry.js +230 -11
- package/lib/utils/constants.js +7 -1
- package/lib/utils/export-bundler.js +285 -0
- package/lib/utils/identity.js +198 -1
- package/lib/utils/mcp-auth.js +81 -15
- package/lib/utils/plan.js +1 -1
- package/lib/vault/index.js +19 -3
- package/lib/vault/storage.js +21 -2
- package/lib/wizard.js +5 -2
- package/package.json +9 -3
- package/scripts/build-python-bundle.js +106 -0
- package/scripts/build-tui.js +14 -1
- package/scripts/harvest_experience.py +523 -0
- package/scripts/postinstall.js +15 -9
|
@@ -0,0 +1,2052 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# pragma: loc-waiver — #1069 split: pure helpers extracted to graph_legacy_helpers.py (2245->2052); residual surplus is stateful graph-mutation + constraint/outcome/deliberation + CLI logic, deferred to a follow-up
|
|
3
|
+
"""0dai Project Context Graph — typed knowledge graph for project state.
|
|
4
|
+
|
|
5
|
+
Implements the schema from docs/project-context-graph.md (derived from
|
|
6
|
+
`user_submitted/project context graph schema.pdf`):
|
|
7
|
+
|
|
8
|
+
- 10 node types (§1): Component, Technology, Decision, Requirement, Risk,
|
|
9
|
+
TestPlan, Endpoint, DesignArtifact, MarketEntity, Session, Deliberation
|
|
10
|
+
- 20+ edge types (§2): architecture, decision, quality, design, market,
|
|
11
|
+
ownership, session
|
|
12
|
+
- JSON adjacency list format (§3): `project_graph.json`
|
|
13
|
+
- Traversal algorithm (§4): anchor extraction → BFS 2-hop expand →
|
|
14
|
+
role-filter → serialize ≤400 tokens
|
|
15
|
+
- Query patterns (§8): decisions_for, tech_context, open_risks, impact,
|
|
16
|
+
stale_tech, unsatisfied_reqs
|
|
17
|
+
|
|
18
|
+
Distinct from `scripts/generate_project_layer.py` (which generates flat
|
|
19
|
+
YAML) — this module manages the TYPED graph that deliberations mutate.
|
|
20
|
+
|
|
21
|
+
Bootstrap from project-layer.yaml is in `scripts/generate_project_graph.py`.
|
|
22
|
+
Integration into working_group._build_context_slice is M14 work.
|
|
23
|
+
|
|
24
|
+
Design notes:
|
|
25
|
+
- Flat JSON adjacency list until graph exceeds 500 nodes (per PDF §6.3).
|
|
26
|
+
Migration path to Kuzu/SurrealDB/Neo4j is a future concern.
|
|
27
|
+
- No PyYAML dependency. Stdlib only (constraint_no_go in project-layer).
|
|
28
|
+
- Node IDs follow `<type_prefix>_<slug>` convention for grep-friendly
|
|
29
|
+
category filtering without reading node.type field.
|
|
30
|
+
- Not safe for concurrent writers: assumes a single writer (the working_group orchestrator).
|
|
31
|
+
Concurrent mutation from multiple processes is not supported.
|
|
32
|
+
"""
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import datetime as _dt
|
|
36
|
+
import fnmatch # noqa: F401 retained for back-compat re-export (graph facade star-imports graph_legacy)
|
|
37
|
+
import json
|
|
38
|
+
import logging
|
|
39
|
+
import pathlib
|
|
40
|
+
import re
|
|
41
|
+
from typing import Any, Iterable, Optional
|
|
42
|
+
|
|
43
|
+
from graph_legacy_helpers import ( # noqa: F401 re-export for back-compat (#757 #1069)
|
|
44
|
+
_extract_keywords,
|
|
45
|
+
_extract_violation_pattern,
|
|
46
|
+
_line_matches_constraint_rule,
|
|
47
|
+
_parse_constraints_yaml,
|
|
48
|
+
_parse_diff_lines,
|
|
49
|
+
_path_patterns_overlap,
|
|
50
|
+
matches_constraint_diff,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
log = logging.getLogger("0dai.graph")
|
|
54
|
+
|
|
55
|
+
SCHEMA_VERSION = 1
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# Node + edge type registries (PDF §1-§2)
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
# Node types — the 10 taxonomic categories plus meta categories.
|
|
62
|
+
# Each node's `type` field MUST be a member of this set, or validation fails.
|
|
63
|
+
#
|
|
64
|
+
# `Outcome` added in M13 P2 for EP-02 (Outcome Tracking & Error Memory).
|
|
65
|
+
# An Outcome node records what actually happened after a Decision was
|
|
66
|
+
# applied in practice — status, lessons learned, tags for semantic search.
|
|
67
|
+
NODE_TYPES = frozenset({
|
|
68
|
+
"Component", # product parts: services, modules, pages
|
|
69
|
+
"Technology", # external tech: frameworks, languages, libraries
|
|
70
|
+
"Decision", # architectural/product decisions from deliberations
|
|
71
|
+
"Requirement", # functional + non-functional requirements
|
|
72
|
+
"Risk", # identified concerns + open risks
|
|
73
|
+
"TestPlan", # test strategy nodes
|
|
74
|
+
"Endpoint", # API endpoints + entry points
|
|
75
|
+
"DesignArtifact", # UX mockups, flows, brand artifacts
|
|
76
|
+
"MarketEntity", # competitors, target segments, market forces
|
|
77
|
+
"Session", # meta: a work session
|
|
78
|
+
"Deliberation", # meta: a working-group deliberation
|
|
79
|
+
"Outcome", # M13 P2 EP-02: retrospective evaluation of a Decision
|
|
80
|
+
"Artifact", # M19 P0: release artifacts (versions, tags)
|
|
81
|
+
"Event", # M19 P0: session/meta events for timeline queries
|
|
82
|
+
"Constraint", # #479: architecture constraints — hard rules from decisions
|
|
83
|
+
})
|
|
84
|
+
|
|
85
|
+
# Prefix convention for node IDs. Helps filter/grep by category without
|
|
86
|
+
# loading the full node object.
|
|
87
|
+
NODE_ID_PREFIXES: dict[str, str] = {
|
|
88
|
+
"Component": "comp",
|
|
89
|
+
"Technology": "tech",
|
|
90
|
+
"Decision": "dec",
|
|
91
|
+
"Requirement": "req",
|
|
92
|
+
"Risk": "risk",
|
|
93
|
+
"TestPlan": "test",
|
|
94
|
+
"Endpoint": "ep",
|
|
95
|
+
"DesignArtifact": "design",
|
|
96
|
+
"MarketEntity": "mkt",
|
|
97
|
+
"Session": "session",
|
|
98
|
+
"Deliberation": "delib",
|
|
99
|
+
"Outcome": "outcome", # M13 P2 EP-02
|
|
100
|
+
"Artifact": "artifact", # M19 P0: release artifacts
|
|
101
|
+
"Event": "event", # M19 P0: session/meta events
|
|
102
|
+
"Constraint": "cstr", # #479: architecture constraints
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
# Edge types — the 20+ relation categories. Each edge's `type` field MUST
|
|
106
|
+
# be a member of this set. See docs/project-context-graph.md §2 for
|
|
107
|
+
# direction semantics + allowed source/target type pairs.
|
|
108
|
+
EDGE_TYPES = frozenset({
|
|
109
|
+
# Architecture
|
|
110
|
+
"uses", # Component -> Technology
|
|
111
|
+
"depends_on", # Component -> Component
|
|
112
|
+
"exposes", # Component -> Endpoint
|
|
113
|
+
"part_of", # Component -> Component (sub-module relation)
|
|
114
|
+
# Decision
|
|
115
|
+
"affects", # Decision -> Component | Technology | Requirement
|
|
116
|
+
"chose", # Decision -> Technology (picked this over alt)
|
|
117
|
+
"satisfies", # Decision -> Requirement
|
|
118
|
+
"supersedes", # Decision -> Decision (new replaces old)
|
|
119
|
+
"introduces", # Decision -> Risk (new risk from this choice)
|
|
120
|
+
"mitigates", # Decision -> Risk (resolves existing risk)
|
|
121
|
+
"decided_in", # Decision -> Deliberation
|
|
122
|
+
# Quality
|
|
123
|
+
"covers", # TestPlan -> Component | Requirement
|
|
124
|
+
"tests", # TestPlan -> Component
|
|
125
|
+
"blocks", # Risk -> Component | Decision
|
|
126
|
+
"violates", # Component -> Requirement (currently failing)
|
|
127
|
+
# Design
|
|
128
|
+
"designs", # DesignArtifact -> Component
|
|
129
|
+
"follows", # DesignArtifact -> DesignArtifact (style guide)
|
|
130
|
+
# Market
|
|
131
|
+
"targets", # Component | Decision -> MarketEntity
|
|
132
|
+
"competes_with", # MarketEntity -> MarketEntity
|
|
133
|
+
# Ownership
|
|
134
|
+
"owned_by", # Component -> Session (last-touched bookkeeping)
|
|
135
|
+
"identified_by", # Risk -> Session | Deliberation
|
|
136
|
+
"created_by", # Decision -> Deliberation
|
|
137
|
+
"approved_by", # Decision -> Session
|
|
138
|
+
# Session
|
|
139
|
+
"produced", # Session -> Decision | Component
|
|
140
|
+
"updated", # Session -> Component
|
|
141
|
+
# M13 P2 EP-02: Outcome Tracking
|
|
142
|
+
"evaluates", # Outcome -> Decision (retrospective evaluation)
|
|
143
|
+
"decision_outcome", # Decision -> Outcome (task-result feedback loop)
|
|
144
|
+
# EP: Decision Ancestry
|
|
145
|
+
"decision_ancestry", # Decision -> Decision (child influenced by parent)
|
|
146
|
+
# M19 P0: Graph dogfood
|
|
147
|
+
"released_as", # Decision -> Artifact (decision shipped in version)
|
|
148
|
+
"contains", # Artifact -> Decision (version contains decisions)
|
|
149
|
+
"triggered_by", # Event -> Session (event caused by session)
|
|
150
|
+
"observed_in", # Event -> Artifact (event observed in version)
|
|
151
|
+
# #479: Architecture Constraints
|
|
152
|
+
"declared_by", # Constraint -> Decision (constraint derived from this decision)
|
|
153
|
+
"implies", # Constraint -> Constraint (constraint A implies constraint B)
|
|
154
|
+
"constrains", # Constraint -> Component | Technology (scope of constraint)
|
|
155
|
+
"forbids", # Constraint -> Component | Technology (anti-pattern scope)
|
|
156
|
+
})
|
|
157
|
+
|
|
158
|
+
# Valid status values for Outcome nodes per PDF enhancement pack EP-02.
|
|
159
|
+
# `confirmed`: decision played out as predicted
|
|
160
|
+
# `revised`: decision needed adjustment but core idea was sound
|
|
161
|
+
# `reverted`: decision was rolled back, lesson learned
|
|
162
|
+
# `partially_applied`: decision implemented partially, rest is still pending
|
|
163
|
+
OUTCOME_STATUSES = frozenset({
|
|
164
|
+
"confirmed",
|
|
165
|
+
"revised",
|
|
166
|
+
"reverted",
|
|
167
|
+
"partially_applied",
|
|
168
|
+
})
|
|
169
|
+
|
|
170
|
+
# Stage-based review threshold for outcomes (PDF EP-02).
|
|
171
|
+
# Decisions older than this threshold without an Outcome get flagged.
|
|
172
|
+
# Matches project-layer.yaml `stage` values; fallback is 30 days.
|
|
173
|
+
STAGE_OUTCOME_THRESHOLDS_DAYS: dict[str, int] = {
|
|
174
|
+
"idea": 14,
|
|
175
|
+
"mvp": 14,
|
|
176
|
+
"growth": 30,
|
|
177
|
+
"scale": 60,
|
|
178
|
+
}
|
|
179
|
+
DEFAULT_STAGE_THRESHOLD_DAYS = 30
|
|
180
|
+
|
|
181
|
+
# Role type-interest map (PDF §4 step 3). When building a context slice
|
|
182
|
+
# for a given role, only include nodes whose type is in the role's
|
|
183
|
+
# interest set. Unknown roles fall back to ALL_TYPES (no filter).
|
|
184
|
+
ROLE_TYPE_INTERESTS: dict[str, frozenset[str]] = {
|
|
185
|
+
"cto": frozenset({"Decision", "Technology", "Risk", "Component", "Requirement", "Constraint"}),
|
|
186
|
+
"arch": frozenset({"Component", "Technology", "Endpoint", "Requirement", "Risk", "Constraint"}),
|
|
187
|
+
"designer": frozenset({"DesignArtifact", "Component", "Requirement"}),
|
|
188
|
+
"art_director": frozenset({"DesignArtifact", "Component", "MarketEntity"}),
|
|
189
|
+
"qa": frozenset({"TestPlan", "Component", "Risk", "Requirement", "Constraint"}),
|
|
190
|
+
"security": frozenset({"Risk", "Requirement", "Component", "Technology", "Constraint"}),
|
|
191
|
+
"sre": frozenset({"Component", "Technology", "Risk", "Endpoint", "Constraint"}),
|
|
192
|
+
"cmo": frozenset({"MarketEntity", "Requirement", "Component", "Decision"}),
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
# Default edge weight when caller does not supply one. Traversal uses
|
|
196
|
+
# weight to decide whether hop-2 edges are worth following (threshold
|
|
197
|
+
# in expand_bfs).
|
|
198
|
+
DEFAULT_EDGE_WEIGHT = 1.0
|
|
199
|
+
HOP2_WEIGHT_THRESHOLD = 0.5 # hop-2 edges below this are pruned
|
|
200
|
+
|
|
201
|
+
# Serialization token budget for context slice (PDF §4 step 4).
|
|
202
|
+
# 4 chars per token is a coarse but conservative estimate (matches OpenAI
|
|
203
|
+
# tokenizer for English text).
|
|
204
|
+
CHARS_PER_TOKEN = 4
|
|
205
|
+
DEFAULT_SLICE_TOKEN_BUDGET = 400
|
|
206
|
+
|
|
207
|
+
# ---------------------------------------------------------------------------
|
|
208
|
+
# M14: Provenance — "what 0dai KNOWS vs what 0dai THINKS" (Torvalds critique)
|
|
209
|
+
# ---------------------------------------------------------------------------
|
|
210
|
+
#
|
|
211
|
+
# Every node carries a `source_type` field indicating how the data was
|
|
212
|
+
# derived. Deterministic sources (file_parse, git_diff, operator) are
|
|
213
|
+
# "KNOWS" — they can be trusted as facts. LLM-derived sources
|
|
214
|
+
# (scout_ai, deliberation_ai) are "THINKS" — they're opinions that may
|
|
215
|
+
# hallucinate. CLI output must visually distinguish the two so users
|
|
216
|
+
# don't treat LLM opinions as facts.
|
|
217
|
+
#
|
|
218
|
+
# Per Torvalds (2026-04-08 01:06 UTC):
|
|
219
|
+
# "Чётко раздели: вот что 0dai ЗНАЕТ (из графа, из файлов, из API headers).
|
|
220
|
+
# Вот что 0dai ДУМАЕТ (из LLM inference). Никогда не смешивай."
|
|
221
|
+
|
|
222
|
+
DETERMINISTIC_SOURCES = frozenset({
|
|
223
|
+
"bootstrap", # generate_project_graph.py from project-layer.yaml
|
|
224
|
+
"file_parse", # detected from package.json, pyproject.toml, etc.
|
|
225
|
+
"git_diff", # parsed from git diff (regex, not LLM)
|
|
226
|
+
"operator", # manually entered via 0dai graph add / 0dai learn
|
|
227
|
+
"tool_output", # npm_audit, eslint, lighthouse, etc. (EP-03)
|
|
228
|
+
})
|
|
229
|
+
|
|
230
|
+
AI_DERIVED_SOURCES = frozenset({
|
|
231
|
+
"scout_ai", # scout CLI web search findings
|
|
232
|
+
"deliberation_ai", # working-group synthesis extraction
|
|
233
|
+
"hard_block", # parsed from deliberation hard block strings (LLM-derived)
|
|
234
|
+
"red_team_ai", # EP-04 adversarial review
|
|
235
|
+
"forecast_ai", # EP-14 temporal simulation
|
|
236
|
+
"pattern_ai", # EP-09 cross-project pattern matching
|
|
237
|
+
})
|
|
238
|
+
|
|
239
|
+
DEFAULT_SOURCE = "operator"
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def is_deterministic_source(source_type: str) -> bool:
    """Tell whether ``source_type`` is a deterministic ("KNOWS") provenance tag.

    The answer is plain membership in the module-level
    ``DETERMINISTIC_SOURCES`` set; any value not listed there yields False.
    """
    knows = source_type in DETERMINISTIC_SOURCES
    return knows
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def source_marker(source_type: str) -> str:
    """Map a provenance tag to the one-character marker used in CLI output.

    Returns "✓" for deterministic sources, "~" for sources listed in
    ``AI_DERIVED_SOURCES``, and "?" for anything else.
    """
    if is_deterministic_source(source_type):
        marker = "✓"
    else:
        # Only consulted once the deterministic check has failed.
        marker = "~" if source_type in AI_DERIVED_SOURCES else "?"
    return marker
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# ---------------------------------------------------------------------------
|
|
257
|
+
# Graph construction helpers
|
|
258
|
+
# ---------------------------------------------------------------------------
|
|
259
|
+
|
|
260
|
+
def empty_graph() -> dict:
    """Build a brand-new graph containing no nodes and no edges.

    The result always has the three top-level keys ``nodes`` (dict),
    ``edges`` (list) and ``meta`` (dict). ``meta`` carries the current
    ``SCHEMA_VERSION``, zeroed counters, and a single timestamp used for
    both ``created_at`` and ``updated_at``.
    """
    stamp = _now_iso()
    meta = dict(
        schema_version=SCHEMA_VERSION,
        created_at=stamp,
        updated_at=stamp,
        node_count=0,
        edge_count=0,
    )
    return {"nodes": {}, "edges": [], "meta": meta}
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _now_iso() -> str:
    """Format the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_now = _dt.datetime.now(tz=_dt.timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _slug(text: str) -> str:
    """Turn free-form text into a lowercase, underscore-separated id fragment.

    The text is stripped and lowercased, every run of characters outside
    ``a-z``/``A-Z``/``0-9`` becomes a single underscore, and leading/trailing
    underscores are removed. If nothing is left, "unnamed" is returned.
    """
    lowered = text.strip().lower()
    collapsed = re.sub(r"[^a-zA-Z0-9]+", "_", lowered)
    fragment = collapsed.strip("_")
    if not fragment:
        return "unnamed"
    return fragment
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def make_node_id(node_type: str, name: str) -> str:
    """Derive the canonical ``<prefix>_<slug>`` id for a node.

    The prefix is looked up in ``NODE_ID_PREFIXES`` by ``node_type`` and the
    slug is produced from ``name`` by ``_slug``.

    Example:
        make_node_id("Technology", "Next.js 14") -> "tech_next_js_14"

    Raises:
        ValueError: if ``node_type`` has no registered prefix.
    """
    id_prefix = NODE_ID_PREFIXES.get(node_type)
    if id_prefix:
        return f"{id_prefix}_{_slug(name)}"
    raise ValueError(f"unknown node type: {node_type}")
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def add_node(
    graph: dict,
    node_id: str,
    node_type: str,
    name: str,
    *,
    status: str = "active",
    description: str = "",
    source_type: str = DEFAULT_SOURCE,
    extra: Optional[dict[str, Any]] = None,
) -> dict:
    """Insert a node, or refresh the node already stored under ``node_id``.

    Calling this twice with the same id never raises: the second call
    rewrites ``type``, ``name``, ``status``, ``description`` and
    ``updated_at`` on the stored dict and returns that same dict.

    ``source_type`` on an existing node is replaced when the incoming value
    is deterministic, or when the stored value is itself non-deterministic;
    a non-deterministic value never replaces a deterministic one.

    Args:
        graph: graph dict with ``nodes`` and ``meta`` entries; mutated in place.
        node_id: key under ``graph["nodes"]``; must be truthy.
        node_type: must be a member of ``NODE_TYPES``.
        name: display name stored on the node.
        status: stored verbatim on the node.
        description: stored verbatim on the node.
        source_type: provenance tag (see ``DETERMINISTIC_SOURCES`` /
            ``AI_DERIVED_SOURCES``).
        extra: optional mapping merged into the node last, so its keys win
            over the fields set here.

    Returns:
        The node dict held in ``graph["nodes"][node_id]``.

    Raises:
        ValueError: for an unknown ``node_type`` or an empty ``node_id``.
    """
    if node_type not in NODE_TYPES:
        raise ValueError(f"unknown node type: {node_type!r}")
    if not node_id:
        raise ValueError("node id must be non-empty")

    stamp = _now_iso()
    nodes = graph["nodes"]
    current = nodes.get(node_id)

    if not current:
        # First sighting of this id: build the full record.
        fresh = {
            "id": node_id,
            "type": node_type,
            "name": name,
            "status": status,
            "description": description,
            "source_type": source_type,
            "created_at": stamp,
            "updated_at": stamp,
        }
        if extra:
            fresh.update(extra)
        nodes[node_id] = fresh
        graph["meta"]["node_count"] = len(nodes)
        graph["meta"]["updated_at"] = stamp
        return fresh

    # Update path: descriptive fields are always overwritten.
    # NOTE(review): this happens even when the source_type below is kept
    # deterministic, so AI-supplied name/status/description can end up on a
    # node still marked as deterministic — confirm this is intended.
    current.update(
        type=node_type,
        name=name,
        status=status,
        description=description,
        updated_at=stamp,
    )
    stored_source = current.get("source_type", DEFAULT_SOURCE)
    if is_deterministic_source(source_type) or not is_deterministic_source(stored_source):
        current["source_type"] = source_type
    # Otherwise a non-deterministic tag is trying to replace a deterministic
    # one; the stored tag is left as it is.
    if extra:
        current.update(extra)
    graph["meta"]["updated_at"] = stamp
    return current
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def add_edge(
    graph: dict,
    source: str,
    target: str,
    edge_type: str,
    *,
    weight: float = DEFAULT_EDGE_WEIGHT,
    extra: Optional[dict[str, Any]] = None,
) -> dict:
    """Append a directed edge ``source -> target`` to the graph.

    Duplicates are not detected: each call appends a new edge. Callers that
    need upsert behaviour can look for an existing edge with ``find_edge``
    first.

    The stored edge has the keys ``from``, ``to``, ``type``, ``weight``
    (coerced with ``float``) and ``created_at``; keys from ``extra`` are
    merged in afterwards. ``meta.edge_count`` and ``meta.updated_at`` are
    refreshed, the latter from the edge's final ``created_at`` value.

    Returns:
        The edge dict that was appended.

    Raises:
        ValueError: if ``edge_type`` is not in ``EDGE_TYPES`` or either
            endpoint is not a key of ``graph["nodes"]``.
    """
    if edge_type not in EDGE_TYPES:
        raise ValueError(f"unknown edge type: {edge_type!r}")
    known_nodes = graph["nodes"]
    if source not in known_nodes:
        raise ValueError(f"edge source {source!r} not in graph")
    if target not in graph["nodes"]:
        raise ValueError(f"edge target {target!r} not in graph")

    record = {
        "from": source,
        "to": target,
        "type": edge_type,
        "weight": float(weight),
        "created_at": _now_iso(),
    }
    if extra:
        record.update(extra)

    edge_list = graph["edges"]
    edge_list.append(record)
    meta = graph["meta"]
    meta["edge_count"] = len(edge_list)
    meta["updated_at"] = record["created_at"]
    return record
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def find_edge(
    graph: dict,
    source: str,
    target: str,
    edge_type: Optional[str] = None,
) -> Optional[dict]:
    """Look up the first edge running from ``source`` to ``target``.

    When ``edge_type`` is given, the edge's ``type`` must equal it as well.
    Edges are scanned in list order; ``None`` is returned if none match.
    """
    candidates = (
        candidate
        for candidate in graph["edges"]
        if candidate["from"] == source
        and candidate["to"] == target
        and (edge_type is None or candidate["type"] == edge_type)
    )
    return next(candidates, None)
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def outgoing_edges(graph: dict, node_id: str) -> list[dict]:
    """Collect, in list order, every edge whose ``from`` equals ``node_id``."""
    leaving: list[dict] = []
    for edge in graph["edges"]:
        if edge["from"] == node_id:
            leaving.append(edge)
    return leaving
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def incoming_edges(graph: dict, node_id: str) -> list[dict]:
    """Collect, in list order, every edge whose ``to`` equals ``node_id``."""
    arriving: list[dict] = []
    for edge in graph["edges"]:
        if edge["to"] == node_id:
            arriving.append(edge)
    return arriving
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def nodes_by_type(graph: dict, node_type: str) -> list[dict]:
    """List the nodes whose ``type`` equals ``node_type``, ordered by ``id``.

    Nodes without a ``type`` key are simply not selected. Sorting by id
    keeps the result stable regardless of dict insertion order.
    """
    selected = [
        node for node in graph["nodes"].values() if node.get("type") == node_type
    ]
    selected.sort(key=lambda node: node["id"])
    return selected
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def _usage_path(path: pathlib.Path) -> pathlib.Path:
    """Derive the usage-counter sidecar path for a graph file.

    The sidecar sits next to the graph file and is named
    ``<stem>_usage<suffix>``.
    """
    graph_file = pathlib.Path(path)
    sidecar_name = f"{graph_file.stem}_usage{graph_file.suffix}"
    return graph_file.with_name(sidecar_name)
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def _default_usage() -> dict:
    """Build an all-zero usage payload stamped with the current time.

    The payload holds ``schema_version`` 1, an ``updated_at`` timestamp,
    zeroed ``totals`` for loads/queries/updates/saves, an empty
    ``operations`` mapping and an empty ``recent`` list.
    """
    zeroed_totals = dict(loads=0, queries=0, updates=0, saves=0)
    payload = {
        "schema_version": 1,
        "updated_at": _now_iso(),
        "totals": zeroed_totals,
        "operations": {},
        "recent": [],
    }
    return payload
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def load_graph_usage(path: pathlib.Path) -> dict:
    """Load the usage-counter sidecar that belongs to the graph at ``path``.

    The sidecar location is derived with ``_usage_path``. A fresh default
    payload is returned when the sidecar does not exist, cannot be read,
    is not valid UTF-8 / JSON, or does not contain a JSON object.

    Otherwise the parsed payload is normalised in place and returned:
    ``totals`` gets an integer for each default counter, ``operations``
    and ``recent`` are reset when they have the wrong container type,
    ``schema_version`` is coerced to int and ``updated_at`` to str.
    Counter values that cannot be converted to int fall back to the
    default instead of raising.
    """

    def _coerce_int(raw: Any, fallback: int) -> int:
        # Same coercion as ``int(raw or fallback)``, but tolerant of garbage
        # such as "abc", lists or Infinity in a hand-edited/corrupt ledger.
        try:
            return int(raw or fallback)
        except (TypeError, ValueError, OverflowError):
            return fallback

    usage_path = _usage_path(path)
    if not usage_path.exists():
        return _default_usage()
    try:
        payload = json.loads(usage_path.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        # (a sidecar that is not valid UTF-8).
        return _default_usage()

    default = _default_usage()
    if not isinstance(payload, dict):
        return default

    totals = payload.get("totals")
    if not isinstance(totals, dict):
        payload["totals"] = default["totals"]
    else:
        for key, value in default["totals"].items():
            totals[key] = _coerce_int(totals.get(key, value), 0)

    if not isinstance(payload.get("operations"), dict):
        payload["operations"] = {}

    if not isinstance(payload.get("recent"), list):
        payload["recent"] = []

    payload["schema_version"] = _coerce_int(payload.get("schema_version", 1), 1)
    payload["updated_at"] = str(payload.get("updated_at") or default["updated_at"])
    return payload
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def save_graph_usage(path: pathlib.Path, usage: dict) -> None:
    """Write ``usage`` as indented JSON to the sidecar of the graph at ``path``.

    The sidecar's parent directory is created when missing. The file is
    UTF-8 encoded, keeps non-ASCII characters unescaped, and ends with a
    newline.
    """
    sidecar = _usage_path(path)
    sidecar.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(usage, indent=2, ensure_ascii=False)
    sidecar.write_text(serialized + "\n", encoding="utf-8")
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def record_graph_usage(
    graph: dict,
    operation: str,
    *,
    kind: str = "queries",
    count: int = 1,
) -> None:
    """Increment usage counters for a graph-backed operation (best effort).

    The graph must carry a ``_usage_path`` entry (``load_graph`` sets it to
    the graph file path); without it the call is a no-op. The ledger is
    loaded with ``load_graph_usage``, updated, and written back with
    ``save_graph_usage``.

    Args:
        graph: graph dict, read only for its ``_usage_path`` entry.
        operation: name whose per-operation counter is incremented.
        kind: which ``totals`` bucket to bump; ignored if the bucket does
            not exist in the ledger.
        count: amount added to both counters.

    Failures to load or save the ledger are swallowed, and a stored counter
    that is not numeric is treated as 0, so usage bookkeeping never breaks
    the operation being recorded.
    """

    def _stored_count(raw: Any) -> int:
        # A corrupt ledger entry must not abort the caller (e.g. load_graph).
        try:
            return int(raw or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    usage_path_raw = graph.get("_usage_path")
    if not usage_path_raw:
        return

    try:
        graph_path = pathlib.Path(str(usage_path_raw))
        payload = load_graph_usage(graph_path)
    except (TypeError, ValueError, OverflowError):
        return

    totals = payload.setdefault("totals", _default_usage()["totals"])
    if kind in totals:
        totals[kind] = _stored_count(totals.get(kind, 0)) + count
    operations = payload.setdefault("operations", {})
    operations[operation] = _stored_count(operations.get(operation, 0)) + count
    payload["updated_at"] = _now_iso()

    # Newest entry first; only the 20 most recent entries are kept.
    recent = payload.setdefault("recent", [])
    recent.insert(0, {
        "operation": operation,
        "kind": kind,
        "count": count,
        "at": payload["updated_at"],
    })
    del recent[20:]

    try:
        save_graph_usage(graph_path, payload)
    except OSError:
        return
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def summarize_graph_usage(path: pathlib.Path) -> dict:
    """Return the usage ledger for ``path`` with a ``top_operations`` list added.

    ``top_operations`` holds up to eight ``{"operation", "count"}`` entries,
    ordered by descending count and then by operation name. It is empty when
    the ledger's ``operations`` entry is not a dict.
    """
    ledger = load_graph_usage(path)
    per_operation = ledger.get("operations", {})

    ranked: list[dict] = []
    if isinstance(per_operation, dict):
        pairs = [(str(op), int(hits or 0)) for op, hits in per_operation.items()]
        pairs.sort(key=lambda pair: (-pair[1], pair[0]))
        ranked = [{"operation": op, "count": hits} for op, hits in pairs[:8]]

    ledger["top_operations"] = ranked
    return ledger
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
# ---------------------------------------------------------------------------
|
|
574
|
+
# Validation (for round-trip + bootstrap safety)
|
|
575
|
+
# ---------------------------------------------------------------------------
|
|
576
|
+
|
|
577
|
+
class GraphValidationError(ValueError):
    """Signals that a graph did not pass structural validation.

    Subclasses ``ValueError`` so that handlers written for ``ValueError``
    catch it as well.
    """
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def validate_graph(graph: dict) -> list[str]:
    """Return a list of validation errors (empty list == valid).

    Soft-checks (logged as warnings, never added to the error list):
      - dangling edges (``from``/``to`` referencing an unknown node id)
      - duplicate edges (same ``from``, ``to`` and ``type``)

    Hard-checks (appended to the error list):
      - missing ``nodes``, ``edges`` or ``meta`` top-level keys
      - ``nodes`` not a dict, ``edges`` not a list, ``meta`` not a dict
      - ``meta.schema_version`` different from ``SCHEMA_VERSION``
      - a node entry that is not a dict, whose ``id`` differs from its key,
        whose ``type`` is not in ``NODE_TYPES``, or that lacks ``name``,
        ``status``, ``created_at`` or ``updated_at``
      - an edge entry that is not a dict, that lacks ``from``, ``to`` or
        ``type``, or whose ``type`` is not in ``EDGE_TYPES``

    Orphan nodes are not checked by this function.

    Checking stops early after a top-level structure problem, because the
    remaining checks need well-formed containers.
    """
    missing_keys = [key for key in ("nodes", "edges", "meta") if key not in graph]
    if missing_keys:
        return [f"missing top-level key: {key}" for key in missing_keys]

    errors: list[str] = []
    nodes, edges, meta = graph["nodes"], graph["edges"], graph["meta"]
    if not isinstance(nodes, dict):
        errors.append("nodes must be a dict")
    if not isinstance(edges, list):
        errors.append("edges must be a list")
    if not isinstance(meta, dict):
        # Without this guard the .get() below raises AttributeError.
        errors.append("meta must be a dict")
    if errors:
        return errors

    meta_version = meta.get("schema_version")
    if meta_version != SCHEMA_VERSION:
        errors.append(
            f"schema_version mismatch: expected {SCHEMA_VERSION}, got {meta_version}"
        )

    for node_id, node in nodes.items():
        if not isinstance(node, dict):
            errors.append(f"node {node_id} must be a dict")
            continue
        if node.get("id") != node_id:
            errors.append(f"node id mismatch: key={node_id}, node.id={node.get('id')}")
        node_type = node.get("type")
        if node_type not in NODE_TYPES:
            errors.append(f"node {node_id} has unknown type: {node_type!r}")
        for field in ("name", "status", "created_at", "updated_at"):
            if field not in node:
                errors.append(f"node {node_id} missing required field: {field}")

    seen_edges: set[tuple] = set()
    for i, edge in enumerate(edges):
        if not isinstance(edge, dict):
            errors.append(f"edge[{i}] must be a dict")
            continue
        missing_fields = [f for f in ("from", "to", "type") if f not in edge]
        if missing_fields:
            errors.extend(f"edge[{i}] missing field: {f}" for f in missing_fields)
            # Skip the remaining checks for this edge: they would only report
            # follow-on noise (unknown type None, dangling None endpoints).
            continue

        edge_type = edge["type"]
        if edge_type not in EDGE_TYPES:
            errors.append(f"edge[{i}] has unknown type: {edge_type!r}")

        src, tgt = edge["from"], edge["to"]
        if src not in nodes:
            log.warning("edge[%d] dangling source: %s", i, src)
        if tgt not in nodes:
            log.warning("edge[%d] dangling target: %s", i, tgt)

        key = (src, tgt, edge_type)
        if key in seen_edges:
            log.warning("edge[%d] duplicate: %s -> %s [%s]", i, src, tgt, edge_type)
        seen_edges.add(key)

    return errors
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
# ---------------------------------------------------------------------------
|
|
649
|
+
# JSON I/O
|
|
650
|
+
# ---------------------------------------------------------------------------
|
|
651
|
+
|
|
652
|
+
def load_graph(path: pathlib.Path) -> dict:
    """Read a graph from a JSON file, or hand back a fresh empty graph.

    A missing file is a normal condition, not an error: the fact is logged
    and ``empty_graph()`` is returned. Callers that require the file to
    exist should test ``path.exists()`` themselves before calling.

    In both cases the returned dict gets a ``"_usage_path"`` entry holding
    the path as a string, and a "loads" event is recorded through
    ``record_graph_usage``.

    Raises:
        GraphValidationError: the file exists but ``validate_graph``
            reported one or more problems with its contents.
    """
    graph_path = pathlib.Path(path)

    if graph_path.exists():
        with graph_path.open("r", encoding="utf-8") as handle:
            loaded = json.load(handle)
        loaded["_usage_path"] = str(graph_path)
        record_graph_usage(loaded, "load_graph", kind="loads")

        problems = validate_graph(loaded)
        if not problems:
            return loaded
        raise GraphValidationError(
            f"graph at {graph_path} failed validation:\n " + "\n ".join(problems)
        )

    # Absent file: bootstrap lazily with an empty (unvalidated) graph.
    log.info("graph file %s does not exist; returning empty graph", graph_path)
    fresh = empty_graph()
    fresh["_usage_path"] = str(graph_path)
    record_graph_usage(fresh, "load_graph", kind="loads")
    return fresh
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def save_graph(path: pathlib.Path, graph: dict, *, validate: bool = True) -> None:
    """Write graph to a JSON file with stable key ordering.

    Stable ordering is critical for Git-friendly diffs (per M13 plan
    "Git-friendly — stable file names, predictable frontmatter order,
    minimal diff noise"). Nodes are sorted by id, edges by
    (from, to, type). Only the "meta", "nodes" and "edges" keys are
    written; bookkeeping keys such as "_usage_path" are not persisted.

    The JSON is first written to a sibling ``<name>.tmp`` file and then
    renamed over the target, so a serialization or I/O failure part-way
    through leaves any previously saved graph untouched.

    Args:
        path: destination file; parent directories are created as needed.
        graph: the graph dict. Mutated: ``meta.updated_at``,
            ``meta.node_count`` and ``meta.edge_count`` are refreshed and
            ``_usage_path`` is set after a successful write.
        validate: when true (default), run ``validate_graph`` first.

    Raises:
        GraphValidationError: if ``validate`` is true and the graph has
            validation errors. Nothing is written in that case.
    """
    path = pathlib.Path(path)
    if validate:
        errors = validate_graph(graph)
        if errors:
            raise GraphValidationError(
                "refusing to save invalid graph:\n " + "\n ".join(errors)
            )

    meta = graph["meta"]
    meta["updated_at"] = _now_iso()
    meta["node_count"] = len(graph["nodes"])
    meta["edge_count"] = len(graph["edges"])

    stable = {
        "meta": meta,
        "nodes": dict(sorted(graph["nodes"].items())),
        "edges": sorted(
            graph["edges"],
            key=lambda e: (e.get("from", ""), e.get("to", ""), e.get("type", "")),
        ),
    }

    path.parent.mkdir(parents=True, exist_ok=True)
    # Write next to the target so the final rename stays on one filesystem.
    tmp_path = path.with_name(path.name + ".tmp")
    try:
        with tmp_path.open("w", encoding="utf-8") as f:
            json.dump(stable, f, indent=2, ensure_ascii=False)
            f.write("\n")
        tmp_path.replace(path)
    finally:
        # After a successful replace the temp file is gone and this is a
        # no-op; after a failure it removes the partial file.
        tmp_path.unlink(missing_ok=True)

    graph["_usage_path"] = str(path)
    record_graph_usage(graph, "save_graph", kind="saves")
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
# ---------------------------------------------------------------------------
|
|
718
|
+
# Traversal: anchor extraction → BFS expand → role filter → serialize (§4)
|
|
719
|
+
# ---------------------------------------------------------------------------
|
|
720
|
+
|
|
721
|
+
def extract_anchors(graph: dict, task_text: str, max_anchors: int = 8) -> list[str]:
    """Pick the node ids that the task text most plausibly refers to.

    Every node receives at most one score, taken from the first rule
    that matches:

      3 - the node id occurs verbatim (case-sensitive) in the task text
      2 - the node name occurs in the text, compared case-insensitively
      1 - the slugified name (at least 3 characters) occurs in the
          lowercased text

    Nodes that match no rule are dropped. The remaining ids are ordered
    by score (highest first), ties broken by id, and cut to
    ``max_anchors`` entries.

    Blank task text yields an empty list; the caller is expected to fall
    back to another source of context in that case.
    """
    record_graph_usage(graph, "extract_anchors", kind="queries")
    if not task_text.strip():
        return []

    haystack = task_text.lower()

    def _rate(candidate_id, candidate):
        # Rules are tried strongest-first; the first hit decides the score.
        if candidate_id in task_text:
            return 3
        label = candidate.get("name", "")
        lowered = label.lower()
        if lowered and lowered in haystack:
            return 2
        slugged = _slug(label)
        if slugged and len(slugged) >= 3 and slugged in haystack:
            return 1
        return 0

    hits: dict[str, int] = {}
    for candidate_id, candidate in graph["nodes"].items():
        points = _rate(candidate_id, candidate)
        if points:
            hits[candidate_id] = points

    ordered = sorted(hits, key=lambda cid: (-hits[cid], cid))
    return ordered[:max_anchors]
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
def expand_bfs(
    graph: dict,
    anchors: Iterable[str],
    *,
    hops: int = 2,
    hop2_weight_threshold: float = HOP2_WEIGHT_THRESHOLD,
) -> set[str]:
    """BFS expand from anchor nodes up to `hops` (PDF §4 step 2).

    Hop 1 follows all edges unconditionally. Hop 2 and later only follow
    edges with weight >= hop2_weight_threshold (the "edge weight filter"
    from the spec — prevents hop-2 explosion for densely-connected
    graphs). Edges without a "weight" use DEFAULT_EDGE_WEIGHT.

    Both outgoing and incoming edges are traversed — for a context
    slice, "components that use this tech" and "tech used by this
    component" are equally relevant.

    Args:
        graph: the graph dict ("nodes" mapping and "edges" list).
        anchors: starting node ids; ids not present in the graph are
            ignored.
        hops: maximum number of expansion rounds (must be >= 1).
        hop2_weight_threshold: minimum edge weight followed from hop 2 on.

    Returns:
        The set of reached node ids, including the valid anchors. Only
        ids present in ``graph["nodes"]`` are ever included.

    Raises:
        ValueError: if ``hops`` is less than 1.
    """
    record_graph_usage(graph, "expand_bfs", kind="queries")
    if hops < 1:
        raise ValueError(f"hops must be >= 1, got {hops}")

    nodes = graph["nodes"]
    visited: set[str] = {anchor for anchor in anchors if anchor in nodes}
    if not visited:
        return visited

    # Index edges by endpoint once, instead of rescanning the whole edge
    # list for every frontier node on every hop.
    adjacency: dict[str, list[tuple]] = {}
    for edge in graph["edges"]:
        src, tgt = edge["from"], edge["to"]
        weight = edge.get("weight", DEFAULT_EDGE_WEIGHT)
        adjacency.setdefault(src, []).append((tgt, weight))
        if tgt != src:
            adjacency.setdefault(tgt, []).append((src, weight))

    frontier: set[str] = set(visited)
    for hop in range(1, hops + 1):
        apply_weight_filter = hop >= 2
        next_frontier: set[str] = set()
        for node_id in frontier:
            for neighbor, weight in adjacency.get(node_id, ()):
                if apply_weight_filter and weight < hop2_weight_threshold:
                    continue
                # Dangling edge endpoints (ids with no node) are never reached.
                if neighbor not in visited and neighbor in nodes:
                    next_frontier.add(neighbor)
                    visited.add(neighbor)
        if not next_frontier:
            break
        frontier = next_frontier

    return visited
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
def filter_by_role(
    graph: dict,
    node_ids: Iterable[str],
    role: str,
) -> set[str]:
    """Narrow a set of node ids to the node types a role cares about (§4 step 3).

    The role is looked up in ROLE_TYPE_INTERESTS. A role with no entry
    there is not filtered at all: the ids come back unchanged (as a set),
    on the principle that over-including context for an unfamiliar role
    is safer than silently dropping it.

    For a known role, an id is kept only when the graph has a node under
    that id and the node's "type" is in the role's interest set.
    """
    record_graph_usage(graph, "filter_by_role", kind="queries")
    wanted_types = ROLE_TYPE_INTERESTS.get(role)
    if wanted_types is None:
        return set(node_ids)

    kept: set[str] = set()
    for candidate_id in node_ids:
        entry = graph["nodes"].get(candidate_id)
        if not entry:
            continue
        if entry.get("type") in wanted_types:
            kept.add(candidate_id)
    return kept
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
def serialize_slice(
    graph: dict,
    node_ids: Iterable[str],
    *,
    token_budget: int = DEFAULT_SLICE_TOKEN_BUDGET,
) -> str:
    """Turn the selected nodes into compact prompt text (§4 step 4).

    Output layout: a ``[<type>]`` header line per node type (types in
    sorted order), followed by one line per node of that type (sorted by
    id). A node line holds the id and name, then optionally the status
    (when set and not "active"), the first 100 characters of the
    description, and up to three ``<edge type>-><target>`` entries for
    edges whose source and target are both in the selection.

    The character budget is ``token_budget * CHARS_PER_TOKEN``. If the
    full text is longer, lines are kept from the top until the next one
    would not fit, and a ``... (N more)`` line is appended, where N is
    the number of lines left out.

    Ids with no node in the graph are ignored; an empty selection gives
    an empty string.
    """
    record_graph_usage(graph, "serialize_slice", kind="queries")

    chosen: list[dict] = []
    for nid in node_ids:
        if nid in graph["nodes"]:
            chosen.append(graph["nodes"][nid])
    if not chosen:
        return ""

    max_chars = token_budget * CHARS_PER_TOKEN

    grouped: dict[str, list[dict]] = {}
    for entry in chosen:
        grouped.setdefault(entry["type"], []).append(entry)

    chosen_ids = {entry["id"] for entry in chosen}

    # Outgoing edge labels, restricted to edges that stay inside the selection.
    outgoing: dict[str, list[str]] = {}
    for link in graph["edges"]:
        origin, dest = link["from"], link["to"]
        if origin not in chosen_ids or dest not in chosen_ids:
            continue
        outgoing.setdefault(origin, []).append(f"{link['type']}->{dest}")

    def _render(entry: dict) -> str:
        pieces = [f" {entry['id']}: {entry.get('name', '')}"]
        state = entry.get("status", "")
        if state and state != "active":
            pieces.append(f"({state})")
        blurb = entry.get("description", "")
        if blurb:
            pieces.append(f"— {blurb[:100]}")
        arrows = outgoing.get(entry["id"], [])
        if arrows:
            pieces.append(f"[{', '.join(arrows[:3])}]")
        return " ".join(pieces)

    rendered: list[str] = []
    for type_name in sorted(grouped):
        rendered.append(f"[{type_name}]")
        members = sorted(grouped[type_name], key=lambda item: item["id"])
        for member in members:
            rendered.append(_render(member))

    full_text = "\n".join(rendered)
    if len(full_text) <= max_chars:
        return full_text

    # Over budget: keep leading lines while they fit (+1 per line for the newline).
    kept: list[str] = []
    used = 0
    for row in rendered:
        cost = len(row) + 1
        if used + cost > max_chars:
            kept.append(f"... ({len(rendered) - len(kept)} more)")
            break
        kept.append(row)
        used += cost
    return "\n".join(kept)
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
def build_context_slice(
    graph: dict,
    task_text: str,
    role: str,
    *,
    hops: int = 2,
    token_budget: int = DEFAULT_SLICE_TOKEN_BUDGET,
) -> str:
    """Run the whole §4 pipeline and return the rendered context slice.

    Steps, in order: ``extract_anchors`` on the task text, ``expand_bfs``
    from those anchors (limited to ``hops``), ``filter_by_role`` for the
    given role, then ``serialize_slice`` within ``token_budget``.

    When no anchor is found (which includes the empty-graph case) the
    result is an empty string, and the caller should fall back to
    flat-YAML context.
    """
    record_graph_usage(graph, "build_context_slice", kind="queries")
    seed_ids = extract_anchors(graph, task_text)
    if seed_ids:
        reachable = expand_bfs(graph, seed_ids, hops=hops)
        relevant = filter_by_role(graph, reachable, role)
        return serialize_slice(graph, relevant, token_budget=token_budget)
    return ""
|
|
924
|
+
|
|
925
|
+
|
|
926
|
+
# ---------------------------------------------------------------------------
|
|
927
|
+
# Query patterns (PDF §8)
|
|
928
|
+
# ---------------------------------------------------------------------------
|
|
929
|
+
|
|
930
|
+
def decisions_for(graph: dict, node_id: str) -> list[dict]:
    """Return the Decision nodes that point at the given node.

    A Decision qualifies when it is the source of an `affects`,
    `satisfies` or `chose` edge whose target is ``node_id``. Useful for
    "why did we choose X for this component?" queries.

    Each Decision appears once, even when several qualifying edges link
    it to the node. Superseded decisions are not filtered out — they are
    still returned for history, with whatever `status` they carry.

    Args:
        graph: the graph dict.
        node_id: id of the node whose decisions are wanted.

    Returns:
        Decision node dicts sorted by their ``created_at`` string
        (missing values sort first).
    """
    record_graph_usage(graph, "decisions_for", kind="queries")
    relevant_types = ("affects", "satisfies", "chose")
    seen_ids: set[str] = set()
    results: list[dict] = []
    for edge in graph["edges"]:
        if edge["to"] != node_id or edge["type"] not in relevant_types:
            continue
        source_id = edge["from"]
        if source_id in seen_ids:
            # Same decision reached through a second edge — already listed.
            continue
        dec = graph["nodes"].get(source_id)
        if dec and dec.get("type") == "Decision":
            seen_ids.add(source_id)
            results.append(dec)
    return sorted(results, key=lambda n: n.get("created_at", ""))
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
def ancestors_of(graph: dict, node_id: str, max_depth: int = 10) -> list[dict]:
|
|
951
|
+
"""Trace decision ancestry backward: which past decisions influenced this one.
|
|
952
|
+
|
|
953
|
+
Follows `decision_ancestry` edges (from=child -> to=parent) and
|
|
954
|
+
`supersedes` edges (from=newer -> to=older), both in the from -> to direction.
|
|
955
|
+
Returns a list of dicts with the keys "node", "depth", "edge_type" and
"edge_reason" (the edge's "reason", or "" when absent). Only targets that
exist in the graph with type "Decision" are reported. Queue entries
dequeued at depth >= max_depth are not expanded further.
|
|
956
|
+
"""
|
|
957
|
+
record_graph_usage(graph, "ancestors_of", kind="queries")
|
|
958
|
+
visited: set[str] = {node_id}
|
|
959
|
+
results: list[dict] = []
|
|
960
|
+
queue: list[tuple[str, int, str]] = [(node_id, 0, "self")]
|
|
961
|
+
|
|
962
|
+
while queue:
|
|
963
|
+
# edge_type is unpacked from the queue entry but not read again in this function
current_id, depth, edge_type = queue.pop(0)
|
|
964
|
+
if depth >= max_depth:
|
|
965
|
+
continue
|
|
966
|
+
for edge in graph["edges"]:
|
|
967
|
+
# decision_ancestry: from=child -> to=parent
|
|
968
|
+
if edge["from"] == current_id and edge["type"] == "decision_ancestry":
|
|
969
|
+
parent_id = edge["to"]
|
|
970
|
+
if parent_id not in visited:
|
|
971
|
+
visited.add(parent_id)
|
|
972
|
+
parent = graph["nodes"].get(parent_id)
|
|
973
|
+
if parent and parent.get("type") == "Decision":
|
|
974
|
+
results.append({
|
|
975
|
+
"node": parent,
|
|
976
|
+
"depth": depth + 1,
|
|
977
|
+
"edge_type": "decision_ancestry",
|
|
978
|
+
"edge_reason": edge.get("reason", ""),
|
|
979
|
+
})
|
|
980
|
+
# NOTE(review): indentation is not recoverable from this rendering — confirm
# whether this enqueue is nested under the Decision type check above
queue.append((parent_id, depth + 1, "decision_ancestry"))
|
|
981
|
+
# supersedes: from=newer -> to=older
|
|
982
|
+
if edge["from"] == current_id and edge["type"] == "supersedes":
|
|
983
|
+
old_id = edge["to"]
|
|
984
|
+
if old_id not in visited:
|
|
985
|
+
visited.add(old_id)
|
|
986
|
+
old = graph["nodes"].get(old_id)
|
|
987
|
+
if old and old.get("type") == "Decision":
|
|
988
|
+
results.append({
|
|
989
|
+
"node": old,
|
|
990
|
+
"depth": depth + 1,
|
|
991
|
+
"edge_type": "supersedes",
|
|
992
|
+
"edge_reason": edge.get("reason", ""),
|
|
993
|
+
})
|
|
994
|
+
# NOTE(review): same nesting question as the decision_ancestry enqueue above
queue.append((old_id, depth + 1, "supersedes"))
|
|
995
|
+
|
|
996
|
+
return results
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
def descendants_of(graph: dict, node_id: str, max_depth: int = 10) -> list[dict]:
|
|
1000
|
+
"""Trace decision ancestry forward: which later decisions were influenced by this one.
|
|
1001
|
+
|
|
1002
|
+
Follows `decision_ancestry` edges in reverse (to=parent -> from=child)
|
|
1003
|
+
and `supersedes` edges in reverse as well (to=older -> from=newer).
Returns a list of dicts with the keys "node", "depth", "edge_type" and
"edge_reason" (the edge's "reason", or "" when absent). Only sources that
exist in the graph with type "Decision" are reported. Queue entries
dequeued at depth >= max_depth are not expanded further.
|
|
1004
|
+
"""
|
|
1005
|
+
record_graph_usage(graph, "descendants_of", kind="queries")
|
|
1006
|
+
visited: set[str] = {node_id}
|
|
1007
|
+
results: list[dict] = []
|
|
1008
|
+
queue: list[tuple[str, int, str]] = [(node_id, 0, "self")]
|
|
1009
|
+
|
|
1010
|
+
while queue:
|
|
1011
|
+
# edge_type is unpacked from the queue entry but not read again in this function
current_id, depth, edge_type = queue.pop(0)
|
|
1012
|
+
if depth >= max_depth:
|
|
1013
|
+
continue
|
|
1014
|
+
for edge in graph["edges"]:
|
|
1015
|
+
# decision_ancestry: from=child -> to=parent, so to=current means current is parent
|
|
1016
|
+
if edge["to"] == current_id and edge["type"] == "decision_ancestry":
|
|
1017
|
+
child_id = edge["from"]
|
|
1018
|
+
if child_id not in visited:
|
|
1019
|
+
visited.add(child_id)
|
|
1020
|
+
child = graph["nodes"].get(child_id)
|
|
1021
|
+
if child and child.get("type") == "Decision":
|
|
1022
|
+
results.append({
|
|
1023
|
+
"node": child,
|
|
1024
|
+
"depth": depth + 1,
|
|
1025
|
+
"edge_type": "decision_ancestry",
|
|
1026
|
+
"edge_reason": edge.get("reason", ""),
|
|
1027
|
+
})
|
|
1028
|
+
# NOTE(review): indentation is not recoverable from this rendering — confirm
# whether this enqueue is nested under the Decision type check above
queue.append((child_id, depth + 1, "decision_ancestry"))
|
|
1029
|
+
# supersedes: from=newer -> to=older, so to=current means current is older
|
|
1030
|
+
if edge["to"] == current_id and edge["type"] == "supersedes":
|
|
1031
|
+
new_id = edge["from"]
|
|
1032
|
+
if new_id not in visited:
|
|
1033
|
+
visited.add(new_id)
|
|
1034
|
+
new = graph["nodes"].get(new_id)
|
|
1035
|
+
if new and new.get("type") == "Decision":
|
|
1036
|
+
results.append({
|
|
1037
|
+
"node": new,
|
|
1038
|
+
"depth": depth + 1,
|
|
1039
|
+
"edge_type": "supersedes",
|
|
1040
|
+
"edge_reason": edge.get("reason", ""),
|
|
1041
|
+
})
|
|
1042
|
+
# NOTE(review): same nesting question as the decision_ancestry enqueue above
queue.append((new_id, depth + 1, "supersedes"))
|
|
1043
|
+
|
|
1044
|
+
return results
|
|
1045
|
+
|
|
1046
|
+
|
|
1047
|
+
def tech_context(graph: dict) -> list[dict]:
    """List every Technology node in the graph.

    Records a "queries" usage event, then delegates to
    ``nodes_by_type(graph, "Technology")`` and returns its result as-is
    (the ordering is whatever that helper produces).
    """
    record_graph_usage(graph, "tech_context", kind="queries")
    technologies = nodes_by_type(graph, "Technology")
    return technologies
|
|
1051
|
+
|
|
1052
|
+
|
|
1053
|
+
def open_risks(graph: dict) -> list[dict]:
    """List the Risk nodes that are still open.

    A risk counts as handled, and is left out, when either of these holds:
      - some edge of type `mitigates` points at it, or
      - its own "status" is "resolved" (a missing status is read as
        "active").

    Everything else returned by ``nodes_by_type(graph, "Risk")`` is
    reported, in that helper's order.
    """
    record_graph_usage(graph, "open_risks", kind="queries")
    covered = {link["to"] for link in graph["edges"] if link["type"] == "mitigates"}

    still_open: list[dict] = []
    for risk in nodes_by_type(graph, "Risk"):
        if risk["id"] in covered:
            continue
        if risk.get("status", "active") == "resolved":
            continue
        still_open.append(risk)
    return still_open
|
|
1069
|
+
|
|
1070
|
+
|
|
1071
|
+
def impact(graph: dict, node_id: str) -> dict[str, list[str]]:
    """Answer "what depends on this node?" from its incoming edges.

    Every edge whose target is ``node_id`` contributes its source id to
    a list keyed by the edge type, e.g.:

        {
            "uses": ["comp_api", "comp_worker"],
            "depends_on": ["comp_admin"],
            "blocks": ["risk_042"],
        }

    Source ids keep the order in which their edges appear in the graph.
    A node with no incoming edges gives an empty dict.
    """
    record_graph_usage(graph, "impact", kind="queries")
    dependents: dict[str, list[str]] = {}
    incoming = (link for link in graph["edges"] if link["to"] == node_id)
    for link in incoming:
        dependents.setdefault(link["type"], []).append(link["from"])
    return dependents
|
|
1088
|
+
|
|
1089
|
+
|
|
1090
|
+
def stale_tech(graph: dict, max_age_days: int = 7) -> list[dict]:
    """List Technology nodes whose scout data is out of date.

    Used by scout integration (PDF §6.1 STEP 0) so that fresh entries do
    not trigger redundant web searches. A node is reported as stale when:

      - it has no (or an empty) `scout_checked_at` value, i.e. it was
        never checked;
      - the value cannot be parsed as ``YYYY-MM-DDTHH:MM:SS`` followed by
        a UTC offset (a trailing ``Z`` is accepted); or
      - the parsed time is earlier than now (UTC) minus ``max_age_days``.
    """
    record_graph_usage(graph, "stale_tech", kind="queries")
    threshold = _dt.datetime.now(_dt.timezone.utc) - _dt.timedelta(days=max_age_days)

    def _is_stale(tech: dict) -> bool:
        stamp = tech.get("scout_checked_at")
        if not stamp:
            return True
        try:
            parsed = _dt.datetime.strptime(
                stamp.replace("Z", "+0000"),
                "%Y-%m-%dT%H:%M:%S%z",
            )
        except (ValueError, AttributeError):
            # Unreadable timestamp: treat like "never checked".
            return True
        return parsed < threshold

    return [tech for tech in nodes_by_type(graph, "Technology") if _is_stale(tech)]
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
def unsatisfied_reqs(graph: dict) -> list[dict]:
    """List Requirement nodes that are not currently satisfied.

    A requirement is reported when no `satisfies` edge points at it, or
    when at least one `violates` edge points at it — a violation
    overrides any `satisfies` edge. The source node of those edges is
    not inspected here.
    """
    record_graph_usage(graph, "unsatisfied_reqs", kind="queries")
    met: set[str] = set()
    broken: set[str] = set()
    for link in graph["edges"]:
        kind = link["type"]
        if kind == "satisfies":
            met.add(link["to"])
        elif kind == "violates":
            broken.add(link["to"])

    pending: list[dict] = []
    for req in nodes_by_type(graph, "Requirement"):
        if req["id"] in met and req["id"] not in broken:
            continue
        pending.append(req)
    return pending
|
|
1139
|
+
|
|
1140
|
+
|
|
1141
|
+
# ---------------------------------------------------------------------------
|
|
1142
|
+
# M13 P2 EP-02: Outcome Tracking & Error Memory
|
|
1143
|
+
# ---------------------------------------------------------------------------
|
|
1144
|
+
|
|
1145
|
+
def record_outcome(
    graph: dict,
    decision_id: str,
    status: str,
    actual_result: str,
    lessons_learned: str = "",
    tags: Optional[list[str]] = None,
    recorded_by: str = "operator",
) -> dict:
    """Attach an Outcome node to an existing Decision.

    The Outcome's id is ``outcome_{decision_id}``. It is written through
    ``add_node`` with ``source_type="operator"``. When no node with that
    id existed before the call, an `evaluates` edge Outcome → Decision is
    added as well; when one already existed, only the ``add_node`` call
    is made and no second edge is created (§5.3: duplicate id → update).

    Args:
        graph: the graph dict (mutated in place).
        decision_id: id of the Decision being evaluated; must exist.
        status: one of OUTCOME_STATUSES.
        actual_result: free-form description of what happened; stored
            stripped, both as the description and as ``actual_result``.
        lessons_learned: optional guidance for similar future decisions.
        tags: optional semantic tags (copied into a new list).
        recorded_by: who recorded the outcome (default "operator").

    Returns:
        The Outcome node dict as stored in ``graph["nodes"]``.

    Raises:
        ValueError: the status is not in OUTCOME_STATUSES, the decision
            id is not in the graph, or the target node is not a Decision.
    """
    record_graph_usage(graph, "record_outcome", kind="updates")

    if status not in OUTCOME_STATUSES:
        allowed = sorted(OUTCOME_STATUSES)
        raise ValueError(
            f"invalid outcome status {status!r}; must be one of {allowed}"
        )

    target = graph["nodes"].get(decision_id)
    if target is None:
        raise ValueError(f"decision {decision_id!r} not in graph")
    target_type = target.get("type")
    if target_type != "Decision":
        raise ValueError(
            f"node {decision_id!r} is type {target_type!r}, expected Decision"
        )

    outcome_id = f"outcome_{decision_id}"
    title = f"Outcome of {target.get('name', decision_id)[:60]}"
    result_text = actual_result.strip()

    # Remember whether this is a first-time record before add_node runs,
    # so the evaluates edge is only created once.
    is_new = outcome_id not in graph["nodes"]

    payload = {
        "decision_id": decision_id,
        "outcome_status": status,
        "actual_result": actual_result.strip(),
        "lessons_learned": lessons_learned.strip(),
        "tags": list(tags or []),
        "recorded_by": recorded_by,
    }
    add_node(
        graph,
        outcome_id,
        "Outcome",
        name=title,
        description=result_text,
        # M14: operator-recorded outcomes are ground truth ("KNOWS").
        # Once outcomes can be extracted automatically from git history,
        # the source type should be passed explicitly instead.
        source_type="operator",
        extra=payload,
    )

    if is_new:
        add_edge(graph, outcome_id, decision_id, "evaluates")

    return graph["nodes"][outcome_id]
|
|
1224
|
+
|
|
1225
|
+
|
|
1226
|
+
def outcome_for(graph: dict, decision_id: str) -> Optional[dict]:
    """Look up the Outcome recorded for a decision.

    Outcomes are stored under the id ``outcome_<decision_id>``; the node
    dict under that id is returned, or None when there is none.
    """
    outcome_key = "outcome_{}".format(decision_id)
    all_nodes = graph["nodes"]
    return all_nodes.get(outcome_key)
|
|
1229
|
+
|
|
1230
|
+
|
|
1231
|
+
def find_similar_outcomes(
    graph: dict,
    task_text: str,
    limit: int = 3,
) -> list[dict]:
    """Find Outcome nodes relevant to the current task (PDF EP-02 STEP 2.5).

    Ranking strategy:
    1. Extract keywords from task_text via ``_extract_keywords``.
    2. For each Outcome node:
       - count tag overlap with the task keywords (tags lowercased)
       - count task keywords contained in the evaluated decision's name
       - score = tag_overlap * 2 + decision_match
    3. Drop outcomes with score 0.
    4. Rank by (score DESC, recency DESC), where recency is the
       outcome's ``updated_at`` string — recent outcomes win ties.
    5. Return the top ``limit`` nodes.

    Outcomes with ``outcome_status == "confirmed"`` are always skipped:
    they are "things that went well" and less actionable than
    reverted/revised/partially_applied ones.

    Args:
        graph: the graph dict
        task_text: the current deliberation goal/task string
        limit: maximum number of outcomes to return (default 3)

    Returns:
        List of Outcome node dicts, ranked by relevance. Empty if the
        task text is blank, no keywords match, or no outcomes exist.
    """
    if not task_text.strip():
        return []

    keywords = _extract_keywords(task_text)
    if not keywords:
        return []

    scored: list[tuple[int, str, dict]] = []
    for node in nodes_by_type(graph, "Outcome"):
        # Skip confirmed outcomes — they're less useful as cautionary tales
        if node.get("outcome_status") == "confirmed":
            continue

        # Score 1: tag overlap (a null/absent tag list counts as no tags)
        node_tags = {str(t).lower() for t in (node.get("tags") or [])}
        tag_overlap = len(node_tags & keywords)

        # Score 2: decision name contains any task keyword
        decision_id = node.get("decision_id", "")
        decision = graph["nodes"].get(decision_id, {})
        decision_name = (decision.get("name") or "").lower()
        decision_match = sum(1 for kw in keywords if kw in decision_name)

        score = tag_overlap * 2 + decision_match
        if score > 0:
            scored.append((score, node.get("updated_at", ""), node))

    # Two stable passes give (score DESC, updated_at DESC): order by the
    # tie-breaker first, then by the primary key. list.sort keeps equal
    # elements in their existing order, also with reverse=True.
    scored.sort(key=lambda item: item[1], reverse=True)
    scored.sort(key=lambda item: item[0], reverse=True)

    return [outcome for _score, _ts, outcome in scored[:limit]]
|
|
1293
|
+
|
|
1294
|
+
|
|
1295
|
+
def decisions_without_outcome(
    graph: dict,
    stage: str = "",
    now: Optional[_dt.datetime] = None,
) -> list[dict]:
    """Return Decision nodes older than the stage threshold that lack an Outcome.

    Used by EP-02 operator review workflow: "Hey, this decision from
    14 days ago never got an Outcome. What actually happened?"

    The threshold in days is looked up in STAGE_OUTCOME_THRESHOLDS_DAYS by
    ``stage``; stages not listed there use DEFAULT_STAGE_THRESHOLD_DAYS.
    Per PDF EP-02 the intended values are:
    - idea / mvp: 14 days
    - growth: 30 days
    - scale: 60 days
    - unknown stage: 30 days

    Decisions whose ``created_at`` is missing or not in the form
    ``YYYY-MM-DDTHH:MM:SS`` plus UTC offset (``Z`` accepted) are skipped
    rather than causing a failure.

    Args:
        graph: the graph dict
        stage: project stage from project-layer.yaml (optional)
        now: timestamp to compare against (defaults to current UTC time —
            parameterized for deterministic testing). A naive datetime
            is interpreted as UTC.

    Returns:
        List of Decision node dicts, sorted by creation time (oldest first).
    """
    threshold_days = STAGE_OUTCOME_THRESHOLDS_DAYS.get(stage, DEFAULT_STAGE_THRESHOLD_DAYS)
    if now is None:
        now = _dt.datetime.now(_dt.timezone.utc)
    elif now.tzinfo is None:
        # Parsed created_at values are always timezone-aware; comparing
        # them with a naive cutoff would raise TypeError.
        now = now.replace(tzinfo=_dt.timezone.utc)
    cutoff = now - _dt.timedelta(days=threshold_days)

    results: list[tuple[_dt.datetime, dict]] = []
    for decision in nodes_by_type(graph, "Decision"):
        created_at_raw = decision.get("created_at", "")
        try:
            created_at = _dt.datetime.strptime(
                created_at_raw.replace("Z", "+0000"),
                "%Y-%m-%dT%H:%M:%S%z",
            )
        except (ValueError, AttributeError):
            # Unparseable timestamp — skip rather than crash
            continue

        # Too recent to be worth chasing yet
        if created_at > cutoff:
            continue

        # Skip if Outcome already exists
        if outcome_for(graph, decision["id"]) is not None:
            continue

        results.append((created_at, decision))

    # Oldest first, by actual instant rather than by raw string, so that
    # timestamps written with different UTC offsets still order correctly.
    results.sort(key=lambda item: item[0])
    return [d for _ts, d in results]
|
|
1348
|
+
|
|
1349
|
+
|
|
1350
|
+
def format_lessons_block(outcomes: list[dict], graph: dict) -> str:
    """Format Outcome nodes as a `<lessons_learned>` block for prompt injection.

    Follows the PDF EP-02 STEP 2.5 layout. Each outcome becomes a short
    paragraph: a headline naming the decision and its outcome status,
    then a ``Reason:`` line (from ``actual_result``) and a ``Lesson:``
    line (from ``lessons_learned``) when those fields are non-blank.
    Paragraphs are separated by one blank line. When the decision is not
    in the graph or has no name, its id is used as the name; a missing
    status is shown as "unknown".

    An empty ``outcomes`` list produces an empty string.

    Example output:
        <lessons_learned>
        Similar past decision "dec_002: Use Kafka for ingestion" was reverted.
        Reason: Kafka ops overhead too high for 2-person backend team.
        Lesson: Managed queue SQS/CloudTasks better for teams < 5.
        </lessons_learned>
    """
    if not outcomes:
        return ""

    def _paragraph(outcome: dict) -> str:
        dec_id = outcome.get("decision_id", "")
        dec_name = graph["nodes"].get(dec_id, {}).get("name", dec_id)
        verdict = outcome.get("outcome_status", "unknown")

        rows = [f'Similar past decision "{dec_id}: {dec_name}" was {verdict}.']
        reason = outcome.get("actual_result", "").strip()
        if reason:
            rows.append(f"Reason: {reason}")
        takeaway = outcome.get("lessons_learned", "").strip()
        if takeaway:
            rows.append(f"Lesson: {takeaway}")
        return "\n".join(rows)

    # A blank line goes between paragraphs, none before the closing tag.
    body = "\n\n".join(_paragraph(outcome) for outcome in outcomes)
    return "\n".join(["<lessons_learned>", body, "</lessons_learned>"])
|
|
1389
|
+
|
|
1390
|
+
|
|
1391
|
+
# ---------------------------------------------------------------------------
|
|
1392
|
+
# CLI shim (optional — main entry is generate_project_graph.py)
|
|
1393
|
+
# ---------------------------------------------------------------------------
|
|
1394
|
+
|
|
1395
|
+
def _summary(graph: dict) -> str:
|
|
1396
|
+
"""Produce a one-screen summary of graph state for CLI inspection."""
|
|
1397
|
+
meta = graph["meta"]
|
|
1398
|
+
by_type: dict[str, int] = {}
|
|
1399
|
+
for node in graph["nodes"].values():
|
|
1400
|
+
by_type[node["type"]] = by_type.get(node["type"], 0) + 1
|
|
1401
|
+
by_edge_type: dict[str, int] = {}
|
|
1402
|
+
for edge in graph["edges"]:
|
|
1403
|
+
by_edge_type[edge["type"]] = by_edge_type.get(edge["type"], 0) + 1
|
|
1404
|
+
|
|
1405
|
+
lines = [
|
|
1406
|
+
f"Project Context Graph (schema v{meta.get('schema_version')})",
|
|
1407
|
+
f" created_at: {meta.get('created_at')}",
|
|
1408
|
+
f" updated_at: {meta.get('updated_at')}",
|
|
1409
|
+
f" nodes: {meta.get('node_count', 0)}",
|
|
1410
|
+
f" edges: {meta.get('edge_count', 0)}",
|
|
1411
|
+
"",
|
|
1412
|
+
"Node types:",
|
|
1413
|
+
]
|
|
1414
|
+
for node_type in sorted(by_type.keys()):
|
|
1415
|
+
lines.append(f" {node_type}: {by_type[node_type]}")
|
|
1416
|
+
lines.append("")
|
|
1417
|
+
lines.append("Edge types:")
|
|
1418
|
+
for edge_type in sorted(by_edge_type.keys()):
|
|
1419
|
+
lines.append(f" {edge_type}: {by_edge_type[edge_type]}")
|
|
1420
|
+
return "\n".join(lines)
|
|
1421
|
+
|
|
1422
|
+
|
|
1423
|
+
def main(argv: Optional[list[str]] = None) -> int:
    """Minimal CLI: `python3 scripts/graph.py <path>` prints summary.

    Args:
        argv: argument list handed to argparse; ``None`` lets argparse
            read the process arguments.

    Returns:
        0 on success, 1 when ``--validate`` reports errors, and 2 when
        loading the graph raises ``GraphValidationError``.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Inspect a 0dai project_graph.json file.",
    )
    cli.add_argument(
        "path",
        nargs="?",
        default="ai/manifest/project_graph.json",
        help="Path to project_graph.json (default: ai/manifest/project_graph.json)",
    )
    cli.add_argument(
        "--validate",
        action="store_true",
        help="Run full validation and report any errors",
    )
    opts = cli.parse_args(argv)

    try:
        graph = load_graph(pathlib.Path(opts.path))
    except GraphValidationError as exc:
        print(f"error: {exc}")
        return 2

    # The summary is always printed; validation is opt-in.
    print(_summary(graph))
    if not opts.validate:
        return 0

    problems = validate_graph(graph)
    if not problems:
        print("\nvalidation: OK")
        return 0

    print("\nvalidation errors:")
    for problem in problems:
        print(f" - {problem}")
    return 1
|
|
1462
|
+
|
|
1463
|
+
|
|
1464
|
+
# ---------------------------------------------------------------------------
|
|
1465
|
+
# M19 P0: Graph dogfood — Artifact + Event helpers
|
|
1466
|
+
# ---------------------------------------------------------------------------
|
|
1467
|
+
|
|
1468
|
+
def record_artifact(
    graph: dict,
    version: str,
    *,
    changelog: str = "",
    commit_sha: str = "",
    released_at: str = "",
) -> dict:
    """Record a release Artifact node in the graph.

    Creates an Artifact node (artifact_vX_Y_Z) and links it to all
    existing Decision nodes via 'contains' edges. Idempotent — calling
    twice with the same version updates the existing node and does not
    add a second 'contains' edge to a Decision that is already linked.

    Args:
        graph: the graph dict (will be mutated)
        version: release version; dots become underscores in the node id
        changelog: optional release notes; only the first 500 characters
            are appended to the node description
        commit_sha: commit the release was cut from
        released_at: release timestamp; stored only when non-empty

    Returns the node dict after insertion.
    """
    record_graph_usage(graph, "record_artifact", kind="updates")
    node_id = f"artifact_v{version.replace('.', '_')}"
    description = f"Release v{version}"
    if changelog:
        description += f"\n\n{changelog[:500]}"

    extra: dict[str, Any] = {
        "version": version,
        "commit_sha": commit_sha,
    }
    if released_at:
        extra["released_at"] = released_at

    node = add_node(
        graph, node_id, "Artifact", f"v{version}",
        status="active",
        description=description,
        source_type="KNOWS",
        extra=extra,
    )

    # Link to all existing Decision nodes. The find_edge guard mirrors
    # add_constraint so a repeated call cannot stack duplicate edges.
    # NOTE(review): assumes add_edge does not de-duplicate — confirm.
    for dec_node in nodes_by_type(graph, "Decision"):
        if not find_edge(graph, node_id, dec_node["id"], "contains"):
            add_edge(graph, node_id, dec_node["id"], "contains", weight=0.3)

    return node
|
|
1510
|
+
|
|
1511
|
+
|
|
1512
|
+
def record_event(
    graph: dict,
    event_type: str,
    name: str,
    *,
    description: str = "",
    extra: Optional[dict[str, Any]] = None,
) -> dict:
    """Store a meta Event node in the graph and return it.

    Events cover session starts, meta-sessions, migrations and other
    project lifecycle moments that are neither a Decision nor a
    Deliberation, so they can be used for timeline queries later.

    The node id is built from the event type, the first 40 characters of
    the slugged name, and the first 19 characters of the current ISO
    timestamp with ``:`` replaced by ``-`` and ``T`` by ``_``.

    Args:
        graph: the graph dict (will be mutated)
        event_type: event category; stored as ``event_type`` in the payload
        name: human-readable event name
        description: free-form description
        extra: additional payload fields; a key named ``event_type`` here
            overrides the positional ``event_type`` in the stored payload

    Returns:
        The node dict after insertion.
    """
    record_graph_usage(graph, "record_event", kind="updates")

    short_slug = _slug(name)[:40]
    stamp = _now_iso().replace(":", "-").replace("T", "_")[:19]
    node_id = "event_{}_{}_{}".format(event_type, short_slug, stamp)

    if extra:
        payload = {"event_type": event_type, **extra}
    else:
        payload = {"event_type": event_type}

    return add_node(
        graph, node_id, "Event", name,
        status="active",
        description=description,
        source_type="KNOWS",
        extra=payload,
    )
|
|
1541
|
+
|
|
1542
|
+
|
|
1543
|
+
def record_deliberation_outcome(
    graph: dict,
    deliberation_id: str,
    verdict: str,
    goal: str,
    *,
    synthesis: str = "",
    resources: Optional[dict] = None,
) -> dict:
    """Store a working-group deliberation and the Outcome that evaluates it.

    Two nodes are written: a Deliberation node (``delib_{id}``) and an
    Outcome node (``outcome_{id}``). They are joined by an 'evaluates'
    edge pointing from the Outcome to the Deliberation.

    Args:
        graph: the graph dict (will be mutated)
        deliberation_id: identifier used to build both node ids
        verdict: deliberation verdict; APPROVED, CONDITIONAL, NEEDS_WORK
            and REJECTED map to an outcome status, anything else falls
            back to "revised"
        goal: deliberation goal; truncated for the node names
        synthesis: optional summary; truncated for the descriptions
        resources: optional resource info stored on the Deliberation node

    Returns:
        The Outcome node dict.
    """
    record_graph_usage(graph, "record_deliberation_outcome", kind="updates")

    # --- Deliberation node -------------------------------------------------
    delib_id = f"delib_{deliberation_id}"
    # Only a rejected deliberation is stored as closed.
    delib_status = "closed" if verdict == "REJECTED" else "active"
    delib_description = synthesis[:300] if synthesis else goal
    add_node(
        graph, delib_id, "Deliberation", goal[:100],
        status=delib_status,
        description=delib_description,
        source_type="KNOWS",
        extra={
            "deliberation_id": deliberation_id,
            "verdict": verdict,
            "resources": resources or {},
        },
    )

    # --- Outcome node ------------------------------------------------------
    verdict_to_status = {
        "APPROVED": "confirmed",
        "CONDITIONAL": "partially_applied",
        "NEEDS_WORK": "revised",
        "REJECTED": "reverted",
    }
    outcome_id = f"outcome_{deliberation_id}"
    outcome = add_node(
        graph, outcome_id, "Outcome", f"Outcome: {goal[:80]}",
        status=verdict_to_status.get(verdict, "revised"),
        description=f"Deliberation verdict: {verdict}\n\n{synthesis[:200]}",
        source_type="KNOWS",
        extra={
            "deliberation_id": deliberation_id,
            "verdict": verdict,
            "auto_generated": True,
        },
    )

    # Outcome --evaluates--> Deliberation
    add_edge(graph, outcome_id, delib_id, "evaluates")
    return outcome
|
|
1600
|
+
|
|
1601
|
+
|
|
1602
|
+
# Script entry point. This used to be a second `def main(...)` whose body
# only called `main()` again: it shadowed the real CLI `main` defined
# earlier in this module and recursed without bound when invoked.
# NOTE: when the file is run directly, definitions that appear after this
# guard are not yet bound at the time `main()` executes.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
1604
|
+
|
|
1605
|
+
|
|
1606
|
+
# ---------------------------------------------------------------------------
|
|
1607
|
+
# #479: Architecture Constraints — first-class constraint nodes
|
|
1608
|
+
# ---------------------------------------------------------------------------
|
|
1609
|
+
|
|
1610
|
+
# Valid enforcement levels for Constraint nodes.
|
|
1611
|
+
CONSTRAINT_ENFORCEMENTS = frozenset(
    (
        "hard",  # must never be violated — build/lint blocks
        "soft",  # should be followed — warnings only
        "guideline",  # best-effort suggestion
    )
)
|
|
1616
|
+
|
|
1617
|
+
# Decision-key → constraint auto-derivation rules.
|
|
1618
|
+
# When a Decision node's name/description contains the key, the listed
|
|
1619
|
+
# constraint templates are auto-generated. Keys are matched
|
|
1620
|
+
# case-insensitively as substrings against the decision name and description.
|
|
1621
|
+
CONSTRAINT_DERIVATION_RULES: dict[str, list[dict[str, Any]]] = {
    # Each keyword maps to a list of constraint templates. A template
    # carries "constraint_id", "name", "implies" and "forbids"; "diff_rules"
    # is optional.
    "docker": [
        {
            "constraint_id": "deployment_mode_containers",
            "name": "Container-based deployment",
            "diff_rules": [
                "no-localhost-on-service-bound",
            ],
            "implies": [
                "All services accessed by container name, not localhost",
                "Environment variables via compose args, not hardcoded",
            ],
            "forbids": [
                "localhost in connection strings",
                "hardcoded file paths in app code",
                "host-style port mapping in app logic",
            ],
        },
    ],
    "serverless": [
        {
            "constraint_id": "deployment_mode_serverless",
            "name": "Serverless deployment",
            "implies": [
                "Stateless function handlers only",
                "Cold-start optimization required",
            ],
            "forbids": [
                "Local filesystem for persistent state",
                "Long-running connections",
                "In-process caching across invocations",
            ],
        },
    ],
    "monorepo": [
        {
            "constraint_id": "repo_structure_monorepo",
            "name": "Monorepo structure",
            "implies": [
                "Shared dependency versions across packages",
                "Cross-package imports via workspace protocol",
            ],
            "forbids": [
                "Duplicated dependencies across packages",
                "Relative imports crossing package boundaries",
            ],
        },
    ],
    "kubernetes": [
        {
            "constraint_id": "deployment_mode_k8s",
            "name": "Kubernetes deployment",
            "implies": [
                "Health check endpoints required for all services",
                "Configuration via ConfigMap and Secret resources",
            ],
            "forbids": [
                "Hardcoded service addresses",
                "Writing to container filesystem",
            ],
        },
    ],
    "postgresql": [
        {
            "constraint_id": "database_relational_postgres",
            "name": "PostgreSQL as primary database",
            "diff_rules": [
                "connection-string-consistency",
            ],
            "implies": [
                "SQL migrations managed by tooling (Alembic, Prisma, etc.)",
                "Connection pooling required for production",
            ],
            "forbids": [
                "Raw DDL in application code",
                "Unparameterized SQL queries",
            ],
        },
    ],
    "redis": [
        {
            "constraint_id": "cache_redis",
            "name": "Redis for caching/queue",
            "implies": [
                "Cache invalidation strategy required",
                "TTL on all cache keys",
            ],
            "forbids": [
                "Using Redis as primary data store",
                "Unbounded key growth without eviction policy",
            ],
        },
    ],
}
|
|
1715
|
+
|
|
1716
|
+
|
|
1717
|
+
def add_constraint(
    graph: dict,
    constraint_id: str,
    name: str,
    *,
    enforcement: str = "hard",
    implies: Optional[list[str]] = None,
    forbids: Optional[list[str]] = None,
    declared_by: Optional[str] = None,
    constrains: Optional[list[str]] = None,
    description: str = "",
    source_type: str = DEFAULT_SOURCE,
) -> dict:
    """Add an Architecture Constraint node to the graph.

    Constraints encode hard rules that follow from architectural decisions.
    Unlike context (soft suggestion), constraints are injected into agent
    prompts as checklists that must be satisfied.

    All references are validated before the graph is touched, so a
    ``ValueError`` never leaves a half-created constraint node behind.

    Args:
        graph: the graph dict (will be mutated)
        constraint_id: short slug for the constraint (e.g. "deployment_mode_containers")
        name: human-readable name
        enforcement: "hard" (blocks), "soft" (warns), or "guideline" (suggests)
        implies: list of rules this constraint requires
        forbids: list of anti-patterns this constraint prohibits
        declared_by: node id of the Decision that produced this constraint
        constrains: list of Component/Technology node ids this constraint scopes to
            (only existence is checked, not the node type)
        description: free-form description
        source_type: provenance marker

    Returns:
        The Constraint node dict.

    Raises:
        ValueError: if enforcement is not in CONSTRAINT_ENFORCEMENTS, or
            declared_by/constrains reference non-existent nodes, or
            declared_by is not a Decision node.
    """
    record_graph_usage(graph, "add_constraint", kind="updates")
    if enforcement not in CONSTRAINT_ENFORCEMENTS:
        raise ValueError(
            f"invalid enforcement {enforcement!r}; "
            f"must be one of {sorted(CONSTRAINT_ENFORCEMENTS)}"
        )

    node_id = make_node_id("Constraint", constraint_id)
    # Materialize once: the ids are walked twice (validate, then link).
    scope_ids = list(constrains or [])

    # --- Validate every reference before mutating the graph ---------------
    if declared_by:
        if declared_by not in graph["nodes"]:
            raise ValueError(f"declared_by node {declared_by!r} not in graph")
        declared_type = graph["nodes"][declared_by].get("type")
        if declared_type != "Decision":
            raise ValueError(
                f"declared_by node {declared_by!r} is type "
                f"{declared_type!r}, expected Decision"
            )
    for target_id in scope_ids:
        # The constraint's own id is allowed: it exists once the node below
        # has been added, which is what the post-insert check used to see.
        if target_id != node_id and target_id not in graph["nodes"]:
            raise ValueError(f"constrains target {target_id!r} not in graph")

    node = add_node(
        graph, node_id, "Constraint", name,
        status="active",
        description=description,
        source_type=source_type,
        extra={
            "constraint_id": constraint_id,
            "enforcement": enforcement,
            "implies": list(implies or []),
            "forbids": list(forbids or []),
        },
    )

    # Edge: Constraint -> Decision (declared_by); skipped when already present.
    if declared_by and not find_edge(graph, node_id, declared_by, "declared_by"):
        add_edge(graph, node_id, declared_by, "declared_by")

    # Edges: Constraint -> Component|Technology (constrains)
    for target_id in scope_ids:
        if not find_edge(graph, node_id, target_id, "constrains"):
            add_edge(graph, node_id, target_id, "constrains")

    return node
|
|
1798
|
+
|
|
1799
|
+
|
|
1800
|
+
def auto_derive_constraints(graph: dict) -> list[dict]:
    """Auto-derive Constraint nodes from existing Decision nodes.

    Scans each Decision node's name and description (case-insensitive
    substring match) for known keywords (docker, serverless, monorepo,
    etc.) and creates Constraint nodes using the templates in
    CONSTRAINT_DERIVATION_RULES. Every derived constraint uses "hard"
    enforcement and is linked to the matching Decision via ``declared_by``.

    Idempotent — re-running does not create duplicate constraints. If a
    constraint node already exists, it is updated with the current
    implies/forbids lists.

    Args:
        graph: the graph dict (will be mutated)

    Returns:
        List of newly created or updated Constraint node dicts, one per
        (decision, template) match.
    """
    record_graph_usage(graph, "auto_derive_constraints", kind="updates")
    results: list[dict] = []

    # Snapshot the decisions first: add_constraint inserts nodes while we
    # iterate. NOTE(review): harmless if nodes_by_type already returns a
    # list; required if it yields lazily from graph["nodes"].
    for decision in list(nodes_by_type(graph, "Decision")):
        # `or ""` tolerates an explicit None name/description.
        dec_name = (decision.get("name") or "").lower()
        dec_desc = (decision.get("description") or "").lower()
        dec_text = f"{dec_name} {dec_desc}"

        for keyword, templates in CONSTRAINT_DERIVATION_RULES.items():
            if keyword.lower() not in dec_text:
                continue

            for tmpl in templates:
                constraint = add_constraint(
                    graph,
                    tmpl["constraint_id"],
                    tmpl["name"],
                    enforcement="hard",
                    implies=tmpl.get("implies"),
                    forbids=tmpl.get("forbids"),
                    declared_by=decision["id"],
                    source_type="operator",
                )
                # diff_rules are optional and are copied straight onto the node.
                if tmpl.get("diff_rules"):
                    constraint["diff_rules"] = list(tmpl["diff_rules"])
                results.append(constraint)

    return results
|
|
1842
|
+
|
|
1843
|
+
|
|
1844
|
+
def load_constraints_yaml(
    graph: dict,
    yaml_path: pathlib.Path,
) -> list[dict]:
    """Import hand-written constraints from an ai/constraints.yaml file.

    The file is expected to hold a top-level `constraints` key with a
    list of entries. Fields read from each entry:
      - constraint_id (required)
      - name (required)
      - enforcement (optional, default "hard")
      - implies (optional list of strings)
      - forbids (optional list of strings)
      - declared_by (optional Decision node id)
      - constrains (optional list of Component/Technology node ids)
      - description (optional)

    No PyYAML dependency — parsing is delegated to
    `_parse_constraints_yaml`, a minimal line-based parser for the common
    subset of YAML needed here (string scalars, lists of strings).

    Every entry is registered through `add_constraint` with
    ``source_type="operator"``.

    Returns:
        List of created/updated Constraint node dicts.

    Raises:
        FileNotFoundError: if yaml_path does not exist
    """
    record_graph_usage(graph, "load_constraints_yaml", kind="updates")
    if not yaml_path.exists():
        raise FileNotFoundError(f"constraints file not found: {yaml_path}")

    def _register(item: dict) -> dict:
        # Required keys are indexed directly; optional ones fall back to defaults.
        return add_constraint(
            graph,
            item["constraint_id"],
            item["name"],
            enforcement=item.get("enforcement", "hard"),
            implies=item.get("implies"),
            forbids=item.get("forbids"),
            declared_by=item.get("declared_by"),
            constrains=item.get("constrains"),
            description=item.get("description", ""),
            source_type="operator",
        )

    text = yaml_path.read_text(encoding="utf-8")
    loaded: list[dict] = []
    for item in _parse_constraints_yaml(text):
        loaded.append(_register(item))
    return loaded
|
|
1894
|
+
|
|
1895
|
+
|
|
1896
|
+
def get_architecture_constraints(graph: dict) -> list[dict]:
    """List every Constraint node in the graph, ordered by enforcement.

    Ordering is hard, then soft, then guideline. A node without an
    ``enforcement`` value sorts as hard; an unrecognized value sorts last.
    Ties are broken by ``constraint_id`` so the order is stable.
    """
    record_graph_usage(graph, "get_architecture_constraints", kind="queries")
    ordered = list(nodes_by_type(graph, "Constraint"))

    rank = {"hard": 0, "soft": 1, "guideline": 2}

    def _sort_key(node: dict) -> tuple:
        level = node.get("enforcement", "hard")
        return rank.get(level, 99), node.get("constraint_id", "")

    ordered.sort(key=_sort_key)
    return ordered
|
|
1913
|
+
|
|
1914
|
+
|
|
1915
|
+
def _constraint_scope_patterns(graph: dict, constraint_id: str) -> list[str]:
    """Collect ``path_patterns`` from a node's outgoing 'constrains' edges.

    A string value is taken as a single pattern. A list value contributes
    each truthy item, converted with ``str``. Any other value is ignored.
    """
    collected: list[str] = []
    for edge in outgoing_edges(graph, constraint_id):
        if edge.get("type") == "constrains":
            value = edge.get("path_patterns")
            if isinstance(value, str):
                collected.append(value)
            elif isinstance(value, list):
                collected += [str(entry) for entry in value if entry]
    return collected
|
|
1926
|
+
|
|
1927
|
+
|
|
1928
|
+
def get_active_constraints(
    target: pathlib.Path,
    path_patterns: Optional[list[str]] = None,
) -> list[dict]:
    """Return the constraints of a repo that apply to the given path patterns.

    The graph is loaded from ``<target>/ai/manifest/project_graph.json`` and
    auto-derived constraints are added to the in-memory copy before the
    query runs. Nothing in this function writes the graph back to disk.

    Constraints without any `constrains` edges are treated as globally
    active. Constraints with `constrains` edges become active when one of
    the edge `path_patterns` values overlaps the requested path patterns.
    The overlap decision itself is made by `_path_patterns_overlap`.

    Args:
        target: repository root containing ``ai/manifest/project_graph.json``
        path_patterns: patterns to filter by; ``None`` or empty means ``["*"]``

    Returns:
        Matching constraints in the order produced by
        `get_architecture_constraints`.
    """
    repo_root = pathlib.Path(target)
    graph = load_graph(repo_root / "ai" / "manifest" / "project_graph.json")
    auto_derive_constraints(graph)
    record_graph_usage(graph, "get_active_constraints", kind="queries")

    wanted = list(path_patterns or ["*"])
    return [
        candidate
        for candidate in get_architecture_constraints(graph)
        if _path_patterns_overlap(
            wanted,
            _constraint_scope_patterns(graph, str(candidate.get("id") or "")),
        )
    ]
|
|
1954
|
+
|
|
1955
|
+
|
|
1956
|
+
def format_constraints_checklist(
    graph: dict,
    *,
    constraints: Optional[list[dict]] = None,
) -> str:
    """Render architecture constraints as a checklist for prompt injection.

    Constraints are rendered as a checklist (not context) so agents see
    them as hard rules that must be satisfied, not soft suggestions.
    Only "hard" and "soft" constraints are rendered; "guideline"
    constraints are left out. Hard rules carry no tag, soft rules are
    suffixed with ``[soft]``. Each ``forbids`` rule is prefixed with "No".

    Format:
        ARCHITECTURE CONSTRAINTS (must be satisfied):
        ☐ No localhost in connection strings (deployment_mode_containers)
        ☐ All services accessed by container name, not localhost (deployment_mode_containers)
        ☐ TTL on all cache keys (cache_redis) [soft]
        [before returning code]: self-check against this list

    Args:
        graph: the graph dict; queried for constraints only when
            ``constraints`` is None
        constraints: optional pre-selected constraint dicts to render
            instead of every constraint in the graph

    Returns:
        The checklist text, or an empty string when no hard or soft
        constraint is available.
    """
    record_graph_usage(graph, "format_constraints_checklist", kind="queries")
    candidates = (
        list(constraints)
        if constraints is not None
        else get_architecture_constraints(graph)
    )
    selected = [
        c for c in candidates if c.get("enforcement", "hard") in {"hard", "soft"}
    ]
    if not selected:
        return ""

    lines: list[str] = ["ARCHITECTURE CONSTRAINTS (must be satisfied):"]

    for c in selected:
        enforcement = c.get("enforcement", "hard")
        # Fall back to the node id only when constraint_id is absent.
        # dict.get(key, c["id"]) would evaluate c["id"] eagerly and raise
        # KeyError for caller-supplied dicts that carry no "id".
        cid = c["constraint_id"] if "constraint_id" in c else c["id"]
        suffix = f" [{enforcement}]" if enforcement != "hard" else ""

        # `or []` tolerates an explicit None for either rule list.
        for rule in c.get("forbids") or []:
            lines.append(f"☐ No {rule} ({cid}){suffix}")

        for rule in c.get("implies") or []:
            lines.append(f"☐ {rule} ({cid}){suffix}")

    lines.append("[before returning code]: self-check against this list")
    return "\n".join(lines)
|
|
1997
|
+
|
|
1998
|
+
|
|
1999
|
+
def check_constraint_violations(
    graph: dict,
    code_artifacts: dict[str, str],
) -> list[dict[str, Any]]:
    """Check code artifacts against constraint rules and return violations.

    For every ``forbids`` rule of every constraint in the graph, a search
    pattern is obtained from `_extract_violation_pattern`. That pattern is
    matched as a case-insensitive substring against each artifact's
    content. Rules that yield no pattern are skipped.

    Args:
        graph: the graph dict
        code_artifacts: dict of {filename: content} to check

    Returns:
        List of violation dicts, ordered by constraint, then rule, then
        file, each with:
          - constraint_id: the constraint that was violated
          - rule: the specific forbids rule that matched
          - file: the file containing the violation
          - enforcement: the constraint's enforcement level

    Example:
        violations = check_constraint_violations(g, {
            "db.py": "DB_HOST = 'localhost:5432'",
            "app.py": "redis://my-redis:6379",
        })
        # Returns violation for "localhost in connection strings" in db.py
    """
    record_graph_usage(graph, "check_constraint_violations", kind="queries")

    # Pass 1: collect (constraint_id, enforcement, rule, lowered pattern).
    checks: list[tuple[Any, Any, Any, str]] = []
    for constraint in get_architecture_constraints(graph):
        # Fall back to the node id only when constraint_id is absent, without
        # evaluating constraint["id"] eagerly.
        cid = (
            constraint["constraint_id"]
            if "constraint_id" in constraint
            else constraint["id"]
        )
        enforcement = constraint.get("enforcement", "hard")

        # `or []` tolerates an explicit None forbids list.
        for rule in constraint.get("forbids") or []:
            pattern = _extract_violation_pattern(rule)
            if pattern:
                checks.append((cid, enforcement, rule, pattern.lower()))

    if not checks:
        return []

    # Pass 2: lowercase each artifact once instead of once per rule.
    lowered = [(filename, content.lower()) for filename, content in code_artifacts.items()]

    violations: list[dict[str, Any]] = []
    for cid, enforcement, rule, needle in checks:
        for filename, text in lowered:
            if needle in text:
                violations.append({
                    "constraint_id": cid,
                    "rule": rule,
                    "file": filename,
                    "enforcement": enforcement,
                })

    return violations
|