codd-dev 0.2.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codd/__init__.py +3 -0
- codd/cli.py +344 -0
- codd/config.py +62 -0
- codd/defaults.yaml +30 -0
- codd/generator.py +644 -0
- codd/graph.py +288 -0
- codd/hooks.py +104 -0
- codd/implementer.py +846 -0
- codd/planner.py +576 -0
- codd/propagate.py +308 -0
- codd/scanner.py +445 -0
- codd/validator.py +499 -0
- codd/verifier.py +426 -0
- codd_dev-0.2.0a1.dist-info/METADATA +241 -0
- codd_dev-0.2.0a1.dist-info/RECORD +18 -0
- codd_dev-0.2.0a1.dist-info/WHEEL +4 -0
- codd_dev-0.2.0a1.dist-info/entry_points.txt +2 -0
- codd_dev-0.2.0a1.dist-info/licenses/LICENSE +21 -0
codd/graph.py
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"""CEG (Conditioned Evidence Graph) — JSONL file-backed dependency graph.
|
|
2
|
+
|
|
3
|
+
Design: All data lives in JSONL files (one record per line).
|
|
4
|
+
Files are loaded into memory on init, flushed to disk on close().
|
|
5
|
+
Git-friendly: every change is a line-level diff.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CEG:
    """Conditioned Evidence Graph — JSONL-backed dependency graph.

    Storage:
        {scan_dir}/nodes.jsonl — one JSON object per line
        {scan_dir}/edges.jsonl — one JSON object per line

    All data is held in memory after __init__; call close() to persist.
    """

    def __init__(self, scan_dir: Path):
        """Open (or create) the graph rooted at *scan_dir*."""
        self.scan_dir = Path(scan_dir)
        self.scan_dir.mkdir(parents=True, exist_ok=True)

        self.nodes_path = self.scan_dir / "nodes.jsonl"
        self.edges_path = self.scan_dir / "edges.jsonl"

        # In-memory stores
        self.nodes: dict[str, dict] = {}  # keyed by node_id
        self.edges: list[dict] = []       # list of edge dicts
        self._next_edge_id = 1            # next integer id handed out by add_edge
        self._dirty = False               # True when memory differs from disk

        # Load existing data
        self._load()

    def _load(self):
        """Load JSONL files into memory.

        Files are read as UTF-8 explicitly: _flush writes with
        ensure_ascii=False, so the data may contain non-ASCII characters
        and must not depend on the locale's default encoding.
        """
        if self.nodes_path.exists():
            for line in self.nodes_path.read_text(encoding="utf-8").splitlines():
                line = line.strip()
                if line:
                    node = json.loads(line)
                    self.nodes[node["id"]] = node

        if self.edges_path.exists():
            for line in self.edges_path.read_text(encoding="utf-8").splitlines():
                line = line.strip()
                if line:
                    edge = json.loads(line)
                    self.edges.append(edge)
                    # Keep the id counter ahead of every id seen on disk.
                    if edge.get("id", 0) >= self._next_edge_id:
                        self._next_edge_id = edge["id"] + 1

    def close(self):
        """Flush to disk (no-op when nothing has changed)."""
        if self._dirty:
            self._flush()

    def _flush(self):
        """Write all data back to JSONL files.

        Output is sorted by id for stable, git-friendly diffs and written
        as UTF-8 because ensure_ascii=False may emit non-ASCII characters.
        """
        sorted_nodes = sorted(self.nodes.values(), key=lambda n: n["id"])
        with open(self.nodes_path, "w", encoding="utf-8") as f:
            for node in sorted_nodes:
                f.write(json.dumps(node, ensure_ascii=False) + "\n")

        sorted_edges = sorted(self.edges, key=lambda e: e.get("id", 0))
        with open(self.edges_path, "w", encoding="utf-8") as f:
            for edge in sorted_edges:
                f.write(json.dumps(edge, ensure_ascii=False) + "\n")

        self._dirty = False

    # ── Node operations ──

    def upsert_node(self, node_id: str, node_type: str, path: Optional[str] = None,
                    name: Optional[str] = None, module: Optional[str] = None):
        """Insert or update a node; only non-None fields are (over)written."""
        node = self.nodes.get(node_id, {"id": node_id})
        node["type"] = node_type
        if path is not None:
            node["path"] = path
        if name is not None:
            node["name"] = name
        if module is not None:
            node["module"] = module
        self.nodes[node_id] = node
        self._dirty = True

    def get_node(self, node_id: str) -> Optional[dict]:
        """Return the node dict for *node_id*, or None if unknown."""
        return self.nodes.get(node_id)

    def count_nodes(self) -> int:
        """Total number of nodes."""
        return len(self.nodes)

    def find_nodes_by_path(self, path: str) -> list:
        """All nodes whose 'path' field equals *path*."""
        return [n for n in self.nodes.values() if n.get("path") == path]

    def get_convention_edges(self, node_id: str) -> list:
        """Active 'must_review' edges out of *node_id*.

        Each result is the edge dict augmented with target_name/target_type,
        sorted by confidence (descending).
        """
        results = []
        for e in self.edges:
            if e["source_id"] == node_id and e["relation"] == "must_review" and e.get("is_active", True):
                target = self.nodes.get(e["target_id"], {})
                results.append({**e, "target_name": target.get("name"),
                                "target_type": target.get("type")})
        results.sort(key=lambda x: x.get("confidence", 0), reverse=True)
        return results

    # ── Edge operations ──

    def add_edge(self, source_id: str, target_id: str, relation: str,
                 semantic: str, confidence: float = 0.5,
                 condition: Optional[str] = None) -> int:
        """Add a new active edge with empty evidence; return its integer id."""
        edge_id = self._next_edge_id
        self._next_edge_id += 1
        edge = {
            "id": edge_id,
            "source_id": source_id,
            "target_id": target_id,
            "relation": relation,
            "semantic": semantic,
            "confidence": confidence,
            "is_active": True,
            "evidence": [],
        }
        if condition:
            edge["condition"] = condition
        self.edges.append(edge)
        self._dirty = True
        return edge_id

    def get_outgoing_edges(self, node_id: str, min_confidence: float = 0.0) -> list:
        """Active edges leaving *node_id* with confidence >= *min_confidence*.

        Each result is the edge dict augmented with target_name/target_type,
        sorted by confidence (descending).
        """
        results = []
        for e in self.edges:
            if (e["source_id"] == node_id and e.get("is_active", True)
                    and e.get("confidence", 0) >= min_confidence):
                target = self.nodes.get(e["target_id"], {})
                results.append({**e, "target_name": target.get("name"),
                                "target_type": target.get("type")})
        results.sort(key=lambda x: x.get("confidence", 0), reverse=True)
        return results

    def get_incoming_edges(self, node_id: str, min_confidence: float = 0.0) -> list:
        """Active edges entering *node_id* with confidence >= *min_confidence*.

        Each result is the edge dict augmented with source_name/source_type,
        sorted by confidence (descending).
        """
        results = []
        for e in self.edges:
            if (e["target_id"] == node_id and e.get("is_active", True)
                    and e.get("confidence", 0) >= min_confidence):
                source = self.nodes.get(e["source_id"], {})
                results.append({**e, "source_name": source.get("name"),
                                "source_type": source.get("type")})
        results.sort(key=lambda x: x.get("confidence", 0), reverse=True)
        return results

    def count_edges(self) -> int:
        """Number of active edges."""
        return sum(1 for e in self.edges if e.get("is_active", True))

    # ── Evidence operations ──

    def add_evidence(self, edge_id: int, source_type: str, method: str,
                     score: float, detail: Optional[str] = None,
                     is_negative: bool = False) -> int:
        """Attach one evidence record to edge *edge_id*.

        Recomputes the edge's confidence via Noisy-OR.  Returns the edge's
        new evidence count, or 0 when no edge has that id.
        """
        for edge in self.edges:
            if edge["id"] == edge_id:
                ev = {"source_type": source_type, "method": method, "score": score}
                if detail:
                    ev["detail"] = detail
                if is_negative:
                    ev["is_negative"] = True
                edge.setdefault("evidence", []).append(ev)
                # Recalculate confidence via Noisy-OR
                edge["confidence"] = self._noisy_or(edge["evidence"])
                self._dirty = True
                return len(edge["evidence"])
        return 0

    @staticmethod
    def _noisy_or(evidence: list) -> float:
        """Noisy-OR: P(at least one fires) = 1 - product(1 - p_i).

        Negative evidence forms its own Noisy-OR which is subtracted from
        the positive one; the result is clamped at 0 and rounded to 4 dp.
        """
        positive_product = 1.0
        negative_product = 1.0
        for ev in evidence:
            if ev.get("is_negative"):
                negative_product *= (1.0 - ev["score"])
            else:
                positive_product *= (1.0 - ev["score"])
        return round(max(0.0, (1.0 - positive_product) - (1.0 - negative_product)), 4)

    # ── Propagation ──

    def propagate_impact(self, start_node_id: str, max_depth: int = 10,
                         min_confidence: float = 0.0) -> dict:
        """BFS propagation from a changed node.

        Returns {node_id: {"depth": int, "path": [node_ids]}} for every node
        reachable via active edges with confidence >= *min_confidence*,
        within *max_depth* hops.  The start node itself is excluded.
        """
        # Local import keeps the module's top-level dependencies unchanged.
        from collections import deque

        visited: dict[str, dict] = {}
        # deque gives O(1) popleft; list.pop(0) would make the BFS O(n^2).
        queue = deque([(start_node_id, 0, [start_node_id])])

        while queue:
            current, depth, path = queue.popleft()
            if depth > max_depth:
                continue
            if current in visited:
                continue
            visited[current] = {"depth": depth, "path": path}

            for edge in self.get_outgoing_edges(current, min_confidence):
                target = edge["target_id"]
                if target not in visited:
                    queue.append((target, depth + 1, path + [target]))

        # The start node is not part of its own impact set.
        visited.pop(start_node_id, None)
        return visited

    # ── Band classification ──

    def classify_band(self, confidence: float, evidence_count: int,
                      green_threshold: float = 0.90,
                      green_min_evidence: int = 2,
                      amber_threshold: float = 0.50) -> str:
        """Classify an edge as 'green', 'amber', or 'gray'.

        Green requires both high confidence and enough corroborating
        evidence; amber is confidence alone; everything else is gray.
        """
        if confidence >= green_threshold and evidence_count >= green_min_evidence:
            return "green"
        elif confidence >= amber_threshold:
            return "amber"
        else:
            return "gray"

    # ── Selective refresh ──

    AUTO_SOURCE_TYPES = ("static", "framework", "frontmatter", "inferred")
    HUMAN_SOURCE_TYPES = ("human", "dynamic", "history")

    def purge_auto_generated(self) -> dict:
        """Delete auto-generated evidence/edges/nodes, preserve human knowledge.

        Evidence from AUTO_SOURCE_TYPES is dropped; edges left with no
        evidence are deleted; nodes no longer referenced by any edge are
        removed.  Returns counts of deleted evidence/edges/nodes.
        """
        deleted_evidence = 0
        deleted_edges = 0

        # Remove auto evidence from edges
        surviving_edges = []
        for edge in self.edges:
            original_count = len(edge.get("evidence", []))
            edge["evidence"] = [
                ev for ev in edge.get("evidence", [])
                if ev.get("source_type") not in self.AUTO_SOURCE_TYPES
            ]
            deleted_evidence += original_count - len(edge["evidence"])

            if edge["evidence"]:
                # Confidence must reflect the surviving evidence only.
                edge["confidence"] = self._noisy_or(edge["evidence"])
                surviving_edges.append(edge)
            else:
                deleted_edges += 1

        self.edges = surviving_edges

        # Remove orphan nodes
        referenced = set()
        for edge in self.edges:
            referenced.add(edge["source_id"])
            referenced.add(edge["target_id"])

        orphans = [nid for nid in self.nodes if nid not in referenced]
        for nid in orphans:
            del self.nodes[nid]

        self._dirty = True
        return {
            "evidence": deleted_evidence,
            "edges": deleted_edges,
            "nodes": len(orphans),
        }

    def count_human_evidence(self) -> int:
        """Number of evidence records whose source_type is human-originated."""
        count = 0
        for edge in self.edges:
            for ev in edge.get("evidence", []):
                if ev.get("source_type") in self.HUMAN_SOURCE_TYPES:
                    count += 1
        return count

    # ── Stats ──

    def stats(self) -> dict:
        """Summary counts: nodes, active edges, evidence, human evidence."""
        total_evidence = sum(len(e.get("evidence", [])) for e in self.edges)
        return {
            "nodes": self.count_nodes(),
            "edges": self.count_edges(),
            "evidence": total_evidence,
            "human_evidence": self.count_human_evidence(),
        }
|
codd/hooks.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Git hook helpers for CoDD pre-commit enforcement."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
from pathlib import Path, PurePosixPath
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
from codd.scanner import _extract_frontmatter
|
|
11
|
+
from codd.validator import run_validate
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
HOOK_SOURCE = Path(__file__).parent.parent / "hooks" / "pre-commit"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def install_pre_commit_hook(project_root: Path) -> tuple[Path, bool]:
|
|
18
|
+
"""Install the packaged pre-commit hook into a Git repository."""
|
|
19
|
+
config_path = project_root / "codd" / "codd.yaml"
|
|
20
|
+
if not config_path.exists():
|
|
21
|
+
raise FileNotFoundError(f"{config_path} not found")
|
|
22
|
+
|
|
23
|
+
git_dir = project_root / ".git"
|
|
24
|
+
if not git_dir.exists():
|
|
25
|
+
raise FileNotFoundError(f"{git_dir} not found")
|
|
26
|
+
|
|
27
|
+
if not HOOK_SOURCE.exists():
|
|
28
|
+
raise FileNotFoundError(f"{HOOK_SOURCE} not found")
|
|
29
|
+
|
|
30
|
+
destination = git_dir / "hooks" / "pre-commit"
|
|
31
|
+
source = HOOK_SOURCE.resolve()
|
|
32
|
+
source.chmod(source.stat().st_mode | 0o111)
|
|
33
|
+
|
|
34
|
+
if destination.is_symlink():
|
|
35
|
+
if destination.resolve() == source:
|
|
36
|
+
return destination, False
|
|
37
|
+
raise FileExistsError(f"{destination} already exists and points to {destination.resolve()}")
|
|
38
|
+
|
|
39
|
+
if destination.exists():
|
|
40
|
+
raise FileExistsError(f"{destination} already exists")
|
|
41
|
+
|
|
42
|
+
destination.parent.mkdir(parents=True, exist_ok=True)
|
|
43
|
+
destination.symlink_to(source)
|
|
44
|
+
return destination, True
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def run_pre_commit(project_root: Path) -> int:
    """Validate staged CoDD documents before commit.

    Returns a shell-style exit code: 0 on success, 1 on any failure
    (missing config, git error, or a staged doc without frontmatter).
    """
    config_path = project_root / "codd" / "codd.yaml"
    if not config_path.exists():
        print("ERROR: codd/codd.yaml not found.")
        return 1

    config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
    try:
        staged_docs = _get_staged_markdown_files(project_root, config)
    except RuntimeError as exc:
        print(f"ERROR: {exc}")
        return 1

    # Reject the commit at the first staged doc lacking YAML frontmatter.
    offender = next(
        (doc for doc in staged_docs
         if _extract_frontmatter(project_root / doc) is None),
        None,
    )
    if offender is not None:
        print(f"ERROR: {offender} is missing CoDD YAML frontmatter")
        return 1

    return run_validate(project_root, project_root / "codd")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _get_staged_markdown_files(project_root: Path, config: dict) -> list[Path]:
    """Return staged .md files that live inside the configured doc dirs.

    Only additions/copies/modifications/renames are considered
    (diff-filter=ACMR).  Raises RuntimeError when git itself fails.
    """
    proc = subprocess.run(
        ["git", "diff", "--cached", "--name-only", "--diff-filter=ACMR"],
        cwd=project_root,
        capture_output=True,
        text=True,
        check=False,
    )
    if proc.returncode != 0:
        raise RuntimeError(proc.stderr.strip() or "git diff --cached failed")

    doc_dirs = (config.get("scan") or {}).get("doc_dirs") or []
    return [
        Path(name)
        for name in (raw.strip() for raw in proc.stdout.splitlines())
        if name.endswith(".md") and _is_in_doc_dirs(name, doc_dirs)
    ]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _is_in_doc_dirs(relative_path: str, doc_dirs: list[str]) -> bool:
|
|
96
|
+
rel = PurePosixPath(relative_path)
|
|
97
|
+
for doc_dir in doc_dirs:
|
|
98
|
+
base = PurePosixPath(str(doc_dir).rstrip("/"))
|
|
99
|
+
try:
|
|
100
|
+
rel.relative_to(base)
|
|
101
|
+
return True
|
|
102
|
+
except ValueError:
|
|
103
|
+
continue
|
|
104
|
+
return False
|