sdtk-wiki-kit 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/assets/atlas/build_atlas.py +775 -775
- package/assets/atlas/doc_atlas_viewer_template.html +3796 -3796
- package/assets/atlas/vendor/mermaid.min.js +2029 -2029
- package/bin/sdtk-wiki.js +0 -0
- package/package.json +2 -2
- package/src/commands/context.js +67 -0
- package/src/commands/help.js +7 -0
- package/src/commands/update.js +11 -0
- package/src/index.js +8 -0
- package/src/lib/update.js +217 -0
- package/src/lib/wiki-context-pack.js +267 -0
|
@@ -1,141 +1,141 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
2
|
"""
|
|
3
3
|
SDTK-WIKI Builder -- generic local-project edition.
|
|
4
|
-
|
|
5
|
-
Scans markdown files under configured scan roots, builds a document index
|
|
6
|
-
and graph, and generates a static local viewer.
|
|
7
|
-
|
|
8
|
-
Usage:
|
|
9
|
-
python build_atlas.py --project-root <path> --output-dir <path>
|
|
10
|
-
[--scan-root <path> ...] [--exclude <frag> ...]
|
|
11
|
-
[--verbose]
|
|
12
|
-
|
|
13
|
-
Outputs (written to <output-dir>/):
|
|
14
|
-
ATLAS_STATE.json - incremental scan/build state
|
|
15
|
-
SDTK_DOC_INDEX.json - full document index
|
|
16
|
-
SDTK_DOC_GRAPH.json - nodes + typed edges
|
|
17
|
-
SDTK_DOC_ATLAS_SUMMARY.md - human-readable summary
|
|
18
|
-
viewer.html - static local viewer (data embedded)
|
|
19
|
-
vendor/mermaid.min.js - vendored viewer asset
|
|
20
|
-
"""
|
|
21
|
-
|
|
22
|
-
from __future__ import annotations
|
|
23
|
-
|
|
24
|
-
import argparse
|
|
25
|
-
import hashlib
|
|
26
|
-
import json
|
|
27
|
-
import re
|
|
28
|
-
import shutil
|
|
29
|
-
import sys
|
|
30
|
-
from datetime import datetime, timezone
|
|
31
|
-
from pathlib import Path
|
|
32
|
-
from typing import Any
|
|
33
|
-
|
|
4
|
+
|
|
5
|
+
Scans markdown files under configured scan roots, builds a document index
|
|
6
|
+
and graph, and generates a static local viewer.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python build_atlas.py --project-root <path> --output-dir <path>
|
|
10
|
+
[--scan-root <path> ...] [--exclude <frag> ...]
|
|
11
|
+
[--verbose]
|
|
12
|
+
|
|
13
|
+
Outputs (written to <output-dir>/):
|
|
14
|
+
ATLAS_STATE.json - incremental scan/build state
|
|
15
|
+
SDTK_DOC_INDEX.json - full document index
|
|
16
|
+
SDTK_DOC_GRAPH.json - nodes + typed edges
|
|
17
|
+
SDTK_DOC_ATLAS_SUMMARY.md - human-readable summary
|
|
18
|
+
viewer.html - static local viewer (data embedded)
|
|
19
|
+
vendor/mermaid.min.js - vendored viewer asset
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import argparse
|
|
25
|
+
import hashlib
|
|
26
|
+
import json
|
|
27
|
+
import re
|
|
28
|
+
import shutil
|
|
29
|
+
import sys
|
|
30
|
+
from datetime import datetime, timezone
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
34
|
ATLAS_STATE_VERSION = 6
|
|
35
35
|
WIKI_PAGE_SCHEMA_VERSION = 1
|
|
36
36
|
WIKI_PROVENANCE_SCHEMA_VERSION = 1
|
|
37
37
|
MERMAID_VENDOR_PATH = Path(__file__).parent / "vendor" / "mermaid.min.js"
|
|
38
38
|
MERMAID_ASSET_NAME = "mermaid.min.js"
|
|
39
39
|
_VIEWER_TEMPLATE_PATH = Path(__file__).parent / "doc_atlas_viewer_template.html"
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _json_for_inline_script(value: Any) -> str:
|
|
43
|
-
return (
|
|
44
|
-
json.dumps(value, ensure_ascii=True, separators=(",", ":"))
|
|
45
|
-
.replace("</", "<\\/")
|
|
46
|
-
.replace("<!--", "<\\!--")
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
# ---------------------------------------------------------------------------
|
|
50
|
-
# Default consumer project exclude fragments
|
|
51
|
-
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _json_for_inline_script(value: Any) -> str:
|
|
43
|
+
return (
|
|
44
|
+
json.dumps(value, ensure_ascii=True, separators=(",", ":"))
|
|
45
|
+
.replace("</", "<\\/")
|
|
46
|
+
.replace("<!--", "<\\!--")
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
# Default consumer project exclude fragments
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
52
|
DEFAULT_EXCLUDE_FRAGS: list[str] = [
|
|
53
53
|
".git",
|
|
54
54
|
".sdtk/wiki",
|
|
55
55
|
".sdtk/atlas",
|
|
56
56
|
"node_modules",
|
|
57
|
-
".venv",
|
|
58
|
-
"venv",
|
|
59
|
-
"dist",
|
|
60
|
-
"build",
|
|
61
|
-
"coverage",
|
|
62
|
-
".next",
|
|
63
|
-
".turbo",
|
|
64
|
-
".cache",
|
|
65
|
-
"__pycache__",
|
|
66
|
-
]
|
|
67
|
-
|
|
68
|
-
# ---------------------------------------------------------------------------
|
|
69
|
-
# Reference patterns
|
|
70
|
-
# ---------------------------------------------------------------------------
|
|
71
|
-
RE_BK = re.compile(r"\bBK-(\d{3,})\b")
|
|
72
|
-
RE_KNOWLEDGE_ID = re.compile(r"\b(KD|KT|KP|KA|KR|KRB|KF)-(\d{4})\b")
|
|
73
|
-
RE_REPO_PATH = re.compile(
|
|
74
|
-
r"(?:^|[\s`(\[])([a-zA-Z0-9_\-]+(?:/[a-zA-Z0-9_\-. ]+)+\."
|
|
75
|
-
r"(?:md|py|ps1|json|yaml|yml|html|txt))"
|
|
76
|
-
)
|
|
77
|
-
RE_WIKI_LINK = re.compile(r"\[\[([^\]]+)\]\]")
|
|
78
|
-
RE_MARKDOWN_LINK = re.compile(r"(?<!!)\[[^\]]+\]\(([^)]+)\)")
|
|
79
|
-
RE_SKILL_REF = re.compile(r"\b(sdtk-[a-z0-9][a-z0-9-]*)\b")
|
|
80
|
-
RE_RELEASE_REF = re.compile(r"\b(?:sdtk-spec-kit@)?(0\.\d+\.\d+)\b")
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
# ---------------------------------------------------------------------------
|
|
84
|
-
# Generic doc-family classifier (project-scope, no maintainer assumptions)
|
|
85
|
-
# ---------------------------------------------------------------------------
|
|
86
|
-
def classify_family(rel: str) -> str:
|
|
87
|
-
p = rel.replace("\\", "/").lower()
|
|
88
|
-
name = Path(rel).name.lower()
|
|
89
|
-
is_guide_path = p.startswith("guides/") or "/guides/" in p
|
|
90
|
-
if p == "readme.md":
|
|
91
|
-
return "root-readme"
|
|
92
|
-
if "backlog" in name:
|
|
93
|
-
return "backlog"
|
|
94
|
-
if "skills" in p:
|
|
95
|
-
return "skill"
|
|
96
|
-
if "templates" in p:
|
|
97
|
-
return "template"
|
|
98
|
-
if "docs/database" in p or "database/" in p:
|
|
99
|
-
return "database"
|
|
100
|
-
if "docs/specs" in p or "specs/" in p:
|
|
101
|
-
return "spec"
|
|
102
|
-
if "docs/architecture" in p or "architecture/" in p:
|
|
103
|
-
return "architecture"
|
|
104
|
-
if "docs/api" in p or "api/" in p:
|
|
105
|
-
return "api"
|
|
106
|
-
if "docs/qa" in p or "qa/" in p:
|
|
107
|
-
return "qa"
|
|
108
|
-
if "docs/design" in p or "design/" in p:
|
|
109
|
-
return "design"
|
|
110
|
-
if "docs/dev" in p or "dev/" in p:
|
|
111
|
-
return "dev"
|
|
112
|
-
if "docs/product" in p or "product/" in p:
|
|
113
|
-
return "product"
|
|
114
|
-
if is_guide_path:
|
|
115
|
-
return "guide"
|
|
116
|
-
if "governance" in p:
|
|
117
|
-
return "governance"
|
|
118
|
-
return "other-markdown"
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def classify_role(rel: str) -> str:
|
|
122
|
-
p = rel.replace("\\", "/").lower()
|
|
123
|
-
if "governance" in p:
|
|
124
|
-
return "governance"
|
|
125
|
-
if "spec" in p or "architecture" in p:
|
|
126
|
-
return "spec-artifact"
|
|
127
|
-
if "skill" in p:
|
|
128
|
-
return "skill"
|
|
129
|
-
return "other"
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
# ---------------------------------------------------------------------------
|
|
133
|
-
# Scanner helpers
|
|
134
|
-
# ---------------------------------------------------------------------------
|
|
135
|
-
def _now_utc() -> str:
|
|
136
|
-
return datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
137
|
-
|
|
138
|
-
|
|
57
|
+
".venv",
|
|
58
|
+
"venv",
|
|
59
|
+
"dist",
|
|
60
|
+
"build",
|
|
61
|
+
"coverage",
|
|
62
|
+
".next",
|
|
63
|
+
".turbo",
|
|
64
|
+
".cache",
|
|
65
|
+
"__pycache__",
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# Reference patterns
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
RE_BK = re.compile(r"\bBK-(\d{3,})\b")
|
|
72
|
+
RE_KNOWLEDGE_ID = re.compile(r"\b(KD|KT|KP|KA|KR|KRB|KF)-(\d{4})\b")
|
|
73
|
+
RE_REPO_PATH = re.compile(
|
|
74
|
+
r"(?:^|[\s`(\[])([a-zA-Z0-9_\-]+(?:/[a-zA-Z0-9_\-. ]+)+\."
|
|
75
|
+
r"(?:md|py|ps1|json|yaml|yml|html|txt))"
|
|
76
|
+
)
|
|
77
|
+
RE_WIKI_LINK = re.compile(r"\[\[([^\]]+)\]\]")
|
|
78
|
+
RE_MARKDOWN_LINK = re.compile(r"(?<!!)\[[^\]]+\]\(([^)]+)\)")
|
|
79
|
+
RE_SKILL_REF = re.compile(r"\b(sdtk-[a-z0-9][a-z0-9-]*)\b")
|
|
80
|
+
RE_RELEASE_REF = re.compile(r"\b(?:sdtk-spec-kit@)?(0\.\d+\.\d+)\b")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ---------------------------------------------------------------------------
|
|
84
|
+
# Generic doc-family classifier (project-scope, no maintainer assumptions)
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
def classify_family(rel: str) -> str:
|
|
87
|
+
p = rel.replace("\\", "/").lower()
|
|
88
|
+
name = Path(rel).name.lower()
|
|
89
|
+
is_guide_path = p.startswith("guides/") or "/guides/" in p
|
|
90
|
+
if p == "readme.md":
|
|
91
|
+
return "root-readme"
|
|
92
|
+
if "backlog" in name:
|
|
93
|
+
return "backlog"
|
|
94
|
+
if "skills" in p:
|
|
95
|
+
return "skill"
|
|
96
|
+
if "templates" in p:
|
|
97
|
+
return "template"
|
|
98
|
+
if "docs/database" in p or "database/" in p:
|
|
99
|
+
return "database"
|
|
100
|
+
if "docs/specs" in p or "specs/" in p:
|
|
101
|
+
return "spec"
|
|
102
|
+
if "docs/architecture" in p or "architecture/" in p:
|
|
103
|
+
return "architecture"
|
|
104
|
+
if "docs/api" in p or "api/" in p:
|
|
105
|
+
return "api"
|
|
106
|
+
if "docs/qa" in p or "qa/" in p:
|
|
107
|
+
return "qa"
|
|
108
|
+
if "docs/design" in p or "design/" in p:
|
|
109
|
+
return "design"
|
|
110
|
+
if "docs/dev" in p or "dev/" in p:
|
|
111
|
+
return "dev"
|
|
112
|
+
if "docs/product" in p or "product/" in p:
|
|
113
|
+
return "product"
|
|
114
|
+
if is_guide_path:
|
|
115
|
+
return "guide"
|
|
116
|
+
if "governance" in p:
|
|
117
|
+
return "governance"
|
|
118
|
+
return "other-markdown"
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def classify_role(rel: str) -> str:
|
|
122
|
+
p = rel.replace("\\", "/").lower()
|
|
123
|
+
if "governance" in p:
|
|
124
|
+
return "governance"
|
|
125
|
+
if "spec" in p or "architecture" in p:
|
|
126
|
+
return "spec-artifact"
|
|
127
|
+
if "skill" in p:
|
|
128
|
+
return "skill"
|
|
129
|
+
return "other"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ---------------------------------------------------------------------------
|
|
133
|
+
# Scanner helpers
|
|
134
|
+
# ---------------------------------------------------------------------------
|
|
135
|
+
def _now_utc() -> str:
|
|
136
|
+
return datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
137
|
+
|
|
138
|
+
|
|
139
139
|
def _write_text_lf(path: Path, content: str) -> None:
|
|
140
140
|
path.write_text(content, encoding="utf-8", newline="\n")
|
|
141
141
|
|
|
@@ -145,8 +145,8 @@ def _assert_inside(base: Path, target: Path) -> None:
|
|
|
145
145
|
resolved_target = target.resolve()
|
|
146
146
|
if resolved_target != resolved_base and resolved_base not in resolved_target.parents:
|
|
147
147
|
raise ValueError(f"Refusing to write outside SDTK-WIKI workspace: {resolved_target}")
|
|
148
|
-
|
|
149
|
-
|
|
148
|
+
|
|
149
|
+
|
|
150
150
|
def _is_excluded(
|
|
151
151
|
path: Path,
|
|
152
152
|
root: Path,
|
|
@@ -189,208 +189,208 @@ def _match_exclude(
|
|
|
189
189
|
return frag
|
|
190
190
|
|
|
191
191
|
return None
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
def _extract_title(text: str) -> str:
|
|
195
|
-
for line in text.splitlines():
|
|
196
|
-
stripped = line.strip()
|
|
197
|
-
if stripped.startswith("# "):
|
|
198
|
-
return stripped[2:].strip()
|
|
199
|
-
return ""
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
def _extract_headings(text: str) -> list[str]:
|
|
203
|
-
headings: list[str] = []
|
|
204
|
-
for line in text.splitlines():
|
|
205
|
-
stripped = line.strip()
|
|
206
|
-
if not stripped.startswith("#"):
|
|
207
|
-
continue
|
|
208
|
-
level = len(stripped) - len(stripped.lstrip("#"))
|
|
209
|
-
if 1 <= level <= 6 and len(stripped) > level and stripped[level] == " ":
|
|
210
|
-
headings.append(stripped[level + 1:].strip())
|
|
211
|
-
return headings
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
def _parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
|
|
215
|
-
lines = text.splitlines()
|
|
216
|
-
if not lines or lines[0].strip() != "---":
|
|
217
|
-
return {}, text
|
|
218
|
-
|
|
219
|
-
fields: dict[str, Any] = {}
|
|
220
|
-
current_list_key: str | None = None
|
|
221
|
-
for idx in range(1, len(lines)):
|
|
222
|
-
raw = lines[idx]
|
|
223
|
-
stripped = raw.strip()
|
|
224
|
-
if stripped in {"---", "..."}:
|
|
225
|
-
body = "\n".join(lines[idx + 1:])
|
|
226
|
-
if text.endswith("\n"):
|
|
227
|
-
body += "\n"
|
|
228
|
-
return fields, body
|
|
229
|
-
if not stripped:
|
|
230
|
-
current_list_key = None
|
|
231
|
-
continue
|
|
232
|
-
if stripped.startswith("- ") and current_list_key and isinstance(fields.get(current_list_key), list):
|
|
233
|
-
fields[current_list_key].append(stripped[2:].strip().strip('"\''))
|
|
234
|
-
continue
|
|
235
|
-
if ":" not in raw:
|
|
236
|
-
current_list_key = None
|
|
237
|
-
continue
|
|
238
|
-
key, value = raw.split(":", 1)
|
|
239
|
-
key = key.strip()
|
|
240
|
-
value = value.strip()
|
|
241
|
-
if not key:
|
|
242
|
-
current_list_key = None
|
|
243
|
-
continue
|
|
244
|
-
if not value:
|
|
245
|
-
fields[key] = []
|
|
246
|
-
current_list_key = key
|
|
247
|
-
continue
|
|
248
|
-
if value.startswith("[") and value.endswith("]"):
|
|
249
|
-
inner = value[1:-1].strip()
|
|
250
|
-
if inner:
|
|
251
|
-
fields[key] = [part.strip().strip('"\'') for part in inner.split(",") if part.strip()]
|
|
252
|
-
else:
|
|
253
|
-
fields[key] = []
|
|
254
|
-
current_list_key = None
|
|
255
|
-
continue
|
|
256
|
-
fields[key] = value.strip('"\'')
|
|
257
|
-
current_list_key = None
|
|
258
|
-
|
|
259
|
-
return {}, text
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
def _normalize_internal_ref(raw: str) -> str:
|
|
263
|
-
value = raw.strip()
|
|
264
|
-
if not value:
|
|
265
|
-
return ""
|
|
266
|
-
value = value.split("|", 1)[0].strip()
|
|
267
|
-
value = value.split("#", 1)[0].strip()
|
|
268
|
-
value = value.replace("\\", "/")
|
|
269
|
-
while value.startswith("./"):
|
|
270
|
-
value = value[2:]
|
|
271
|
-
if value.startswith("/"):
|
|
272
|
-
value = value[1:]
|
|
273
|
-
return value.strip()
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
def _extract_references(text: str) -> tuple[list[str], list[str], list[str]]:
|
|
277
|
-
issues = sorted(set(f"BK-{m}" for m in RE_BK.findall(text)))
|
|
278
|
-
knowledge_ids = sorted(
|
|
279
|
-
set(f"{m[0]}-{m[1]}" for m in RE_KNOWLEDGE_ID.findall(text))
|
|
280
|
-
)
|
|
281
|
-
raw_paths = RE_REPO_PATH.findall(text)
|
|
282
|
-
paths: list[str] = []
|
|
283
|
-
seen: set[str] = set()
|
|
284
|
-
for rp in raw_paths:
|
|
285
|
-
normalised = _normalize_internal_ref(rp)
|
|
286
|
-
if normalised and normalised not in seen:
|
|
287
|
-
seen.add(normalised)
|
|
288
|
-
paths.append(normalised)
|
|
289
|
-
return issues, knowledge_ids, paths
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
def _extract_wiki_links(text: str) -> list[str]:
|
|
293
|
-
links: list[str] = []
|
|
294
|
-
seen: set[str] = set()
|
|
295
|
-
for raw in RE_WIKI_LINK.findall(text):
|
|
296
|
-
normalised = _normalize_internal_ref(raw)
|
|
297
|
-
if normalised and normalised not in seen:
|
|
298
|
-
seen.add(normalised)
|
|
299
|
-
links.append(normalised)
|
|
300
|
-
return links
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
def _extract_markdown_links(text: str) -> list[str]:
|
|
304
|
-
links: list[str] = []
|
|
305
|
-
seen: set[str] = set()
|
|
306
|
-
for raw in RE_MARKDOWN_LINK.findall(text):
|
|
307
|
-
target = raw.strip().strip('<>')
|
|
308
|
-
lower = target.lower()
|
|
309
|
-
if not target or lower.startswith(("http://", "https://", "mailto:", "#")) or "://" in target:
|
|
310
|
-
continue
|
|
311
|
-
# Markdown links may include optional titles: [x](path.md "title").
|
|
312
|
-
if ' "' in target:
|
|
313
|
-
target = target.split(' "', 1)[0]
|
|
314
|
-
if " '" in target:
|
|
315
|
-
target = target.split(" '", 1)[0]
|
|
316
|
-
normalised = _normalize_internal_ref(target)
|
|
317
|
-
if normalised and normalised not in seen:
|
|
318
|
-
seen.add(normalised)
|
|
319
|
-
links.append(normalised)
|
|
320
|
-
return links
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
def _extract_skill_refs(text: str, path_refs: list[str], wiki_links: list[str]) -> list[str]:
|
|
324
|
-
refs = set(match.lower() for match in RE_SKILL_REF.findall(text))
|
|
325
|
-
for ref in path_refs + wiki_links:
|
|
326
|
-
parts = [part for part in ref.split("/") if part]
|
|
327
|
-
for marker in ("skills", "skills-claude"):
|
|
328
|
-
if marker in parts:
|
|
329
|
-
idx = parts.index(marker)
|
|
330
|
-
if idx + 1 < len(parts):
|
|
331
|
-
refs.add(parts[idx + 1].lower())
|
|
332
|
-
return sorted(refs)
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
def _extract_template_refs(path_refs: list[str], wiki_links: list[str]) -> list[str]:
|
|
336
|
-
refs: set[str] = set()
|
|
337
|
-
for ref in path_refs + wiki_links:
|
|
338
|
-
norm = _normalize_internal_ref(ref)
|
|
339
|
-
if "/templates/" in f"/{norm}":
|
|
340
|
-
refs.add(norm)
|
|
341
|
-
return sorted(refs)
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
def _extract_release_refs(text: str) -> list[str]:
|
|
345
|
-
return sorted(set(RE_RELEASE_REF.findall(text)))
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
def _compute_file_hash(md_file: Path) -> str:
|
|
349
|
-
content = md_file.read_bytes()
|
|
350
|
-
return hashlib.sha256(content).hexdigest()
|
|
351
|
-
|
|
352
|
-
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _extract_title(text: str) -> str:
|
|
195
|
+
for line in text.splitlines():
|
|
196
|
+
stripped = line.strip()
|
|
197
|
+
if stripped.startswith("# "):
|
|
198
|
+
return stripped[2:].strip()
|
|
199
|
+
return ""
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _extract_headings(text: str) -> list[str]:
|
|
203
|
+
headings: list[str] = []
|
|
204
|
+
for line in text.splitlines():
|
|
205
|
+
stripped = line.strip()
|
|
206
|
+
if not stripped.startswith("#"):
|
|
207
|
+
continue
|
|
208
|
+
level = len(stripped) - len(stripped.lstrip("#"))
|
|
209
|
+
if 1 <= level <= 6 and len(stripped) > level and stripped[level] == " ":
|
|
210
|
+
headings.append(stripped[level + 1:].strip())
|
|
211
|
+
return headings
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
|
|
215
|
+
lines = text.splitlines()
|
|
216
|
+
if not lines or lines[0].strip() != "---":
|
|
217
|
+
return {}, text
|
|
218
|
+
|
|
219
|
+
fields: dict[str, Any] = {}
|
|
220
|
+
current_list_key: str | None = None
|
|
221
|
+
for idx in range(1, len(lines)):
|
|
222
|
+
raw = lines[idx]
|
|
223
|
+
stripped = raw.strip()
|
|
224
|
+
if stripped in {"---", "..."}:
|
|
225
|
+
body = "\n".join(lines[idx + 1:])
|
|
226
|
+
if text.endswith("\n"):
|
|
227
|
+
body += "\n"
|
|
228
|
+
return fields, body
|
|
229
|
+
if not stripped:
|
|
230
|
+
current_list_key = None
|
|
231
|
+
continue
|
|
232
|
+
if stripped.startswith("- ") and current_list_key and isinstance(fields.get(current_list_key), list):
|
|
233
|
+
fields[current_list_key].append(stripped[2:].strip().strip('"\''))
|
|
234
|
+
continue
|
|
235
|
+
if ":" not in raw:
|
|
236
|
+
current_list_key = None
|
|
237
|
+
continue
|
|
238
|
+
key, value = raw.split(":", 1)
|
|
239
|
+
key = key.strip()
|
|
240
|
+
value = value.strip()
|
|
241
|
+
if not key:
|
|
242
|
+
current_list_key = None
|
|
243
|
+
continue
|
|
244
|
+
if not value:
|
|
245
|
+
fields[key] = []
|
|
246
|
+
current_list_key = key
|
|
247
|
+
continue
|
|
248
|
+
if value.startswith("[") and value.endswith("]"):
|
|
249
|
+
inner = value[1:-1].strip()
|
|
250
|
+
if inner:
|
|
251
|
+
fields[key] = [part.strip().strip('"\'') for part in inner.split(",") if part.strip()]
|
|
252
|
+
else:
|
|
253
|
+
fields[key] = []
|
|
254
|
+
current_list_key = None
|
|
255
|
+
continue
|
|
256
|
+
fields[key] = value.strip('"\'')
|
|
257
|
+
current_list_key = None
|
|
258
|
+
|
|
259
|
+
return {}, text
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _normalize_internal_ref(raw: str) -> str:
|
|
263
|
+
value = raw.strip()
|
|
264
|
+
if not value:
|
|
265
|
+
return ""
|
|
266
|
+
value = value.split("|", 1)[0].strip()
|
|
267
|
+
value = value.split("#", 1)[0].strip()
|
|
268
|
+
value = value.replace("\\", "/")
|
|
269
|
+
while value.startswith("./"):
|
|
270
|
+
value = value[2:]
|
|
271
|
+
if value.startswith("/"):
|
|
272
|
+
value = value[1:]
|
|
273
|
+
return value.strip()
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _extract_references(text: str) -> tuple[list[str], list[str], list[str]]:
|
|
277
|
+
issues = sorted(set(f"BK-{m}" for m in RE_BK.findall(text)))
|
|
278
|
+
knowledge_ids = sorted(
|
|
279
|
+
set(f"{m[0]}-{m[1]}" for m in RE_KNOWLEDGE_ID.findall(text))
|
|
280
|
+
)
|
|
281
|
+
raw_paths = RE_REPO_PATH.findall(text)
|
|
282
|
+
paths: list[str] = []
|
|
283
|
+
seen: set[str] = set()
|
|
284
|
+
for rp in raw_paths:
|
|
285
|
+
normalised = _normalize_internal_ref(rp)
|
|
286
|
+
if normalised and normalised not in seen:
|
|
287
|
+
seen.add(normalised)
|
|
288
|
+
paths.append(normalised)
|
|
289
|
+
return issues, knowledge_ids, paths
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def _extract_wiki_links(text: str) -> list[str]:
|
|
293
|
+
links: list[str] = []
|
|
294
|
+
seen: set[str] = set()
|
|
295
|
+
for raw in RE_WIKI_LINK.findall(text):
|
|
296
|
+
normalised = _normalize_internal_ref(raw)
|
|
297
|
+
if normalised and normalised not in seen:
|
|
298
|
+
seen.add(normalised)
|
|
299
|
+
links.append(normalised)
|
|
300
|
+
return links
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _extract_markdown_links(text: str) -> list[str]:
|
|
304
|
+
links: list[str] = []
|
|
305
|
+
seen: set[str] = set()
|
|
306
|
+
for raw in RE_MARKDOWN_LINK.findall(text):
|
|
307
|
+
target = raw.strip().strip('<>')
|
|
308
|
+
lower = target.lower()
|
|
309
|
+
if not target or lower.startswith(("http://", "https://", "mailto:", "#")) or "://" in target:
|
|
310
|
+
continue
|
|
311
|
+
# Markdown links may include optional titles: [x](path.md "title").
|
|
312
|
+
if ' "' in target:
|
|
313
|
+
target = target.split(' "', 1)[0]
|
|
314
|
+
if " '" in target:
|
|
315
|
+
target = target.split(" '", 1)[0]
|
|
316
|
+
normalised = _normalize_internal_ref(target)
|
|
317
|
+
if normalised and normalised not in seen:
|
|
318
|
+
seen.add(normalised)
|
|
319
|
+
links.append(normalised)
|
|
320
|
+
return links
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _extract_skill_refs(text: str, path_refs: list[str], wiki_links: list[str]) -> list[str]:
|
|
324
|
+
refs = set(match.lower() for match in RE_SKILL_REF.findall(text))
|
|
325
|
+
for ref in path_refs + wiki_links:
|
|
326
|
+
parts = [part for part in ref.split("/") if part]
|
|
327
|
+
for marker in ("skills", "skills-claude"):
|
|
328
|
+
if marker in parts:
|
|
329
|
+
idx = parts.index(marker)
|
|
330
|
+
if idx + 1 < len(parts):
|
|
331
|
+
refs.add(parts[idx + 1].lower())
|
|
332
|
+
return sorted(refs)
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _extract_template_refs(path_refs: list[str], wiki_links: list[str]) -> list[str]:
|
|
336
|
+
refs: set[str] = set()
|
|
337
|
+
for ref in path_refs + wiki_links:
|
|
338
|
+
norm = _normalize_internal_ref(ref)
|
|
339
|
+
if "/templates/" in f"/{norm}":
|
|
340
|
+
refs.add(norm)
|
|
341
|
+
return sorted(refs)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _extract_release_refs(text: str) -> list[str]:
|
|
345
|
+
return sorted(set(RE_RELEASE_REF.findall(text)))
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def _compute_file_hash(md_file: Path) -> str:
|
|
349
|
+
content = md_file.read_bytes()
|
|
350
|
+
return hashlib.sha256(content).hexdigest()
|
|
351
|
+
|
|
352
|
+
|
|
353
353
|
def _parse_doc_record(md_file: Path, root: Path) -> dict[str, Any]:
|
|
354
354
|
rel = _display_scan_path(md_file, root)
|
|
355
355
|
text = md_file.read_text(encoding="utf-8", errors="replace")
|
|
356
|
-
frontmatter_fields, body_text = _parse_frontmatter(text)
|
|
357
|
-
title = str(
|
|
358
|
-
frontmatter_fields.get("title")
|
|
359
|
-
or _extract_title(body_text)
|
|
360
|
-
or md_file.stem.replace("_", " ").replace("-", " ")
|
|
361
|
-
)
|
|
362
|
-
headings = _extract_headings(body_text)
|
|
363
|
-
issues, knowledge_ids, path_refs = _extract_references(text)
|
|
364
|
-
wiki_links = _extract_wiki_links(text)
|
|
365
|
-
markdown_links = _extract_markdown_links(text)
|
|
366
|
-
path_refs = sorted(set(path_refs + markdown_links))
|
|
367
|
-
family = classify_family(rel)
|
|
368
|
-
role = classify_role(rel)
|
|
369
|
-
skill_refs = _extract_skill_refs(text, path_refs, wiki_links)
|
|
370
|
-
template_refs = _extract_template_refs(path_refs, wiki_links)
|
|
371
|
-
release_refs = _extract_release_refs(text)
|
|
372
|
-
return {
|
|
373
|
-
"id": rel,
|
|
374
|
-
"path": rel,
|
|
375
|
-
"title": title,
|
|
376
|
-
"family": family,
|
|
377
|
-
"role": role,
|
|
378
|
-
"trust_zone": "medium",
|
|
379
|
-
"body_markdown": body_text,
|
|
380
|
-
"issues": issues,
|
|
381
|
-
"knowledge_ids": knowledge_ids,
|
|
382
|
-
"headings": headings,
|
|
383
|
-
"frontmatter_fields": frontmatter_fields,
|
|
384
|
-
"skill_refs": skill_refs,
|
|
385
|
-
"template_refs": template_refs,
|
|
386
|
-
"release_refs": release_refs,
|
|
387
|
-
"lane_refs": [],
|
|
388
|
-
"wiki_links": wiki_links,
|
|
389
|
-
"path_refs": path_refs,
|
|
390
|
-
"outgoing_paths": path_refs,
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
|
|
356
|
+
frontmatter_fields, body_text = _parse_frontmatter(text)
|
|
357
|
+
title = str(
|
|
358
|
+
frontmatter_fields.get("title")
|
|
359
|
+
or _extract_title(body_text)
|
|
360
|
+
or md_file.stem.replace("_", " ").replace("-", " ")
|
|
361
|
+
)
|
|
362
|
+
headings = _extract_headings(body_text)
|
|
363
|
+
issues, knowledge_ids, path_refs = _extract_references(text)
|
|
364
|
+
wiki_links = _extract_wiki_links(text)
|
|
365
|
+
markdown_links = _extract_markdown_links(text)
|
|
366
|
+
path_refs = sorted(set(path_refs + markdown_links))
|
|
367
|
+
family = classify_family(rel)
|
|
368
|
+
role = classify_role(rel)
|
|
369
|
+
skill_refs = _extract_skill_refs(text, path_refs, wiki_links)
|
|
370
|
+
template_refs = _extract_template_refs(path_refs, wiki_links)
|
|
371
|
+
release_refs = _extract_release_refs(text)
|
|
372
|
+
return {
|
|
373
|
+
"id": rel,
|
|
374
|
+
"path": rel,
|
|
375
|
+
"title": title,
|
|
376
|
+
"family": family,
|
|
377
|
+
"role": role,
|
|
378
|
+
"trust_zone": "medium",
|
|
379
|
+
"body_markdown": body_text,
|
|
380
|
+
"issues": issues,
|
|
381
|
+
"knowledge_ids": knowledge_ids,
|
|
382
|
+
"headings": headings,
|
|
383
|
+
"frontmatter_fields": frontmatter_fields,
|
|
384
|
+
"skill_refs": skill_refs,
|
|
385
|
+
"template_refs": template_refs,
|
|
386
|
+
"release_refs": release_refs,
|
|
387
|
+
"lane_refs": [],
|
|
388
|
+
"wiki_links": wiki_links,
|
|
389
|
+
"path_refs": path_refs,
|
|
390
|
+
"outgoing_paths": path_refs,
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
|
|
394
394
|
def list_indexable_markdown_files(
|
|
395
395
|
root: Path,
|
|
396
396
|
scan_roots: list[Path],
|
|
@@ -413,11 +413,11 @@ def collect_indexable_markdown_files(
|
|
|
413
413
|
if not scan_root.exists():
|
|
414
414
|
print(f"[atlas] Warning: scan root does not exist, skipping: {scan_root}", file=sys.stderr)
|
|
415
415
|
continue
|
|
416
|
-
if scan_root.is_file() and scan_root.suffix.lower() == ".md":
|
|
417
|
-
candidates = [scan_root]
|
|
418
|
-
elif scan_root.is_dir():
|
|
419
|
-
candidates = [p for p in sorted(scan_root.rglob("*.md")) if p.is_file()]
|
|
420
|
-
else:
|
|
416
|
+
if scan_root.is_file() and scan_root.suffix.lower() == ".md":
|
|
417
|
+
candidates = [scan_root]
|
|
418
|
+
elif scan_root.is_dir():
|
|
419
|
+
candidates = [p for p in sorted(scan_root.rglob("*.md")) if p.is_file()]
|
|
420
|
+
else:
|
|
421
421
|
candidates = []
|
|
422
422
|
|
|
423
423
|
for md_file in candidates:
|
|
@@ -455,37 +455,37 @@ def collect_indexable_markdown_files(
|
|
|
455
455
|
"skipped_count": len(skipped_files),
|
|
456
456
|
"skipped_files": skipped_files,
|
|
457
457
|
}
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
# ---------------------------------------------------------------------------
|
|
461
|
-
# Incremental build
|
|
462
|
-
# ---------------------------------------------------------------------------
|
|
463
|
-
def _empty_atlas_state() -> dict[str, Any]:
|
|
464
|
-
return {"version": ATLAS_STATE_VERSION, "documents": {}}
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
def _atlas_state_path(atlas_dir: Path) -> Path:
|
|
468
|
-
return atlas_dir / "ATLAS_STATE.json"
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
def load_atlas_state(atlas_dir: Path) -> dict[str, Any]:
|
|
472
|
-
state_path = _atlas_state_path(atlas_dir)
|
|
473
|
-
if not state_path.exists():
|
|
474
|
-
return _empty_atlas_state()
|
|
475
|
-
try:
|
|
476
|
-
data = json.loads(state_path.read_text(encoding="utf-8"))
|
|
477
|
-
except (OSError, json.JSONDecodeError):
|
|
478
|
-
return _empty_atlas_state()
|
|
479
|
-
if not isinstance(data, dict):
|
|
480
|
-
return _empty_atlas_state()
|
|
481
|
-
if data.get("version") != ATLAS_STATE_VERSION:
|
|
482
|
-
return _empty_atlas_state()
|
|
483
|
-
documents = data.get("documents")
|
|
484
|
-
if not isinstance(documents, dict):
|
|
485
|
-
return _empty_atlas_state()
|
|
486
|
-
return {"version": ATLAS_STATE_VERSION, "generated": data.get("generated"), "documents": documents}
|
|
487
|
-
|
|
488
|
-
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
# ---------------------------------------------------------------------------
|
|
461
|
+
# Incremental build
|
|
462
|
+
# ---------------------------------------------------------------------------
|
|
463
|
+
def _empty_atlas_state() -> dict[str, Any]:
|
|
464
|
+
return {"version": ATLAS_STATE_VERSION, "documents": {}}
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
def _atlas_state_path(atlas_dir: Path) -> Path:
|
|
468
|
+
return atlas_dir / "ATLAS_STATE.json"
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def load_atlas_state(atlas_dir: Path) -> dict[str, Any]:
|
|
472
|
+
state_path = _atlas_state_path(atlas_dir)
|
|
473
|
+
if not state_path.exists():
|
|
474
|
+
return _empty_atlas_state()
|
|
475
|
+
try:
|
|
476
|
+
data = json.loads(state_path.read_text(encoding="utf-8"))
|
|
477
|
+
except (OSError, json.JSONDecodeError):
|
|
478
|
+
return _empty_atlas_state()
|
|
479
|
+
if not isinstance(data, dict):
|
|
480
|
+
return _empty_atlas_state()
|
|
481
|
+
if data.get("version") != ATLAS_STATE_VERSION:
|
|
482
|
+
return _empty_atlas_state()
|
|
483
|
+
documents = data.get("documents")
|
|
484
|
+
if not isinstance(documents, dict):
|
|
485
|
+
return _empty_atlas_state()
|
|
486
|
+
return {"version": ATLAS_STATE_VERSION, "generated": data.get("generated"), "documents": documents}
|
|
487
|
+
|
|
488
|
+
|
|
489
489
|
def save_atlas_state(state: dict[str, Any], atlas_dir: Path) -> Path:
|
|
490
490
|
atlas_dir.mkdir(parents=True, exist_ok=True)
|
|
491
491
|
state_path = _atlas_state_path(atlas_dir)
|
|
@@ -696,8 +696,8 @@ def write_wiki_pages_and_provenance(
|
|
|
696
696
|
"changes_path": str(changes_path),
|
|
697
697
|
"changes": change_set,
|
|
698
698
|
}
|
|
699
|
-
|
|
700
|
-
|
|
699
|
+
|
|
700
|
+
|
|
701
701
|
def build_docs_incremental(
|
|
702
702
|
root: Path,
|
|
703
703
|
atlas_dir: Path,
|
|
@@ -709,67 +709,67 @@ def build_docs_incremental(
|
|
|
709
709
|
prior_documents = prior_state.get("documents", {})
|
|
710
710
|
scan_result = collect_indexable_markdown_files(root, scan_roots, exclude_frags)
|
|
711
711
|
current_files = scan_result["files"]
|
|
712
|
-
|
|
713
|
-
current_rel_paths = {}
|
|
714
|
-
for md_file in current_files:
|
|
715
|
-
try:
|
|
716
|
-
rel = md_file.relative_to(root).as_posix()
|
|
717
|
-
except ValueError:
|
|
718
|
-
rel = md_file.as_posix()
|
|
719
|
-
current_rel_paths[rel] = md_file
|
|
720
|
-
|
|
721
|
-
next_documents: dict[str, Any] = {}
|
|
722
|
-
reused_count = 0
|
|
723
|
-
reparsed_count = 0
|
|
724
|
-
|
|
725
|
-
for rel, md_file in current_rel_paths.items():
|
|
726
|
-
stats = md_file.stat()
|
|
727
|
-
current_mtime = stats.st_mtime_ns
|
|
728
|
-
prior_record = prior_documents.get(rel)
|
|
729
|
-
prior_doc = prior_record.get("doc") if isinstance(prior_record, dict) else None
|
|
730
|
-
|
|
731
|
-
if (
|
|
732
|
-
isinstance(prior_record, dict)
|
|
733
|
-
and isinstance(prior_doc, dict)
|
|
734
|
-
and prior_record.get("mtime") == current_mtime
|
|
735
|
-
):
|
|
736
|
-
next_documents[rel] = prior_record
|
|
737
|
-
reused_count += 1
|
|
738
|
-
continue
|
|
739
|
-
|
|
740
|
-
current_hash = _compute_file_hash(md_file)
|
|
741
|
-
if (
|
|
742
|
-
isinstance(prior_record, dict)
|
|
743
|
-
and isinstance(prior_doc, dict)
|
|
744
|
-
and prior_record.get("hash") == current_hash
|
|
745
|
-
):
|
|
746
|
-
next_documents[rel] = {
|
|
747
|
-
"mtime": current_mtime,
|
|
748
|
-
"hash": current_hash,
|
|
749
|
-
"last_indexed": prior_record.get("last_indexed") or generated,
|
|
750
|
-
"doc": prior_doc,
|
|
751
|
-
}
|
|
752
|
-
reused_count += 1
|
|
753
|
-
continue
|
|
754
|
-
|
|
755
|
-
next_documents[rel] = {
|
|
756
|
-
"mtime": current_mtime,
|
|
757
|
-
"hash": current_hash,
|
|
758
|
-
"last_indexed": generated,
|
|
759
|
-
"doc": _parse_doc_record(md_file, root=root),
|
|
760
|
-
}
|
|
761
|
-
reparsed_count += 1
|
|
762
|
-
|
|
763
|
-
removed_count = len(set(prior_documents.keys()) - set(current_rel_paths.keys()))
|
|
764
|
-
docs = sorted(
|
|
765
|
-
[record["doc"] for record in next_documents.values()],
|
|
766
|
-
key=lambda d: d["id"],
|
|
767
|
-
)
|
|
768
|
-
next_state = {
|
|
769
|
-
"version": ATLAS_STATE_VERSION,
|
|
770
|
-
"generated": generated,
|
|
771
|
-
"documents": next_documents,
|
|
772
|
-
}
|
|
712
|
+
|
|
713
|
+
current_rel_paths = {}
|
|
714
|
+
for md_file in current_files:
|
|
715
|
+
try:
|
|
716
|
+
rel = md_file.relative_to(root).as_posix()
|
|
717
|
+
except ValueError:
|
|
718
|
+
rel = md_file.as_posix()
|
|
719
|
+
current_rel_paths[rel] = md_file
|
|
720
|
+
|
|
721
|
+
next_documents: dict[str, Any] = {}
|
|
722
|
+
reused_count = 0
|
|
723
|
+
reparsed_count = 0
|
|
724
|
+
|
|
725
|
+
for rel, md_file in current_rel_paths.items():
|
|
726
|
+
stats = md_file.stat()
|
|
727
|
+
current_mtime = stats.st_mtime_ns
|
|
728
|
+
prior_record = prior_documents.get(rel)
|
|
729
|
+
prior_doc = prior_record.get("doc") if isinstance(prior_record, dict) else None
|
|
730
|
+
|
|
731
|
+
if (
|
|
732
|
+
isinstance(prior_record, dict)
|
|
733
|
+
and isinstance(prior_doc, dict)
|
|
734
|
+
and prior_record.get("mtime") == current_mtime
|
|
735
|
+
):
|
|
736
|
+
next_documents[rel] = prior_record
|
|
737
|
+
reused_count += 1
|
|
738
|
+
continue
|
|
739
|
+
|
|
740
|
+
current_hash = _compute_file_hash(md_file)
|
|
741
|
+
if (
|
|
742
|
+
isinstance(prior_record, dict)
|
|
743
|
+
and isinstance(prior_doc, dict)
|
|
744
|
+
and prior_record.get("hash") == current_hash
|
|
745
|
+
):
|
|
746
|
+
next_documents[rel] = {
|
|
747
|
+
"mtime": current_mtime,
|
|
748
|
+
"hash": current_hash,
|
|
749
|
+
"last_indexed": prior_record.get("last_indexed") or generated,
|
|
750
|
+
"doc": prior_doc,
|
|
751
|
+
}
|
|
752
|
+
reused_count += 1
|
|
753
|
+
continue
|
|
754
|
+
|
|
755
|
+
next_documents[rel] = {
|
|
756
|
+
"mtime": current_mtime,
|
|
757
|
+
"hash": current_hash,
|
|
758
|
+
"last_indexed": generated,
|
|
759
|
+
"doc": _parse_doc_record(md_file, root=root),
|
|
760
|
+
}
|
|
761
|
+
reparsed_count += 1
|
|
762
|
+
|
|
763
|
+
removed_count = len(set(prior_documents.keys()) - set(current_rel_paths.keys()))
|
|
764
|
+
docs = sorted(
|
|
765
|
+
[record["doc"] for record in next_documents.values()],
|
|
766
|
+
key=lambda d: d["id"],
|
|
767
|
+
)
|
|
768
|
+
next_state = {
|
|
769
|
+
"version": ATLAS_STATE_VERSION,
|
|
770
|
+
"generated": generated,
|
|
771
|
+
"documents": next_documents,
|
|
772
|
+
}
|
|
773
773
|
build_stats = {
|
|
774
774
|
"discovered_count": len(current_rel_paths),
|
|
775
775
|
"scanned_count": scan_result["scanned_count"],
|
|
@@ -780,138 +780,138 @@ def build_docs_incremental(
|
|
|
780
780
|
"reparsed_count": reparsed_count,
|
|
781
781
|
"removed_count": removed_count,
|
|
782
782
|
}
|
|
783
|
-
return docs, next_state, build_stats
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
# ---------------------------------------------------------------------------
|
|
787
|
-
# Graph builder
|
|
788
|
-
# ---------------------------------------------------------------------------
|
|
789
|
-
def _build_doc_alias_map(docs: list[dict[str, Any]]) -> dict[str, set[str]]:
|
|
790
|
-
alias_map: dict[str, set[str]] = {}
|
|
791
|
-
for doc in docs:
|
|
792
|
-
doc_id = doc["id"]
|
|
793
|
-
path_obj = Path(doc_id)
|
|
794
|
-
aliases = {
|
|
795
|
-
doc_id,
|
|
796
|
-
doc_id.lower(),
|
|
797
|
-
path_obj.name,
|
|
798
|
-
path_obj.name.lower(),
|
|
799
|
-
path_obj.stem,
|
|
800
|
-
path_obj.stem.lower(),
|
|
801
|
-
}
|
|
802
|
-
if doc_id.lower().endswith(".md"):
|
|
803
|
-
no_ext = doc_id[:-3]
|
|
804
|
-
no_ext_path = Path(no_ext)
|
|
805
|
-
aliases.update({no_ext, no_ext.lower(), no_ext_path.name, no_ext_path.name.lower()})
|
|
806
|
-
for alias in aliases:
|
|
807
|
-
alias_map.setdefault(alias, set()).add(doc_id)
|
|
808
|
-
return alias_map
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
def _resolve_doc_reference(raw: str, alias_map: dict[str, set[str]]) -> str | None:
|
|
812
|
-
normalised = _normalize_internal_ref(raw)
|
|
813
|
-
if not normalised:
|
|
814
|
-
return None
|
|
815
|
-
candidates = [normalised, normalised.lower()]
|
|
816
|
-
if not normalised.lower().endswith(".md"):
|
|
817
|
-
candidates.extend([f"{normalised}.md", f"{normalised.lower()}.md"])
|
|
818
|
-
for candidate in candidates:
|
|
819
|
-
matches = alias_map.get(candidate)
|
|
820
|
-
if matches and len(matches) == 1:
|
|
821
|
-
return next(iter(matches))
|
|
822
|
-
return None
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
def build_graph(docs: list[dict[str, Any]]) -> dict[str, Any]:
|
|
826
|
-
alias_map = _build_doc_alias_map(docs)
|
|
827
|
-
|
|
828
|
-
nodes = [
|
|
829
|
-
{
|
|
830
|
-
"id": d["id"],
|
|
831
|
-
"title": d["title"],
|
|
832
|
-
"family": d["family"],
|
|
833
|
-
"role": d["role"],
|
|
834
|
-
"trust_zone": d.get("trust_zone", "medium"),
|
|
835
|
-
}
|
|
836
|
-
for d in docs
|
|
837
|
-
]
|
|
838
|
-
|
|
839
|
-
edges: list[dict[str, Any]] = []
|
|
840
|
-
|
|
841
|
-
for doc in docs:
|
|
842
|
-
src = doc["id"]
|
|
843
|
-
|
|
844
|
-
for issue in doc.get("issues", []):
|
|
845
|
-
edges.append({"source": src, "target": issue, "type": "references_issue", "label": issue})
|
|
846
|
-
|
|
847
|
-
for kid in doc.get("knowledge_ids", []):
|
|
848
|
-
edges.append({"source": src, "target": kid, "type": "references_knowledge_object", "label": kid})
|
|
849
|
-
|
|
850
|
-
for rp in doc.get("path_refs", doc.get("outgoing_paths", [])):
|
|
851
|
-
target = _resolve_doc_reference(rp, alias_map)
|
|
852
|
-
if target:
|
|
853
|
-
edges.append({"source": src, "target": target, "type": "references_path", "label": rp})
|
|
854
|
-
|
|
855
|
-
for wiki_ref in doc.get("wiki_links", []):
|
|
856
|
-
target = _resolve_doc_reference(wiki_ref, alias_map)
|
|
857
|
-
if target:
|
|
858
|
-
edges.append({"source": src, "target": target, "type": "references_wiki_link", "label": wiki_ref})
|
|
859
|
-
|
|
860
|
-
for skill_ref in doc.get("skill_refs", []):
|
|
861
|
-
edges.append({"source": src, "target": f"__skill__{skill_ref}", "type": "references_skill", "label": skill_ref})
|
|
862
|
-
|
|
863
|
-
for template_ref in doc.get("template_refs", []):
|
|
864
|
-
edges.append({"source": src, "target": f"__template__{template_ref}", "type": "references_template", "label": template_ref})
|
|
865
|
-
|
|
866
|
-
family_groups: dict[str, list[str]] = {}
|
|
867
|
-
for doc in docs:
|
|
868
|
-
family_groups.setdefault(doc["family"], []).append(doc["id"])
|
|
869
|
-
for family, members in family_groups.items():
|
|
870
|
-
if len(members) < 2:
|
|
871
|
-
continue
|
|
872
|
-
for mid in members:
|
|
873
|
-
edges.append({"source": mid, "target": f"__family__{family}", "type": "same_family", "label": family})
|
|
874
|
-
|
|
875
|
-
return {"nodes": nodes, "edges": edges}
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
# ---------------------------------------------------------------------------
|
|
879
|
-
# Summary markdown
|
|
880
|
-
# ---------------------------------------------------------------------------
|
|
783
|
+
return docs, next_state, build_stats
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
# ---------------------------------------------------------------------------
|
|
787
|
+
# Graph builder
|
|
788
|
+
# ---------------------------------------------------------------------------
|
|
789
|
+
def _build_doc_alias_map(docs: list[dict[str, Any]]) -> dict[str, set[str]]:
|
|
790
|
+
alias_map: dict[str, set[str]] = {}
|
|
791
|
+
for doc in docs:
|
|
792
|
+
doc_id = doc["id"]
|
|
793
|
+
path_obj = Path(doc_id)
|
|
794
|
+
aliases = {
|
|
795
|
+
doc_id,
|
|
796
|
+
doc_id.lower(),
|
|
797
|
+
path_obj.name,
|
|
798
|
+
path_obj.name.lower(),
|
|
799
|
+
path_obj.stem,
|
|
800
|
+
path_obj.stem.lower(),
|
|
801
|
+
}
|
|
802
|
+
if doc_id.lower().endswith(".md"):
|
|
803
|
+
no_ext = doc_id[:-3]
|
|
804
|
+
no_ext_path = Path(no_ext)
|
|
805
|
+
aliases.update({no_ext, no_ext.lower(), no_ext_path.name, no_ext_path.name.lower()})
|
|
806
|
+
for alias in aliases:
|
|
807
|
+
alias_map.setdefault(alias, set()).add(doc_id)
|
|
808
|
+
return alias_map
|
|
809
|
+
|
|
810
|
+
|
|
811
|
+
def _resolve_doc_reference(raw: str, alias_map: dict[str, set[str]]) -> str | None:
    """Resolve a raw reference to a single document id, or ``None``.

    The reference is first passed through ``_normalize_internal_ref``; a falsy
    result means there is nothing to resolve. The normalised text is then
    looked up verbatim, lower-cased and, when it does not already end in
    ``.md``, with that suffix appended. The first spelling that maps to
    exactly one document wins; ambiguous aliases are skipped.
    """
    ref = _normalize_internal_ref(raw)
    if not ref:
        return None

    lowered = ref.lower()
    probes = [ref, lowered]
    if not lowered.endswith(".md"):
        probes += [f"{ref}.md", f"{lowered}.md"]

    for probe in probes:
        hits = alias_map.get(probe)
        if hits and len(hits) == 1:
            (only,) = hits
            return only
    return None
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
def build_graph(docs: list[dict[str, Any]]) -> dict[str, Any]:
    """Build the node and typed-edge lists for the indexed documents.

    Every document becomes one node. For each document, edges are emitted in
    this order: issue references, knowledge-object references, path
    references and wiki links (only when they resolve to exactly one
    document), skill references and template references. Finally, every
    family with at least two members links each member to a synthetic
    ``__family__<name>`` target.

    Returns:
        ``{"nodes": [...], "edges": [...]}``.
    """
    alias_map = _build_doc_alias_map(docs)

    def _edge(source: Any, target: Any, kind: str, label: Any) -> dict[str, Any]:
        # Key order is kept stable so the emitted records look the same everywhere.
        return {"source": source, "target": target, "type": kind, "label": label}

    nodes: list[dict[str, Any]] = []
    for entry in docs:
        nodes.append(
            {
                "id": entry["id"],
                "title": entry["title"],
                "family": entry["family"],
                "role": entry["role"],
                "trust_zone": entry.get("trust_zone", "medium"),
            }
        )

    edges: list[dict[str, Any]] = []
    for entry in docs:
        origin = entry["id"]

        edges.extend(_edge(origin, issue, "references_issue", issue) for issue in entry.get("issues", []))
        edges.extend(
            _edge(origin, kid, "references_knowledge_object", kid) for kid in entry.get("knowledge_ids", [])
        )

        # References that must resolve to a known document before an edge is recorded.
        resolvable = (
            ("references_path", entry.get("path_refs", entry.get("outgoing_paths", []))),
            ("references_wiki_link", entry.get("wiki_links", [])),
        )
        for kind, refs in resolvable:
            for ref in refs:
                resolved = _resolve_doc_reference(ref, alias_map)
                if resolved:
                    edges.append(_edge(origin, resolved, kind, ref))

        edges.extend(
            _edge(origin, f"__skill__{skill}", "references_skill", skill) for skill in entry.get("skill_refs", [])
        )
        edges.extend(
            _edge(origin, f"__template__{template}", "references_template", template)
            for template in entry.get("template_refs", [])
        )

    by_family: dict[str, list[str]] = {}
    for entry in docs:
        by_family.setdefault(entry["family"], []).append(entry["id"])
    for family_name, member_ids in by_family.items():
        if len(member_ids) >= 2:
            edges.extend(
                _edge(member, f"__family__{family_name}", "same_family", family_name) for member in member_ids
            )

    return {"nodes": nodes, "edges": edges}
|
|
876
|
+
|
|
877
|
+
|
|
878
|
+
# ---------------------------------------------------------------------------
|
|
879
|
+
# Summary markdown
|
|
880
|
+
# ---------------------------------------------------------------------------
|
|
881
881
|
def build_summary(
|
|
882
|
-
docs: list[dict[str, Any]],
|
|
883
|
-
graph: dict[str, Any],
|
|
884
|
-
generated: str,
|
|
882
|
+
docs: list[dict[str, Any]],
|
|
883
|
+
graph: dict[str, Any],
|
|
884
|
+
generated: str,
|
|
885
885
|
stats: dict[str, Any] | None,
|
|
886
|
-
root: Path,
|
|
887
|
-
scan_roots: list[Path],
|
|
888
|
-
exclude_frags: list[str],
|
|
889
|
-
) -> str:
|
|
890
|
-
family_counts: dict[str, int] = {}
|
|
891
|
-
for d in docs:
|
|
892
|
-
family_counts[d["family"]] = family_counts.get(d["family"], 0) + 1
|
|
893
|
-
|
|
894
|
-
edge_type_counts: dict[str, int] = {}
|
|
895
|
-
for e in graph["edges"]:
|
|
896
|
-
et = e["type"]
|
|
897
|
-
edge_type_counts[et] = edge_type_counts.get(et, 0) + 1
|
|
898
|
-
|
|
899
|
-
lines: list[str] = [
|
|
886
|
+
root: Path,
|
|
887
|
+
scan_roots: list[Path],
|
|
888
|
+
exclude_frags: list[str],
|
|
889
|
+
) -> str:
|
|
890
|
+
family_counts: dict[str, int] = {}
|
|
891
|
+
for d in docs:
|
|
892
|
+
family_counts[d["family"]] = family_counts.get(d["family"], 0) + 1
|
|
893
|
+
|
|
894
|
+
edge_type_counts: dict[str, int] = {}
|
|
895
|
+
for e in graph["edges"]:
|
|
896
|
+
et = e["type"]
|
|
897
|
+
edge_type_counts[et] = edge_type_counts.get(et, 0) + 1
|
|
898
|
+
|
|
899
|
+
lines: list[str] = [
|
|
900
900
|
"# SDTK-WIKI Graph Summary",
|
|
901
|
-
"",
|
|
902
|
-
f"Generated: {generated}",
|
|
903
|
-
f"Project root: {root}",
|
|
904
|
-
"",
|
|
905
|
-
"## Document Counts",
|
|
906
|
-
"",
|
|
907
|
-
f"Total documents indexed: {len(docs)}",
|
|
908
|
-
"",
|
|
909
|
-
"| Family | Count |",
|
|
910
|
-
"|--------|-------|",
|
|
911
|
-
]
|
|
912
|
-
for fam, cnt in sorted(family_counts.items(), key=lambda x: -x[1]):
|
|
913
|
-
lines.append(f"| {fam} | {cnt} |")
|
|
914
|
-
|
|
901
|
+
"",
|
|
902
|
+
f"Generated: {generated}",
|
|
903
|
+
f"Project root: {root}",
|
|
904
|
+
"",
|
|
905
|
+
"## Document Counts",
|
|
906
|
+
"",
|
|
907
|
+
f"Total documents indexed: {len(docs)}",
|
|
908
|
+
"",
|
|
909
|
+
"| Family | Count |",
|
|
910
|
+
"|--------|-------|",
|
|
911
|
+
]
|
|
912
|
+
for fam, cnt in sorted(family_counts.items(), key=lambda x: -x[1]):
|
|
913
|
+
lines.append(f"| {fam} | {cnt} |")
|
|
914
|
+
|
|
915
915
|
if stats is not None:
|
|
916
916
|
lines += [
|
|
917
917
|
"",
|
|
@@ -936,107 +936,107 @@ def build_summary(
|
|
|
936
936
|
]
|
|
937
937
|
for skipped in skipped_files:
|
|
938
938
|
lines.append(f"| {skipped['path']} | {skipped['reason']} |")
|
|
939
|
-
|
|
940
|
-
lines += [
|
|
941
|
-
"",
|
|
942
|
-
"## Graph Summary",
|
|
943
|
-
"",
|
|
944
|
-
f"Total nodes: {len(graph['nodes'])}",
|
|
945
|
-
f"Total edges: {len(graph['edges'])}",
|
|
946
|
-
"",
|
|
947
|
-
"## Scan Roots",
|
|
948
|
-
"",
|
|
949
|
-
]
|
|
950
|
-
for sr in scan_roots:
|
|
951
|
-
lines.append(f"- {sr}")
|
|
952
|
-
|
|
953
|
-
lines += [
|
|
954
|
-
"",
|
|
955
|
-
"## Exclusions Applied",
|
|
956
|
-
"",
|
|
957
|
-
]
|
|
958
|
-
for frag in exclude_frags:
|
|
959
|
-
lines.append(f"- {frag}")
|
|
960
|
-
|
|
961
|
-
return "\n".join(lines) + "\n"
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
# ---------------------------------------------------------------------------
|
|
965
|
-
# Static viewer
|
|
966
|
-
# ---------------------------------------------------------------------------
|
|
967
|
-
_FAMILY_COLORS = {
|
|
968
|
-
"governance": "#58a6ff",
|
|
969
|
-
"guide": "#14b8a6",
|
|
970
|
-
"backlog": "#d2a8ff",
|
|
971
|
-
"spec": "#f0883e",
|
|
972
|
-
"architecture": "#3fb950",
|
|
973
|
-
"database": "#a371f7",
|
|
974
|
-
"api": "#f778ba",
|
|
975
|
-
"qa": "#79c0ff",
|
|
976
|
-
"design": "#ffa657",
|
|
977
|
-
"dev": "#56d364",
|
|
978
|
-
"product": "#e3b341",
|
|
979
|
-
"skill": "#58a6ff",
|
|
980
|
-
"template": "#f0883e",
|
|
981
|
-
"root-readme": "#e3b341",
|
|
982
|
-
"other-markdown": "#8b949e",
|
|
983
|
-
}
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
def build_viewer(index: dict, graph: dict, generated: str) -> str:
|
|
987
|
-
if not _VIEWER_TEMPLATE_PATH.exists():
|
|
988
|
-
raise FileNotFoundError(f"Viewer template not found: {_VIEWER_TEMPLATE_PATH}")
|
|
989
|
-
index_json = _json_for_inline_script(index)
|
|
990
|
-
graph_json = _json_for_inline_script(graph)
|
|
991
|
-
family_colors_json = _json_for_inline_script(_FAMILY_COLORS)
|
|
992
|
-
template = _VIEWER_TEMPLATE_PATH.read_text(encoding="utf-8")
|
|
993
|
-
return (
|
|
994
|
-
template
|
|
995
|
-
.replace("__ATLAS_GENERATED__", generated)
|
|
996
|
-
.replace("__ATLAS_INDEX_JSON__", index_json)
|
|
997
|
-
.replace("__ATLAS_GRAPH_JSON__", graph_json)
|
|
998
|
-
.replace("__ATLAS_FAMILY_COLORS_JSON__", family_colors_json)
|
|
999
|
-
)
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
def copy_viewer_assets(atlas_dir: Path) -> list[Path]:
|
|
1003
|
-
if not MERMAID_VENDOR_PATH.exists():
|
|
1004
|
-
raise FileNotFoundError(f"Missing Mermaid runtime asset: {MERMAID_VENDOR_PATH}")
|
|
1005
|
-
atlas_dir.mkdir(parents=True, exist_ok=True)
|
|
1006
|
-
# Copy mermaid to atlas root (same location the viewer template expects)
|
|
1007
|
-
destination = atlas_dir / MERMAID_ASSET_NAME
|
|
1008
|
-
shutil.copyfile(MERMAID_VENDOR_PATH, destination)
|
|
1009
|
-
return [destination]
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
# ---------------------------------------------------------------------------
|
|
1013
|
-
# Main build
|
|
1014
|
-
# ---------------------------------------------------------------------------
|
|
1015
|
-
def build_atlas(
|
|
1016
|
-
root: Path,
|
|
1017
|
-
atlas_dir: Path,
|
|
1018
|
-
scan_roots: list[Path] | None = None,
|
|
1019
|
-
exclude_frags: list[str] | None = None,
|
|
1020
|
-
verbose: bool = False,
|
|
1021
|
-
) -> dict[str, Any]:
|
|
1022
|
-
generated = _now_utc()
|
|
1023
|
-
frags = exclude_frags if exclude_frags is not None else DEFAULT_EXCLUDE_FRAGS
|
|
1024
|
-
roots = scan_roots if scan_roots else [root]
|
|
1025
|
-
|
|
1026
|
-
print(f"[atlas] Project root: {root}")
|
|
1027
|
-
print(f"[atlas] Output dir: {atlas_dir}")
|
|
1028
|
-
print(f"[atlas] Scan roots: {[str(r) for r in roots]}")
|
|
1029
|
-
|
|
1030
|
-
atlas_dir.mkdir(parents=True, exist_ok=True)
|
|
1031
|
-
|
|
1032
|
-
print("[atlas] Scanning markdown files...")
|
|
1033
|
-
docs, state, stats = build_docs_incremental(
|
|
1034
|
-
root=root,
|
|
1035
|
-
atlas_dir=atlas_dir,
|
|
1036
|
-
generated=generated,
|
|
1037
|
-
scan_roots=roots,
|
|
1038
|
-
exclude_frags=frags,
|
|
1039
|
-
)
|
|
939
|
+
|
|
940
|
+
lines += [
|
|
941
|
+
"",
|
|
942
|
+
"## Graph Summary",
|
|
943
|
+
"",
|
|
944
|
+
f"Total nodes: {len(graph['nodes'])}",
|
|
945
|
+
f"Total edges: {len(graph['edges'])}",
|
|
946
|
+
"",
|
|
947
|
+
"## Scan Roots",
|
|
948
|
+
"",
|
|
949
|
+
]
|
|
950
|
+
for sr in scan_roots:
|
|
951
|
+
lines.append(f"- {sr}")
|
|
952
|
+
|
|
953
|
+
lines += [
|
|
954
|
+
"",
|
|
955
|
+
"## Exclusions Applied",
|
|
956
|
+
"",
|
|
957
|
+
]
|
|
958
|
+
for frag in exclude_frags:
|
|
959
|
+
lines.append(f"- {frag}")
|
|
960
|
+
|
|
961
|
+
return "\n".join(lines) + "\n"
|
|
962
|
+
|
|
963
|
+
|
|
964
|
+
# ---------------------------------------------------------------------------
|
|
965
|
+
# Static viewer
|
|
966
|
+
# ---------------------------------------------------------------------------
|
|
967
|
+
# Hex colour per document family. build_viewer() serialises this table and
# substitutes it for the __ATLAS_FAMILY_COLORS_JSON__ placeholder in the
# viewer template. Several families share a colour: governance/skill,
# spec/template and product/root-readme.
_FAMILY_COLORS = {
    "governance": "#58a6ff",
    "guide": "#14b8a6",
    "backlog": "#d2a8ff",
    "spec": "#f0883e",
    "architecture": "#3fb950",
    "database": "#a371f7",
    "api": "#f778ba",
    "qa": "#79c0ff",
    "design": "#ffa657",
    "dev": "#56d364",
    "product": "#e3b341",
    "skill": "#58a6ff",
    "template": "#f0883e",
    "root-readme": "#e3b341",
    "other-markdown": "#8b949e",
}
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
def build_viewer(index: dict, graph: dict, generated: str) -> str:
    """Render the static viewer HTML by filling in the viewer template.

    Args:
        index: Document index payload embedded for ``__ATLAS_INDEX_JSON__``.
        graph: Graph payload embedded for ``__ATLAS_GRAPH_JSON__``.
        generated: Timestamp text substituted for ``__ATLAS_GENERATED__``.

    Returns:
        The template text with every placeholder replaced.

    Raises:
        FileNotFoundError: If the viewer template file does not exist.
    """
    if not _VIEWER_TEMPLATE_PATH.exists():
        raise FileNotFoundError(f"Viewer template not found: {_VIEWER_TEMPLATE_PATH}")

    # NOTE(review): _json_for_inline_script is assumed to return JSON text that
    # is safe inside a <script> element -- confirm against its definition.
    substitutions = {
        "__ATLAS_GENERATED__": generated,
        "__ATLAS_INDEX_JSON__": _json_for_inline_script(index),
        "__ATLAS_GRAPH_JSON__": _json_for_inline_script(graph),
        "__ATLAS_FAMILY_COLORS_JSON__": _json_for_inline_script(_FAMILY_COLORS),
    }
    template = _VIEWER_TEMPLATE_PATH.read_text(encoding="utf-8")

    # Substitute in a single pass so injected values are never rescanned.
    # Chained str.replace() calls would also rewrite a placeholder token that
    # merely appears inside the embedded data (for example an indexed document
    # that mentions "__ATLAS_GRAPH_JSON__"), corrupting the generated page.
    placeholder_re = re.compile("|".join(re.escape(token) for token in substitutions))
    return placeholder_re.sub(lambda match: substitutions[match.group(0)], template)
|
|
1000
|
+
|
|
1001
|
+
|
|
1002
|
+
def copy_viewer_assets(atlas_dir: Path) -> list[Path]:
    """Copy the vendored Mermaid runtime into ``atlas_dir``.

    The output directory is created when missing. Returns a one-element list
    holding the destination path.

    Raises:
        FileNotFoundError: If the vendored Mermaid asset is not present.
    """
    if MERMAID_VENDOR_PATH.exists():
        atlas_dir.mkdir(parents=True, exist_ok=True)
        # The asset is placed directly under atlas_dir, where the viewer template expects it.
        target = atlas_dir / MERMAID_ASSET_NAME
        shutil.copyfile(MERMAID_VENDOR_PATH, target)
        return [target]
    raise FileNotFoundError(f"Missing Mermaid runtime asset: {MERMAID_VENDOR_PATH}")
|
|
1010
|
+
|
|
1011
|
+
|
|
1012
|
+
# ---------------------------------------------------------------------------
|
|
1013
|
+
# Main build
|
|
1014
|
+
# ---------------------------------------------------------------------------
|
|
1015
|
+
def build_atlas(
|
|
1016
|
+
root: Path,
|
|
1017
|
+
atlas_dir: Path,
|
|
1018
|
+
scan_roots: list[Path] | None = None,
|
|
1019
|
+
exclude_frags: list[str] | None = None,
|
|
1020
|
+
verbose: bool = False,
|
|
1021
|
+
) -> dict[str, Any]:
|
|
1022
|
+
generated = _now_utc()
|
|
1023
|
+
frags = exclude_frags if exclude_frags is not None else DEFAULT_EXCLUDE_FRAGS
|
|
1024
|
+
roots = scan_roots if scan_roots else [root]
|
|
1025
|
+
|
|
1026
|
+
print(f"[atlas] Project root: {root}")
|
|
1027
|
+
print(f"[atlas] Output dir: {atlas_dir}")
|
|
1028
|
+
print(f"[atlas] Scan roots: {[str(r) for r in roots]}")
|
|
1029
|
+
|
|
1030
|
+
atlas_dir.mkdir(parents=True, exist_ok=True)
|
|
1031
|
+
|
|
1032
|
+
print("[atlas] Scanning markdown files...")
|
|
1033
|
+
docs, state, stats = build_docs_incremental(
|
|
1034
|
+
root=root,
|
|
1035
|
+
atlas_dir=atlas_dir,
|
|
1036
|
+
generated=generated,
|
|
1037
|
+
scan_roots=roots,
|
|
1038
|
+
exclude_frags=frags,
|
|
1039
|
+
)
|
|
1040
1040
|
print(f"[atlas] Indexed {len(docs)} documents.")
|
|
1041
1041
|
print(
|
|
1042
1042
|
f"[atlas] Scan coverage: scanned {stats.get('scanned_count', len(docs))}, "
|
|
@@ -1050,7 +1050,7 @@ def build_atlas(
|
|
|
1050
1050
|
)
|
|
1051
1051
|
for skipped in stats.get("skipped_files", []):
|
|
1052
1052
|
print(f"[atlas] Skipped markdown: {skipped['path']} ({skipped['reason']})")
|
|
1053
|
-
|
|
1053
|
+
|
|
1054
1054
|
print("[atlas] Building graph...")
|
|
1055
1055
|
graph = build_graph(docs)
|
|
1056
1056
|
print(f"[atlas] Graph: {len(graph['nodes'])} nodes, {len(graph['edges'])} edges.")
|
|
@@ -1063,45 +1063,45 @@ def build_atlas(
|
|
|
1063
1063
|
scan_roots=roots,
|
|
1064
1064
|
)
|
|
1065
1065
|
print(f"[atlas] Wiki pages: {wiki_result['page_count']}")
|
|
1066
|
-
|
|
1067
|
-
index_data = {
|
|
1068
|
-
"generated": generated,
|
|
1069
|
-
"count": len(docs),
|
|
1070
|
-
"documents": docs,
|
|
1071
|
-
}
|
|
1072
|
-
|
|
1073
|
-
save_atlas_state(state, atlas_dir=atlas_dir)
|
|
1074
|
-
|
|
1075
|
-
index_path = atlas_dir / "SDTK_DOC_INDEX.json"
|
|
1076
|
-
_write_text_lf(index_path, json.dumps(index_data, ensure_ascii=True, indent=2, sort_keys=False))
|
|
1077
|
-
|
|
1078
|
-
graph_out = {
|
|
1079
|
-
"generated": generated,
|
|
1080
|
-
"node_count": len(graph["nodes"]),
|
|
1081
|
-
"edge_count": len(graph["edges"]),
|
|
1082
|
-
"nodes": graph["nodes"],
|
|
1083
|
-
"edges": graph["edges"],
|
|
1084
|
-
}
|
|
1085
|
-
graph_path = atlas_dir / "SDTK_DOC_GRAPH.json"
|
|
1086
|
-
_write_text_lf(graph_path, json.dumps(graph_out, ensure_ascii=True, indent=2, sort_keys=False))
|
|
1087
|
-
|
|
1088
|
-
summary_text = build_summary(docs, graph, generated, stats=stats, root=root, scan_roots=roots, exclude_frags=frags)
|
|
1089
|
-
summary_path = atlas_dir / "SDTK_DOC_ATLAS_SUMMARY.md"
|
|
1090
|
-
_write_text_lf(summary_path, summary_text)
|
|
1091
|
-
|
|
1092
|
-
viewer_html = build_viewer(index_data, graph_out, generated)
|
|
1093
|
-
viewer_path = atlas_dir / "viewer.html"
|
|
1094
|
-
_write_text_lf(viewer_path, viewer_html)
|
|
1095
|
-
|
|
1096
|
-
for asset_path in copy_viewer_assets(atlas_dir=atlas_dir):
|
|
1097
|
-
if verbose:
|
|
1098
|
-
print(f"[atlas] Wrote asset: {asset_path.name}")
|
|
1099
|
-
|
|
1100
|
-
print(f"[atlas] Done. Output: {atlas_dir}")
|
|
1101
|
-
return {
|
|
1102
|
-
"generated": generated,
|
|
1103
|
-
"doc_count": len(docs),
|
|
1104
|
-
"node_count": len(graph["nodes"]),
|
|
1066
|
+
|
|
1067
|
+
index_data = {
|
|
1068
|
+
"generated": generated,
|
|
1069
|
+
"count": len(docs),
|
|
1070
|
+
"documents": docs,
|
|
1071
|
+
}
|
|
1072
|
+
|
|
1073
|
+
save_atlas_state(state, atlas_dir=atlas_dir)
|
|
1074
|
+
|
|
1075
|
+
index_path = atlas_dir / "SDTK_DOC_INDEX.json"
|
|
1076
|
+
_write_text_lf(index_path, json.dumps(index_data, ensure_ascii=True, indent=2, sort_keys=False))
|
|
1077
|
+
|
|
1078
|
+
graph_out = {
|
|
1079
|
+
"generated": generated,
|
|
1080
|
+
"node_count": len(graph["nodes"]),
|
|
1081
|
+
"edge_count": len(graph["edges"]),
|
|
1082
|
+
"nodes": graph["nodes"],
|
|
1083
|
+
"edges": graph["edges"],
|
|
1084
|
+
}
|
|
1085
|
+
graph_path = atlas_dir / "SDTK_DOC_GRAPH.json"
|
|
1086
|
+
_write_text_lf(graph_path, json.dumps(graph_out, ensure_ascii=True, indent=2, sort_keys=False))
|
|
1087
|
+
|
|
1088
|
+
summary_text = build_summary(docs, graph, generated, stats=stats, root=root, scan_roots=roots, exclude_frags=frags)
|
|
1089
|
+
summary_path = atlas_dir / "SDTK_DOC_ATLAS_SUMMARY.md"
|
|
1090
|
+
_write_text_lf(summary_path, summary_text)
|
|
1091
|
+
|
|
1092
|
+
viewer_html = build_viewer(index_data, graph_out, generated)
|
|
1093
|
+
viewer_path = atlas_dir / "viewer.html"
|
|
1094
|
+
_write_text_lf(viewer_path, viewer_html)
|
|
1095
|
+
|
|
1096
|
+
for asset_path in copy_viewer_assets(atlas_dir=atlas_dir):
|
|
1097
|
+
if verbose:
|
|
1098
|
+
print(f"[atlas] Wrote asset: {asset_path.name}")
|
|
1099
|
+
|
|
1100
|
+
print(f"[atlas] Done. Output: {atlas_dir}")
|
|
1101
|
+
return {
|
|
1102
|
+
"generated": generated,
|
|
1103
|
+
"doc_count": len(docs),
|
|
1104
|
+
"node_count": len(graph["nodes"]),
|
|
1105
1105
|
"edge_count": len(graph["edges"]),
|
|
1106
1106
|
"stats": stats,
|
|
1107
1107
|
"atlas_dir": str(atlas_dir),
|
|
@@ -1112,84 +1112,84 @@ def build_atlas(
|
|
|
1112
1112
|
"changes_path": wiki_result["changes_path"],
|
|
1113
1113
|
"changes": wiki_result["changes"],
|
|
1114
1114
|
}
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
# ---------------------------------------------------------------------------
|
|
1118
|
-
# CLI entry point
|
|
1119
|
-
# ---------------------------------------------------------------------------
|
|
1120
|
-
def main() -> int:
|
|
1121
|
-
parser = argparse.ArgumentParser(
|
|
1115
|
+
|
|
1116
|
+
|
|
1117
|
+
# ---------------------------------------------------------------------------
|
|
1118
|
+
# CLI entry point
|
|
1119
|
+
# ---------------------------------------------------------------------------
|
|
1120
|
+
def main() -> int:
|
|
1121
|
+
parser = argparse.ArgumentParser(
|
|
1122
1122
|
description="SDTK-WIKI Builder -- build a local document graph, wiki pages, and viewer.",
|
|
1123
|
-
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
1124
|
-
)
|
|
1125
|
-
parser.add_argument(
|
|
1126
|
-
"--project-root",
|
|
1127
|
-
required=True,
|
|
1128
|
-
help="Absolute path to the project root to scan.",
|
|
1129
|
-
)
|
|
1130
|
-
parser.add_argument(
|
|
1131
|
-
"--output-dir",
|
|
1132
|
-
required=True,
|
|
1133
|
-
help="Directory to write atlas artifacts into.",
|
|
1134
|
-
)
|
|
1135
|
-
parser.add_argument(
|
|
1136
|
-
"--scan-root",
|
|
1137
|
-
dest="scan_roots",
|
|
1138
|
-
action="append",
|
|
1139
|
-
metavar="PATH",
|
|
1140
|
-
default=None,
|
|
1141
|
-
help="Explicit scan root (repeatable). Defaults to project root.",
|
|
1142
|
-
)
|
|
1143
|
-
parser.add_argument(
|
|
1144
|
-
"--exclude",
|
|
1145
|
-
dest="excludes",
|
|
1146
|
-
action="append",
|
|
1147
|
-
metavar="FRAG",
|
|
1148
|
-
default=None,
|
|
1149
|
-
help="Exclusion path fragment (repeatable). Defaults to standard set.",
|
|
1150
|
-
)
|
|
1151
|
-
parser.add_argument(
|
|
1152
|
-
"--verbose",
|
|
1153
|
-
action="store_true",
|
|
1154
|
-
default=False,
|
|
1155
|
-
help="Show incremental build detail.",
|
|
1156
|
-
)
|
|
1157
|
-
|
|
1158
|
-
args = parser.parse_args()
|
|
1159
|
-
|
|
1160
|
-
root = Path(args.project_root).resolve()
|
|
1161
|
-
if not root.is_dir():
|
|
1162
|
-
print(f"[atlas] ERROR: --project-root is not a directory: {root}", file=sys.stderr)
|
|
1163
|
-
return 1
|
|
1164
|
-
|
|
1165
|
-
atlas_dir = Path(args.output_dir).resolve()
|
|
1166
|
-
|
|
1167
|
-
scan_roots: list[Path] | None = None
|
|
1168
|
-
if args.scan_roots:
|
|
1169
|
-
scan_roots = [Path(sr).resolve() for sr in args.scan_roots]
|
|
1170
|
-
|
|
1171
|
-
excludes: list[str] | None = None
|
|
1172
|
-
if args.excludes:
|
|
1173
|
-
excludes = args.excludes
|
|
1174
|
-
|
|
1175
|
-
try:
|
|
1176
|
-
result = build_atlas(
|
|
1177
|
-
root=root,
|
|
1178
|
-
atlas_dir=atlas_dir,
|
|
1179
|
-
scan_roots=scan_roots,
|
|
1180
|
-
exclude_frags=excludes,
|
|
1181
|
-
verbose=args.verbose,
|
|
1182
|
-
)
|
|
1183
|
-
# Print JSON summary to stdout for Node CLI to parse
|
|
1184
|
-
print(f"[atlas:result] {json.dumps(result)}")
|
|
1185
|
-
return 0
|
|
1186
|
-
except FileNotFoundError as e:
|
|
1187
|
-
print(f"[atlas] ERROR: {e}", file=sys.stderr)
|
|
1188
|
-
return 2
|
|
1189
|
-
except Exception as e:
|
|
1190
|
-
print(f"[atlas] ERROR: {e}", file=sys.stderr)
|
|
1191
|
-
return 1
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
if __name__ == "__main__":
|
|
1195
|
-
sys.exit(main())
|
|
1123
|
+
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
1124
|
+
)
|
|
1125
|
+
parser.add_argument(
|
|
1126
|
+
"--project-root",
|
|
1127
|
+
required=True,
|
|
1128
|
+
help="Absolute path to the project root to scan.",
|
|
1129
|
+
)
|
|
1130
|
+
parser.add_argument(
|
|
1131
|
+
"--output-dir",
|
|
1132
|
+
required=True,
|
|
1133
|
+
help="Directory to write atlas artifacts into.",
|
|
1134
|
+
)
|
|
1135
|
+
parser.add_argument(
|
|
1136
|
+
"--scan-root",
|
|
1137
|
+
dest="scan_roots",
|
|
1138
|
+
action="append",
|
|
1139
|
+
metavar="PATH",
|
|
1140
|
+
default=None,
|
|
1141
|
+
help="Explicit scan root (repeatable). Defaults to project root.",
|
|
1142
|
+
)
|
|
1143
|
+
parser.add_argument(
|
|
1144
|
+
"--exclude",
|
|
1145
|
+
dest="excludes",
|
|
1146
|
+
action="append",
|
|
1147
|
+
metavar="FRAG",
|
|
1148
|
+
default=None,
|
|
1149
|
+
help="Exclusion path fragment (repeatable). Defaults to standard set.",
|
|
1150
|
+
)
|
|
1151
|
+
parser.add_argument(
|
|
1152
|
+
"--verbose",
|
|
1153
|
+
action="store_true",
|
|
1154
|
+
default=False,
|
|
1155
|
+
help="Show incremental build detail.",
|
|
1156
|
+
)
|
|
1157
|
+
|
|
1158
|
+
args = parser.parse_args()
|
|
1159
|
+
|
|
1160
|
+
root = Path(args.project_root).resolve()
|
|
1161
|
+
if not root.is_dir():
|
|
1162
|
+
print(f"[atlas] ERROR: --project-root is not a directory: {root}", file=sys.stderr)
|
|
1163
|
+
return 1
|
|
1164
|
+
|
|
1165
|
+
atlas_dir = Path(args.output_dir).resolve()
|
|
1166
|
+
|
|
1167
|
+
scan_roots: list[Path] | None = None
|
|
1168
|
+
if args.scan_roots:
|
|
1169
|
+
scan_roots = [Path(sr).resolve() for sr in args.scan_roots]
|
|
1170
|
+
|
|
1171
|
+
excludes: list[str] | None = None
|
|
1172
|
+
if args.excludes:
|
|
1173
|
+
excludes = args.excludes
|
|
1174
|
+
|
|
1175
|
+
try:
|
|
1176
|
+
result = build_atlas(
|
|
1177
|
+
root=root,
|
|
1178
|
+
atlas_dir=atlas_dir,
|
|
1179
|
+
scan_roots=scan_roots,
|
|
1180
|
+
exclude_frags=excludes,
|
|
1181
|
+
verbose=args.verbose,
|
|
1182
|
+
)
|
|
1183
|
+
# Print JSON summary to stdout for Node CLI to parse
|
|
1184
|
+
print(f"[atlas:result] {json.dumps(result)}")
|
|
1185
|
+
return 0
|
|
1186
|
+
except FileNotFoundError as e:
|
|
1187
|
+
print(f"[atlas] ERROR: {e}", file=sys.stderr)
|
|
1188
|
+
return 2
|
|
1189
|
+
except Exception as e:
|
|
1190
|
+
print(f"[atlas] ERROR: {e}", file=sys.stderr)
|
|
1191
|
+
return 1
|
|
1192
|
+
|
|
1193
|
+
|
|
1194
|
+
if __name__ == "__main__":
|
|
1195
|
+
sys.exit(main())
|