sdtk-wiki-kit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +262 -0
- package/assets/atlas/build_atlas.py +1110 -0
- package/assets/atlas/doc_atlas_viewer_template.html +3796 -0
- package/assets/atlas/vendor/mermaid.min.js +2029 -0
- package/assets/keys/sdtk-entitlement-public.pem +11 -0
- package/bin/sdtk-wiki.js +14 -0
- package/package.json +45 -0
- package/src/commands/ask.js +139 -0
- package/src/commands/atlas.js +339 -0
- package/src/commands/deferred.js +14 -0
- package/src/commands/help.js +67 -0
- package/src/commands/init.js +91 -0
- package/src/commands/lint.js +48 -0
- package/src/commands/wiki.js +251 -0
- package/src/index.js +65 -0
- package/src/lib/args.js +68 -0
- package/src/lib/browser-open.js +32 -0
- package/src/lib/errors.js +29 -0
- package/src/lib/wiki-ask.js +175 -0
- package/src/lib/wiki-compile.js +287 -0
- package/src/lib/wiki-config.js +180 -0
- package/src/lib/wiki-discover.js +271 -0
- package/src/lib/wiki-flags.js +89 -0
- package/src/lib/wiki-ingest.js +198 -0
- package/src/lib/wiki-lint.js +468 -0
- package/src/lib/wiki-paths.js +169 -0
- package/src/lib/wiki-premium-loader.js +364 -0
- package/src/lib/wiki-prune.js +334 -0
- package/src/lib/wiki-query-history.js +111 -0
- package/src/lib/wiki-runner.js +373 -0
- package/src/lib/wiki-workspace.js +144 -0
|
@@ -0,0 +1,1110 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
SDTK-WIKI Builder -- generic local-project edition.
|
|
4
|
+
|
|
5
|
+
Scans markdown files under configured scan roots, builds a document index
|
|
6
|
+
and graph, and generates a static local viewer.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python build_atlas.py --project-root <path> --output-dir <path>
|
|
10
|
+
[--scan-root <path> ...] [--exclude <frag> ...]
|
|
11
|
+
[--verbose]
|
|
12
|
+
|
|
13
|
+
Outputs (written to <output-dir>/):
|
|
14
|
+
ATLAS_STATE.json - incremental scan/build state
|
|
15
|
+
SDTK_DOC_INDEX.json - full document index
|
|
16
|
+
SDTK_DOC_GRAPH.json - nodes + typed edges
|
|
17
|
+
SDTK_DOC_ATLAS_SUMMARY.md - human-readable summary
|
|
18
|
+
viewer.html - static local viewer (data embedded)
|
|
19
|
+
vendor/mermaid.min.js - vendored viewer asset
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import argparse
|
|
25
|
+
import hashlib
|
|
26
|
+
import json
|
|
27
|
+
import re
|
|
28
|
+
import shutil
|
|
29
|
+
import sys
|
|
30
|
+
from datetime import datetime, timezone
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
|
+
# Version stamp written into ATLAS_STATE.json; load_atlas_state() ignores
# any persisted state whose version differs from this value.
ATLAS_STATE_VERSION = 6
# Schema numbers embedded in generated wiki pages and provenance files.
WIKI_PAGE_SCHEMA_VERSION = 1
WIKI_PROVENANCE_SCHEMA_VERSION = 1
# Assets shipped next to this script.
MERMAID_VENDOR_PATH = Path(__file__).parent.joinpath("vendor", "mermaid.min.js")
MERMAID_ASSET_NAME = "mermaid.min.js"
_VIEWER_TEMPLATE_PATH = Path(__file__).parent.joinpath("doc_atlas_viewer_template.html")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _json_for_inline_script(value: Any) -> str:
|
|
43
|
+
return (
|
|
44
|
+
json.dumps(value, ensure_ascii=True, separators=(",", ":"))
|
|
45
|
+
.replace("</", "<\\/")
|
|
46
|
+
.replace("<!--", "<\\!--")
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
# Default consumer project exclude fragments
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
# Path fragments skipped by default. _is_excluded() matches each fragment as
# a case-insensitive substring of the file's path.
DEFAULT_EXCLUDE_FRAGS: list[str] = [
    # version control and SDTK's own generated output
    ".git", ".sdtk/wiki", ".sdtk/atlas",
    # dependency and virtual-environment directories
    "node_modules", ".venv", "venv",
    # build, coverage and cache output
    "dist", "build", "coverage", ".next", ".turbo", ".cache", "__pycache__",
]
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# Reference patterns
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
# Backlog issue ids such as "BK-123" (three or more digits); captures the digits.
RE_BK = re.compile(r"\bBK-(\d{3,})\b")
# Knowledge object ids such as "KD-0001"; captures (prefix, four digits).
RE_KNOWLEDGE_ID = re.compile(r"\b(KD|KT|KP|KA|KR|KRB|KF)-(\d{4})\b")
# Slash-separated paths ending in a known extension, preceded by start of
# string, whitespace, a backtick, "(" or "["; captures the path.
RE_REPO_PATH = re.compile(
    r"(?:^|[\s`(\[])([a-zA-Z0-9_\-]+(?:/[a-zA-Z0-9_\-. ]+)+\."
    r"(?:md|py|ps1|json|yaml|yml|html|txt))"
)
# [[wiki link]] targets; captures everything between the double brackets.
RE_WIKI_LINK = re.compile(r"\[\[([^\]]+)\]\]")
# [text](target) links that are not images (no leading "!"); captures target.
RE_MARKDOWN_LINK = re.compile(r"(?<!!)\[[^\]]+\]\(([^)]+)\)")
# Lower-case tokens starting with "sdtk-".
RE_SKILL_REF = re.compile(r"\b(sdtk-[a-z0-9][a-z0-9-]*)\b")
# 0.x.y version numbers, optionally prefixed with "sdtk-spec-kit@".
RE_RELEASE_REF = re.compile(r"\b(?:sdtk-spec-kit@)?(0\.\d+\.\d+)\b")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ---------------------------------------------------------------------------
|
|
84
|
+
# Generic doc-family classifier (project-scope, no maintainer assumptions)
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
def classify_family(rel: str) -> str:
    """Map a project-relative markdown path to a coarse document family.

    Matching is case-insensitive and uses the forward-slash form of ``rel``.
    Rules are evaluated in a fixed order and the first hit wins; a path that
    matches nothing is reported as ``"other-markdown"``.
    """
    posix = rel.replace("\\", "/").lower()
    if posix == "readme.md":
        return "root-readme"
    if "backlog" in Path(rel).name.lower():
        return "backlog"

    # (family, substrings) pairs; order matters because the first match wins.
    substring_rules = (
        ("skill", ("skills",)),
        ("template", ("templates",)),
        ("database", ("docs/database", "database/")),
        ("spec", ("docs/specs", "specs/")),
        ("architecture", ("docs/architecture", "architecture/")),
        ("api", ("docs/api", "api/")),
        ("qa", ("docs/qa", "qa/")),
        ("design", ("docs/design", "design/")),
        ("dev", ("docs/dev", "dev/")),
        ("product", ("docs/product", "product/")),
    )
    for family, needles in substring_rules:
        if any(needle in posix for needle in needles):
            return family

    if posix.startswith("guides/") or "/guides/" in posix:
        return "guide"
    if "governance" in posix:
        return "governance"
    return "other-markdown"
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def classify_role(rel: str) -> str:
    """Return the role label for a markdown path.

    The lower-cased, forward-slash path is searched for keywords in priority
    order: governance, then spec/architecture, then skill. Anything else is
    ``"other"``.
    """
    posix = rel.replace("\\", "/").lower()
    keyword_roles = (
        (("governance",), "governance"),
        (("spec", "architecture"), "spec-artifact"),
        (("skill",), "skill"),
    )
    for keywords, role in keyword_roles:
        if any(keyword in posix for keyword in keywords):
            return role
    return "other"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ---------------------------------------------------------------------------
|
|
133
|
+
# Scanner helpers
|
|
134
|
+
# ---------------------------------------------------------------------------
|
|
135
|
+
def _now_utc() -> str:
|
|
136
|
+
return datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _write_text_lf(path: Path, content: str) -> None:
|
|
140
|
+
path.write_text(content, encoding="utf-8", newline="\n")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _assert_inside(base: Path, target: Path) -> None:
|
|
144
|
+
resolved_base = base.resolve()
|
|
145
|
+
resolved_target = target.resolve()
|
|
146
|
+
if resolved_target != resolved_base and resolved_base not in resolved_target.parents:
|
|
147
|
+
raise ValueError(f"Refusing to write outside SDTK-WIKI workspace: {resolved_target}")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _is_excluded(
|
|
151
|
+
path: Path,
|
|
152
|
+
root: Path,
|
|
153
|
+
exclude_frags: list[str],
|
|
154
|
+
) -> bool:
|
|
155
|
+
try:
|
|
156
|
+
rel = path.relative_to(root).as_posix().lower()
|
|
157
|
+
except ValueError:
|
|
158
|
+
rel = path.as_posix().lower()
|
|
159
|
+
for frag in exclude_frags:
|
|
160
|
+
norm_frag = frag.replace("\\", "/").lower()
|
|
161
|
+
if norm_frag in rel:
|
|
162
|
+
return True
|
|
163
|
+
return False
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _extract_title(text: str) -> str:
|
|
167
|
+
for line in text.splitlines():
|
|
168
|
+
stripped = line.strip()
|
|
169
|
+
if stripped.startswith("# "):
|
|
170
|
+
return stripped[2:].strip()
|
|
171
|
+
return ""
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _extract_headings(text: str) -> list[str]:
|
|
175
|
+
headings: list[str] = []
|
|
176
|
+
for line in text.splitlines():
|
|
177
|
+
stripped = line.strip()
|
|
178
|
+
if not stripped.startswith("#"):
|
|
179
|
+
continue
|
|
180
|
+
level = len(stripped) - len(stripped.lstrip("#"))
|
|
181
|
+
if 1 <= level <= 6 and len(stripped) > level and stripped[level] == " ":
|
|
182
|
+
headings.append(stripped[level + 1:].strip())
|
|
183
|
+
return headings
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
|
|
187
|
+
lines = text.splitlines()
|
|
188
|
+
if not lines or lines[0].strip() != "---":
|
|
189
|
+
return {}, text
|
|
190
|
+
|
|
191
|
+
fields: dict[str, Any] = {}
|
|
192
|
+
current_list_key: str | None = None
|
|
193
|
+
for idx in range(1, len(lines)):
|
|
194
|
+
raw = lines[idx]
|
|
195
|
+
stripped = raw.strip()
|
|
196
|
+
if stripped in {"---", "..."}:
|
|
197
|
+
body = "\n".join(lines[idx + 1:])
|
|
198
|
+
if text.endswith("\n"):
|
|
199
|
+
body += "\n"
|
|
200
|
+
return fields, body
|
|
201
|
+
if not stripped:
|
|
202
|
+
current_list_key = None
|
|
203
|
+
continue
|
|
204
|
+
if stripped.startswith("- ") and current_list_key and isinstance(fields.get(current_list_key), list):
|
|
205
|
+
fields[current_list_key].append(stripped[2:].strip().strip('"\''))
|
|
206
|
+
continue
|
|
207
|
+
if ":" not in raw:
|
|
208
|
+
current_list_key = None
|
|
209
|
+
continue
|
|
210
|
+
key, value = raw.split(":", 1)
|
|
211
|
+
key = key.strip()
|
|
212
|
+
value = value.strip()
|
|
213
|
+
if not key:
|
|
214
|
+
current_list_key = None
|
|
215
|
+
continue
|
|
216
|
+
if not value:
|
|
217
|
+
fields[key] = []
|
|
218
|
+
current_list_key = key
|
|
219
|
+
continue
|
|
220
|
+
if value.startswith("[") and value.endswith("]"):
|
|
221
|
+
inner = value[1:-1].strip()
|
|
222
|
+
if inner:
|
|
223
|
+
fields[key] = [part.strip().strip('"\'') for part in inner.split(",") if part.strip()]
|
|
224
|
+
else:
|
|
225
|
+
fields[key] = []
|
|
226
|
+
current_list_key = None
|
|
227
|
+
continue
|
|
228
|
+
fields[key] = value.strip('"\'')
|
|
229
|
+
current_list_key = None
|
|
230
|
+
|
|
231
|
+
return {}, text
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _normalize_internal_ref(raw: str) -> str:
|
|
235
|
+
value = raw.strip()
|
|
236
|
+
if not value:
|
|
237
|
+
return ""
|
|
238
|
+
value = value.split("|", 1)[0].strip()
|
|
239
|
+
value = value.split("#", 1)[0].strip()
|
|
240
|
+
value = value.replace("\\", "/")
|
|
241
|
+
while value.startswith("./"):
|
|
242
|
+
value = value[2:]
|
|
243
|
+
if value.startswith("/"):
|
|
244
|
+
value = value[1:]
|
|
245
|
+
return value.strip()
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _extract_references(text: str) -> tuple[list[str], list[str], list[str]]:
    """Find backlog ids, knowledge ids and repo-style paths mentioned in *text*.

    Returns:
        ``(issues, knowledge_ids, paths)``. The two id lists are sorted and
        de-duplicated; ``paths`` holds normalised paths in first-seen order.
    """
    issues = sorted({f"BK-{digits}" for digits in RE_BK.findall(text)})
    knowledge_ids = sorted(
        {f"{prefix}-{number}" for prefix, number in RE_KNOWLEDGE_ID.findall(text)}
    )
    cleaned = (_normalize_internal_ref(hit) for hit in RE_REPO_PATH.findall(text))
    # dict.fromkeys() de-duplicates while keeping first-seen order.
    paths = list(dict.fromkeys(ref for ref in cleaned if ref))
    return issues, knowledge_ids, paths
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _extract_wiki_links(text: str) -> list[str]:
    """Return normalised ``[[wiki link]]`` targets, unique, in first-seen order."""
    cleaned = (_normalize_internal_ref(hit) for hit in RE_WIKI_LINK.findall(text))
    # dict.fromkeys() de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(ref for ref in cleaned if ref))
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _extract_markdown_links(text: str) -> list[str]:
    """Return local ``[text](target)`` link targets, unique, in first-seen order.

    External targets (``http://``, ``https://``, ``mailto:``, anything with
    ``://``) and pure ``#anchor`` links are skipped. Optional link titles and
    ``<...>`` wrappers around the destination are removed before the target
    is normalised.
    """
    links: list[str] = []
    seen: set[str] = set()
    for raw in RE_MARKDOWN_LINK.findall(text):
        target = raw.strip()
        # Markdown links may include optional titles: [x](path.md "title").
        # Drop the title before any other check so that its content (for
        # example a URL quoted in the title) cannot cause a local link to be
        # skipped.
        for title_opener in (' "', " '"):
            if title_opener in target:
                target = target.split(title_opener, 1)[0]
        # Strip the angle brackets only after the title is gone; otherwise
        # `<a b.md> "t"` would keep its closing ">".
        target = target.strip().strip("<>")
        if (
            not target
            or "://" in target
            or target.lower().startswith(("http://", "https://", "mailto:", "#"))
        ):
            continue
        normalised = _normalize_internal_ref(target)
        if normalised and normalised not in seen:
            seen.add(normalised)
            links.append(normalised)
    return links
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _extract_skill_refs(text: str, path_refs: list[str], wiki_links: list[str]) -> list[str]:
    """Collect skill names referenced by a document, sorted and lower-cased.

    Names come from two places: ``sdtk-*`` tokens found in *text*, and the
    path segment that directly follows a ``skills`` or ``skills-claude``
    segment in any path reference or wiki link.
    """
    found = {token.lower() for token in RE_SKILL_REF.findall(text)}
    for ref in [*path_refs, *wiki_links]:
        segments = [segment for segment in ref.split("/") if segment]
        for marker in ("skills", "skills-claude"):
            try:
                following = segments[segments.index(marker) + 1]
            except (ValueError, IndexError):
                # marker absent, or it is the last segment
                continue
            found.add(following.lower())
    return sorted(found)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _extract_template_refs(path_refs: list[str], wiki_links: list[str]) -> list[str]:
    """Return the sorted, unique references that pass through a ``templates`` directory."""
    normalised = (_normalize_internal_ref(ref) for ref in [*path_refs, *wiki_links])
    # Prefixing "/" lets a leading "templates/..." match the same test.
    return sorted({ref for ref in normalised if "/templates/" in f"/{ref}"})
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _extract_release_refs(text: str) -> list[str]:
    """Return the sorted, unique ``0.x.y`` version strings found in *text*."""
    versions = {match for match in RE_RELEASE_REF.findall(text)}
    return sorted(versions)
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _compute_file_hash(md_file: Path) -> str:
|
|
321
|
+
content = md_file.read_bytes()
|
|
322
|
+
return hashlib.sha256(content).hexdigest()
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _parse_doc_record(md_file: Path, root: Path) -> dict[str, Any]:
    """Read one markdown file and build its document index record.

    Args:
        md_file: Markdown file to parse. Undecodable bytes are replaced.
        root: Project root used to derive the record's relative id/path.

    Returns:
        A dict with the document id/path, title, family, role, body markdown
        (frontmatter removed), headings, frontmatter fields and every kind of
        extracted reference.
    """
    # Files outside the project root are keyed by their full POSIX path, the
    # same fallback list_indexable_markdown_files() and
    # build_docs_incremental() use. Without it an out-of-root scan root
    # would raise ValueError here.
    try:
        rel = md_file.relative_to(root).as_posix()
    except ValueError:
        rel = md_file.as_posix()
    text = md_file.read_text(encoding="utf-8", errors="replace")
    frontmatter_fields, body_text = _parse_frontmatter(text)
    # Title preference: frontmatter, then first "# " heading, then file stem.
    title = str(
        frontmatter_fields.get("title")
        or _extract_title(body_text)
        or md_file.stem.replace("_", " ").replace("-", " ")
    )
    headings = _extract_headings(body_text)
    # References are extracted from the full text, frontmatter included.
    issues, knowledge_ids, path_refs = _extract_references(text)
    wiki_links = _extract_wiki_links(text)
    markdown_links = _extract_markdown_links(text)
    path_refs = sorted(set(path_refs + markdown_links))
    family = classify_family(rel)
    role = classify_role(rel)
    skill_refs = _extract_skill_refs(text, path_refs, wiki_links)
    template_refs = _extract_template_refs(path_refs, wiki_links)
    release_refs = _extract_release_refs(text)
    return {
        "id": rel,
        "path": rel,
        "title": title,
        "family": family,
        "role": role,
        "trust_zone": "medium",
        "body_markdown": body_text,
        "issues": issues,
        "knowledge_ids": knowledge_ids,
        "headings": headings,
        "frontmatter_fields": frontmatter_fields,
        "skill_refs": skill_refs,
        "template_refs": template_refs,
        "release_refs": release_refs,
        "lane_refs": [],
        "wiki_links": wiki_links,
        "path_refs": path_refs,
        "outgoing_paths": path_refs,
    }
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def list_indexable_markdown_files(
    root: Path,
    scan_roots: list[Path],
    exclude_frags: list[str],
) -> list[Path]:
    """Enumerate the markdown files to index, sorted by POSIX path.

    Each scan root may be a single ``.md`` file or a directory searched
    recursively for ``*.md``. Missing scan roots produce a warning on stderr.
    Excluded files are dropped, and a file reachable through several scan
    roots is returned once.
    """
    # Keyed by root-relative path; the first file seen for a key is kept.
    by_rel: dict[str, Path] = {}

    for scan_root in scan_roots:
        if not scan_root.exists():
            print(f"[atlas] Warning: scan root does not exist, skipping: {scan_root}", file=sys.stderr)
            continue

        if scan_root.is_file() and scan_root.suffix.lower() == ".md":
            candidates = [scan_root]
        elif scan_root.is_dir():
            candidates = [hit for hit in sorted(scan_root.rglob("*.md")) if hit.is_file()]
        else:
            candidates = []

        for md_file in candidates:
            if _is_excluded(md_file, root=root, exclude_frags=exclude_frags):
                continue
            try:
                rel = md_file.relative_to(root).as_posix()
            except ValueError:
                rel = md_file.as_posix()
            by_rel.setdefault(rel, md_file)

    return sorted(by_rel.values(), key=lambda found: found.as_posix())
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
# ---------------------------------------------------------------------------
|
|
402
|
+
# Incremental build
|
|
403
|
+
# ---------------------------------------------------------------------------
|
|
404
|
+
def _empty_atlas_state() -> dict[str, Any]:
    """Return a fresh state dict holding the current version and no documents."""
    blank: dict[str, Any] = {}
    blank["version"] = ATLAS_STATE_VERSION
    blank["documents"] = {}
    return blank
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def _atlas_state_path(atlas_dir: Path) -> Path:
|
|
409
|
+
return atlas_dir / "ATLAS_STATE.json"
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def load_atlas_state(atlas_dir: Path) -> dict[str, Any]:
    """Load the persisted incremental-build state from *atlas_dir*.

    Any state that cannot be used (missing file, unreadable file, invalid
    UTF-8 or JSON, wrong top-level type, version mismatch, malformed
    ``documents``) yields an empty state, so the caller re-indexes from
    scratch.

    Returns:
        A dict with ``version``, ``generated`` (may be ``None``) and
        ``documents`` when the file is usable; otherwise the empty state.
    """
    state_path = _atlas_state_path(atlas_dir)
    if not state_path.exists():
        return _empty_atlas_state()
    try:
        data = json.loads(state_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for bytes that are not valid UTF-8.
        return _empty_atlas_state()
    if not isinstance(data, dict):
        return _empty_atlas_state()
    if data.get("version") != ATLAS_STATE_VERSION:
        return _empty_atlas_state()
    documents = data.get("documents")
    if not isinstance(documents, dict):
        return _empty_atlas_state()
    return {"version": ATLAS_STATE_VERSION, "generated": data.get("generated"), "documents": documents}
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def save_atlas_state(state: dict[str, Any], atlas_dir: Path) -> Path:
    """Write *state* as indented ASCII JSON into *atlas_dir* and return the file path.

    The directory is created if it does not exist.
    """
    atlas_dir.mkdir(parents=True, exist_ok=True)
    destination = _atlas_state_path(atlas_dir)
    serialised = json.dumps(state, ensure_ascii=True, indent=2, sort_keys=False)
    _write_text_lf(destination, serialised)
    return destination
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
# ---------------------------------------------------------------------------
|
|
438
|
+
# Wiki pages and provenance
|
|
439
|
+
# ---------------------------------------------------------------------------
|
|
440
|
+
def _wiki_workspace_root(root: Path) -> Path:
|
|
441
|
+
return root / ".sdtk" / "wiki"
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _wiki_pages_root(root: Path) -> Path:
    """Return the ``pages`` directory inside the wiki workspace."""
    return _wiki_workspace_root(root).joinpath("pages")
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def _wiki_provenance_root(root: Path) -> Path:
    """Return the ``provenance`` directory inside the wiki workspace."""
    return _wiki_workspace_root(root).joinpath("provenance")
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def _stable_page_id(source_path: str) -> str:
|
|
453
|
+
norm = source_path.replace("\\", "/")
|
|
454
|
+
digest = hashlib.sha256(norm.encode("utf-8")).hexdigest()
|
|
455
|
+
return f"wiki:{digest[:16]}"
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def _safe_slug(value: str) -> str:
|
|
459
|
+
slug = value.strip().lower()
|
|
460
|
+
slug = re.sub(r"[^a-z0-9]+", "-", slug)
|
|
461
|
+
slug = slug.strip("-")
|
|
462
|
+
return slug or "page"
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def _page_relative_path(doc: dict[str, Any]) -> str:
    """Build the project-relative path of the generated page for *doc*.

    The file name combines a slug of the title (or of the source file stem
    when the title is empty) with an 8-character digest of the source path;
    the parent directory is the slugged family.
    """
    posix_source = str(doc["path"]).replace("\\", "/")
    label = doc.get("title") or Path(posix_source).stem
    family_name = doc.get("family") or "other-markdown"
    family = _safe_slug(str(family_name))
    slug = _safe_slug(str(label))
    # The digest keeps file names unique when two sources share a title.
    source_digest = hashlib.sha256(posix_source.encode("utf-8")).hexdigest()[:8]
    return f".sdtk/wiki/pages/{family}/{slug}--{source_digest}.md"
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _yaml_quote(value: Any) -> str:
|
|
474
|
+
text = str(value)
|
|
475
|
+
escaped = text.replace("\\", "\\\\").replace('"', '\\"')
|
|
476
|
+
return f'"{escaped}"'
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def _render_generated_page(doc: dict[str, Any], page_id: str, source_hash: str, generated: str) -> str:
    """Render a generated wiki page: managed frontmatter followed by the body.

    The body is the document's ``body_markdown``; a trailing newline is added
    when a non-empty body lacks one.
    """
    quoted_fields = (
        ("page_id", page_id),
        ("source_path", doc["path"]),
        ("source_hash", source_hash),
        ("title", doc.get("title") or ""),
        ("family", doc.get("family") or ""),
        ("role", doc.get("role") or ""),
        ("generated_at", generated),
    )
    header = [
        "---",
        f"schema_version: {WIKI_PAGE_SCHEMA_VERSION}",
        'product: "SDTK-WIKI"',
        'managed_by: "sdtk-wiki"',
    ]
    header.extend(f"{key}: {_yaml_quote(value)}" for key, value in quoted_fields)
    header.append("---")

    body = str(doc.get("body_markdown") or "")
    if body and body[-1] != "\n":
        body = f"{body}\n"
    return "\n".join(header) + "\n" + body
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def _prior_source_hashes(sources_path: Path) -> dict[str, str]:
|
|
502
|
+
if not sources_path.exists():
|
|
503
|
+
return {}
|
|
504
|
+
try:
|
|
505
|
+
payload = json.loads(sources_path.read_text(encoding="utf-8"))
|
|
506
|
+
except (OSError, json.JSONDecodeError):
|
|
507
|
+
return {}
|
|
508
|
+
sources = payload.get("sources")
|
|
509
|
+
if not isinstance(sources, list):
|
|
510
|
+
return {}
|
|
511
|
+
hashes: dict[str, str] = {}
|
|
512
|
+
for record in sources:
|
|
513
|
+
if not isinstance(record, dict):
|
|
514
|
+
continue
|
|
515
|
+
source_path = record.get("sourcePath")
|
|
516
|
+
source_hash = record.get("sourceHash")
|
|
517
|
+
if isinstance(source_path, str) and isinstance(source_hash, str):
|
|
518
|
+
hashes[source_path] = source_hash
|
|
519
|
+
return hashes
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _build_change_set(
|
|
523
|
+
prior_hashes: dict[str, str],
|
|
524
|
+
current_hashes: dict[str, str],
|
|
525
|
+
) -> dict[str, list[str]]:
|
|
526
|
+
prior_paths = set(prior_hashes)
|
|
527
|
+
current_paths = set(current_hashes)
|
|
528
|
+
added = sorted(current_paths - prior_paths)
|
|
529
|
+
removed = sorted(prior_paths - current_paths)
|
|
530
|
+
changed = sorted(
|
|
531
|
+
path for path in current_paths & prior_paths
|
|
532
|
+
if prior_hashes.get(path) != current_hashes.get(path)
|
|
533
|
+
)
|
|
534
|
+
unchanged = sorted(
|
|
535
|
+
path for path in current_paths & prior_paths
|
|
536
|
+
if prior_hashes.get(path) == current_hashes.get(path)
|
|
537
|
+
)
|
|
538
|
+
return {
|
|
539
|
+
"added": added,
|
|
540
|
+
"changed": changed,
|
|
541
|
+
"unchanged": unchanged,
|
|
542
|
+
"removed": removed,
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def write_wiki_pages_and_provenance(
    docs: list[dict[str, Any]],
    state: dict[str, Any],
    root: Path,
    generated: str,
    scan_roots: list[Path],
) -> dict[str, Any]:
    """Write generated wiki pages, the page index and provenance files.

    For every document a page is written under ``.sdtk/wiki/pages``. The
    function then writes ``pages/_index.md``, ``provenance/sources.json``
    (one record per source) and ``provenance/changes.json`` (added / changed
    / unchanged / removed paths compared with the previous ``sources.json``).

    Args:
        docs: Document records; each needs ``path`` and ``id``.
        state: Incremental build state; ``documents[path]["hash"]`` supplies
            the source content hash when present.
        root: Project root that contains the ``.sdtk/wiki`` workspace.
        generated: Timestamp string stamped into every output.
        scan_roots: Scan roots recorded in ``sources.json``.

    Returns:
        Summary dict with the page count, output paths and the change set.

    Raises:
        ValueError: If a computed page path would fall outside the workspace.
    """
    workspace_root = _wiki_workspace_root(root)
    pages_root = _wiki_pages_root(root)
    provenance_root = _wiki_provenance_root(root)
    sources_path = provenance_root / "sources.json"
    changes_path = provenance_root / "changes.json"

    pages_root.mkdir(parents=True, exist_ok=True)
    provenance_root.mkdir(parents=True, exist_ok=True)

    # Read the previous run's hashes before sources.json is overwritten below.
    prior_hashes = _prior_source_hashes(sources_path)
    state_docs = state.get("documents", {})
    if not isinstance(state_docs, dict):
        state_docs = {}
    provenance_records: list[dict[str, Any]] = []
    index_rows: list[tuple[str, str, str, str]] = []
    current_hashes: dict[str, str] = {}

    def _table_cell(value: Any) -> str:
        # An unescaped "|" ends a markdown table cell, even inside a code span.
        return str(value).replace("|", "\\|")

    for doc in sorted(docs, key=lambda d: d["path"]):
        source_path = str(doc["path"]).replace("\\", "/")
        state_record = state_docs.get(source_path)
        # Tolerate a malformed (non-dict) state record instead of crashing.
        source_hash = state_record.get("hash") if isinstance(state_record, dict) else None
        if not isinstance(source_hash, str):
            # No content hash recorded: fall back to a hash of the path.
            source_hash = hashlib.sha256(source_path.encode("utf-8")).hexdigest()
        page_id = _stable_page_id(source_path)
        page_rel = _page_relative_path(doc)
        page_path = root / page_rel
        _assert_inside(workspace_root, page_path)
        page_path.parent.mkdir(parents=True, exist_ok=True)
        _write_text_lf(page_path, _render_generated_page(doc, page_id, source_hash, generated))

        current_hashes[source_path] = source_hash
        provenance_records.append({
            "pageId": page_id,
            "sourcePath": source_path,
            "sourceHash": source_hash,
            "pagePath": page_rel,
            "graphNodeId": doc["id"],
            "title": doc.get("title") or "",
            "family": doc.get("family") or "",
            "role": doc.get("role") or "",
            "frontmatter": doc.get("frontmatter_fields") or {},
            "headings": doc.get("headings") or [],
            "issues": doc.get("issues") or [],
            "knowledgeIds": doc.get("knowledge_ids") or [],
            "pathRefs": doc.get("path_refs") or [],
            "wikiLinks": doc.get("wiki_links") or [],
        })
        index_rows.append((doc.get("title") or source_path, source_path, page_rel, page_id))

    index_lines = [
        "# SDTK-WIKI Page Index",
        "",
        f"Generated: {generated}",
        "",
        "| Title | Source | Page | Page ID |",
        "|---|---|---|---|",
    ]
    for title, source_path, page_rel, page_id in sorted(index_rows, key=lambda row: row[1]):
        index_lines.append(
            f"| {_table_cell(title)} | `{_table_cell(source_path)}` | `{page_rel}` | `{page_id}` |"
        )
    _write_text_lf(pages_root / "_index.md", "\n".join(index_lines) + "\n")

    source_payload = {
        "schemaVersion": WIKI_PROVENANCE_SCHEMA_VERSION,
        "product": "SDTK-WIKI",
        "generatedAt": generated,
        "projectRoot": str(root),
        "scanRoots": [str(sr) for sr in scan_roots],
        "sourceCount": len(provenance_records),
        "sources": provenance_records,
    }
    _write_text_lf(sources_path, json.dumps(source_payload, ensure_ascii=True, indent=2, sort_keys=False) + "\n")

    change_set = _build_change_set(prior_hashes, current_hashes)
    change_payload = {
        "schemaVersion": WIKI_PROVENANCE_SCHEMA_VERSION,
        "product": "SDTK-WIKI",
        "generatedAt": generated,
        **change_set,
    }
    _write_text_lf(changes_path, json.dumps(change_payload, ensure_ascii=True, indent=2, sort_keys=False) + "\n")

    return {
        "page_count": len(provenance_records),
        "pages_root": str(pages_root),
        "page_index_path": str(pages_root / "_index.md"),
        "provenance_path": str(sources_path),
        "changes_path": str(changes_path),
        "changes": change_set,
    }
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def build_docs_incremental(
    root: Path,
    atlas_dir: Path,
    generated: str,
    scan_roots: list[Path],
    exclude_frags: list[str],
) -> tuple[list[dict[str, Any]], dict[str, Any], dict[str, int]]:
    """Build document records, reusing prior results for unchanged files.

    A file's previous record is reused as-is when its mtime matches the saved
    state. Otherwise the file is hashed: a matching hash reuses the parsed
    document with a refreshed mtime, and anything else is re-parsed.

    Returns:
        ``(docs, next_state, build_stats)``: documents sorted by id, the state
        to persist, and discovered / reused / reparsed / removed counts.
    """
    previous = load_atlas_state(atlas_dir).get("documents", {})

    # Map each discovered file to the key it is stored under in the state.
    discovered: dict[str, Path] = {}
    for md_file in list_indexable_markdown_files(root, scan_roots, exclude_frags):
        try:
            key = md_file.relative_to(root).as_posix()
        except ValueError:
            key = md_file.as_posix()
        discovered[key] = md_file

    next_documents: dict[str, Any] = {}
    reused = 0
    reparsed = 0

    for rel, md_file in discovered.items():
        mtime_ns = md_file.stat().st_mtime_ns
        old_record = previous.get(rel)
        old_doc = old_record.get("doc") if isinstance(old_record, dict) else None
        reusable = isinstance(old_record, dict) and isinstance(old_doc, dict)

        # Fast path: same mtime, keep the stored record without reading the file.
        if reusable and old_record.get("mtime") == mtime_ns:
            next_documents[rel] = old_record
            reused += 1
            continue

        content_hash = _compute_file_hash(md_file)
        if reusable and old_record.get("hash") == content_hash:
            # Touched but unchanged: keep the parsed doc, refresh the mtime.
            last_indexed = old_record.get("last_indexed") or generated
            parsed = old_doc
            reused += 1
        else:
            last_indexed = generated
            parsed = _parse_doc_record(md_file, root=root)
            reparsed += 1

        next_documents[rel] = {
            "mtime": mtime_ns,
            "hash": content_hash,
            "last_indexed": last_indexed,
            "doc": parsed,
        }

    docs = [record["doc"] for record in next_documents.values()]
    docs.sort(key=lambda d: d["id"])

    next_state = {
        "version": ATLAS_STATE_VERSION,
        "generated": generated,
        "documents": next_documents,
    }
    build_stats = {
        "discovered_count": len(discovered),
        "reused_count": reused,
        "reparsed_count": reparsed,
        "removed_count": len(set(previous.keys()) - set(discovered.keys())),
    }
    return docs, next_state, build_stats
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
# ---------------------------------------------------------------------------
|
|
723
|
+
# Graph builder
|
|
724
|
+
# ---------------------------------------------------------------------------
|
|
725
|
+
def _build_doc_alias_map(docs: list[dict[str, Any]]) -> dict[str, set[str]]:
|
|
726
|
+
alias_map: dict[str, set[str]] = {}
|
|
727
|
+
for doc in docs:
|
|
728
|
+
doc_id = doc["id"]
|
|
729
|
+
path_obj = Path(doc_id)
|
|
730
|
+
aliases = {
|
|
731
|
+
doc_id,
|
|
732
|
+
doc_id.lower(),
|
|
733
|
+
path_obj.name,
|
|
734
|
+
path_obj.name.lower(),
|
|
735
|
+
path_obj.stem,
|
|
736
|
+
path_obj.stem.lower(),
|
|
737
|
+
}
|
|
738
|
+
if doc_id.lower().endswith(".md"):
|
|
739
|
+
no_ext = doc_id[:-3]
|
|
740
|
+
no_ext_path = Path(no_ext)
|
|
741
|
+
aliases.update({no_ext, no_ext.lower(), no_ext_path.name, no_ext_path.name.lower()})
|
|
742
|
+
for alias in aliases:
|
|
743
|
+
alias_map.setdefault(alias, set()).add(doc_id)
|
|
744
|
+
return alias_map
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
def _resolve_doc_reference(raw: str, alias_map: dict[str, set[str]]) -> str | None:
    """Map a raw reference string onto exactly one known document id.

    The reference is first passed through ``_normalize_internal_ref``. The
    normalised text and its lower-cased form are then looked up in
    ``alias_map``; when the text does not already end in ``.md``, the same two
    forms with ``.md`` appended are tried afterwards.

    Returns:
        The document id for the first candidate that maps to exactly one
        document, or ``None`` when normalisation yields nothing or every
        candidate is unknown or ambiguous.
    """
    ref = _normalize_internal_ref(raw)
    if not ref:
        return None

    lowered = ref.lower()
    attempts = [ref, lowered]
    if not lowered.endswith(".md"):
        attempts += [f"{ref}.md", f"{lowered}.md"]

    for key in attempts:
        hits = alias_map.get(key)
        # Only an unambiguous alias (exactly one owner) counts as a match.
        if hits and len(hits) == 1:
            (only_id,) = hits
            return only_id
    return None
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
def build_graph(docs: list[dict[str, Any]]) -> dict[str, Any]:
    """Assemble the node and edge lists of the document graph.

    One node is emitted per document (id, title, family, role and trust zone,
    the latter defaulting to ``"medium"``). Edges are emitted per document in
    this order: issue references, knowledge-object references, path references
    and wiki links (both only when they resolve to a single known document),
    skill references and template references. Finally, every family with at
    least two members links each member to a ``__family__<name>`` target.

    Returns:
        ``{"nodes": [...], "edges": [...]}``.
    """
    lookup = _build_doc_alias_map(docs)

    node_list: list[dict[str, Any]] = []
    for item in docs:
        node_list.append(
            {
                "id": item["id"],
                "title": item["title"],
                "family": item["family"],
                "role": item["role"],
                "trust_zone": item.get("trust_zone", "medium"),
            }
        )

    edge_list: list[dict[str, Any]] = []

    def link(origin: Any, destination: Any, kind: str, text: Any) -> None:
        # Single place that fixes the edge record layout.
        edge_list.append({"source": origin, "target": destination, "type": kind, "label": text})

    for item in docs:
        origin = item["id"]

        # References whose target is the referenced identifier itself.
        for issue_id in item.get("issues", []):
            link(origin, issue_id, "references_issue", issue_id)
        for knowledge_id in item.get("knowledge_ids", []):
            link(origin, knowledge_id, "references_knowledge_object", knowledge_id)

        # "path_refs" wins when present; "outgoing_paths" is the fallback key.
        if "path_refs" in item:
            raw_paths = item["path_refs"]
        else:
            raw_paths = item.get("outgoing_paths", [])

        # References that must resolve to a known document to become an edge.
        resolvable = (
            (raw_paths, "references_path"),
            (item.get("wiki_links", []), "references_wiki_link"),
        )
        for raw_refs, kind in resolvable:
            for raw_ref in raw_refs:
                resolved = _resolve_doc_reference(raw_ref, lookup)
                if resolved:
                    link(origin, resolved, kind, raw_ref)

        # References to synthetic, prefixed targets.
        for skill_name in item.get("skill_refs", []):
            link(origin, f"__skill__{skill_name}", "references_skill", skill_name)
        for template_name in item.get("template_refs", []):
            link(origin, f"__template__{template_name}", "references_template", template_name)

    members_by_family: dict[str, list[str]] = {}
    for item in docs:
        family_name = item["family"]
        if family_name not in members_by_family:
            members_by_family[family_name] = []
        members_by_family[family_name].append(item["id"])

    for family_name, member_ids in members_by_family.items():
        # A family of one has nothing to be grouped with.
        if len(member_ids) >= 2:
            for member_id in member_ids:
                link(member_id, f"__family__{family_name}", "same_family", family_name)

    return {"nodes": node_list, "edges": edge_list}
|
|
812
|
+
|
|
813
|
+
|
|
814
|
+
# ---------------------------------------------------------------------------
|
|
815
|
+
# Summary markdown
|
|
816
|
+
# ---------------------------------------------------------------------------
|
|
817
|
+
def build_summary(
|
|
818
|
+
docs: list[dict[str, Any]],
|
|
819
|
+
graph: dict[str, Any],
|
|
820
|
+
generated: str,
|
|
821
|
+
stats: dict[str, int] | None,
|
|
822
|
+
root: Path,
|
|
823
|
+
scan_roots: list[Path],
|
|
824
|
+
exclude_frags: list[str],
|
|
825
|
+
) -> str:
|
|
826
|
+
family_counts: dict[str, int] = {}
|
|
827
|
+
for d in docs:
|
|
828
|
+
family_counts[d["family"]] = family_counts.get(d["family"], 0) + 1
|
|
829
|
+
|
|
830
|
+
edge_type_counts: dict[str, int] = {}
|
|
831
|
+
for e in graph["edges"]:
|
|
832
|
+
et = e["type"]
|
|
833
|
+
edge_type_counts[et] = edge_type_counts.get(et, 0) + 1
|
|
834
|
+
|
|
835
|
+
lines: list[str] = [
|
|
836
|
+
"# SDTK-WIKI Graph Summary",
|
|
837
|
+
"",
|
|
838
|
+
f"Generated: {generated}",
|
|
839
|
+
f"Project root: {root}",
|
|
840
|
+
"",
|
|
841
|
+
"## Document Counts",
|
|
842
|
+
"",
|
|
843
|
+
f"Total documents indexed: {len(docs)}",
|
|
844
|
+
"",
|
|
845
|
+
"| Family | Count |",
|
|
846
|
+
"|--------|-------|",
|
|
847
|
+
]
|
|
848
|
+
for fam, cnt in sorted(family_counts.items(), key=lambda x: -x[1]):
|
|
849
|
+
lines.append(f"| {fam} | {cnt} |")
|
|
850
|
+
|
|
851
|
+
if stats is not None:
|
|
852
|
+
lines += [
|
|
853
|
+
"",
|
|
854
|
+
"## Incremental Build",
|
|
855
|
+
"",
|
|
856
|
+
f"Discovered markdown docs: {stats['discovered_count']}",
|
|
857
|
+
f"Reused cached docs: {stats['reused_count']}",
|
|
858
|
+
f"Reparsed docs: {stats['reparsed_count']}",
|
|
859
|
+
f"Removed stale docs: {stats['removed_count']}",
|
|
860
|
+
]
|
|
861
|
+
|
|
862
|
+
lines += [
|
|
863
|
+
"",
|
|
864
|
+
"## Graph Summary",
|
|
865
|
+
"",
|
|
866
|
+
f"Total nodes: {len(graph['nodes'])}",
|
|
867
|
+
f"Total edges: {len(graph['edges'])}",
|
|
868
|
+
"",
|
|
869
|
+
"## Scan Roots",
|
|
870
|
+
"",
|
|
871
|
+
]
|
|
872
|
+
for sr in scan_roots:
|
|
873
|
+
lines.append(f"- {sr}")
|
|
874
|
+
|
|
875
|
+
lines += [
|
|
876
|
+
"",
|
|
877
|
+
"## Exclusions Applied",
|
|
878
|
+
"",
|
|
879
|
+
]
|
|
880
|
+
for frag in exclude_frags:
|
|
881
|
+
lines.append(f"- {frag}")
|
|
882
|
+
|
|
883
|
+
return "\n".join(lines) + "\n"
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
# ---------------------------------------------------------------------------
|
|
887
|
+
# Static viewer
|
|
888
|
+
# ---------------------------------------------------------------------------
|
|
889
|
+
# Hex colour per document family name. build_viewer() serialises this mapping
# and substitutes it for the __ATLAS_FAMILY_COLORS_JSON__ placeholder in the
# viewer template.
# Note: some entries share a colour -- "skill" matches "governance",
# "template" matches "spec", and "root-readme" matches "product".
_FAMILY_COLORS = {
    "governance": "#58a6ff",
    "guide": "#14b8a6",
    "backlog": "#d2a8ff",
    "spec": "#f0883e",
    "architecture": "#3fb950",
    "database": "#a371f7",
    "api": "#f778ba",
    "qa": "#79c0ff",
    "design": "#ffa657",
    "dev": "#56d364",
    "product": "#e3b341",
    "skill": "#58a6ff",
    "template": "#f0883e",
    "root-readme": "#e3b341",
    "other-markdown": "#8b949e",
}
|
|
906
|
+
|
|
907
|
+
|
|
908
|
+
def build_viewer(index: dict, graph: dict, generated: str) -> str:
    """Fill the viewer HTML template with the build's data.

    Args:
        index: Document index, embedded via ``_json_for_inline_script``.
        graph: Graph payload, embedded via ``_json_for_inline_script``.
        generated: Timestamp text substituted for ``__ATLAS_GENERATED__``.

    Returns:
        The template text with all four ``__ATLAS_*__`` placeholders replaced.

    Raises:
        FileNotFoundError: If the viewer template file does not exist.
    """
    # Imported locally so this block does not rely on the module's import list.
    import re

    if not _VIEWER_TEMPLATE_PATH.exists():
        raise FileNotFoundError(f"Viewer template not found: {_VIEWER_TEMPLATE_PATH}")

    substitutions = {
        "__ATLAS_GENERATED__": generated,
        "__ATLAS_INDEX_JSON__": _json_for_inline_script(index),
        "__ATLAS_GRAPH_JSON__": _json_for_inline_script(graph),
        "__ATLAS_FAMILY_COLORS_JSON__": _json_for_inline_script(_FAMILY_COLORS),
    }
    template = _VIEWER_TEMPLATE_PATH.read_text(encoding="utf-8")

    # Substitute in ONE pass over the template. Chaining str.replace() would
    # re-scan text that was already inserted, so a placeholder token that
    # happens to occur inside the indexed document data (e.g. a doc that
    # mentions "__ATLAS_GRAPH_JSON__") would be expanded by a later replace and
    # corrupt the embedded JSON.
    placeholder_re = re.compile("|".join(re.escape(token) for token in substitutions))
    # A callable replacement is used so backslashes in the JSON are inserted
    # literally rather than being interpreted as regex escapes.
    return placeholder_re.sub(lambda match: substitutions[match.group(0)], template)
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
def copy_viewer_assets(atlas_dir: Path) -> list[Path]:
    """Copy the vendored Mermaid runtime next to the generated viewer.

    The output directory is created if needed and the asset is written
    directly into it under ``MERMAID_ASSET_NAME``.

    Returns:
        A one-element list holding the path of the copied file.

    Raises:
        FileNotFoundError: If the vendored Mermaid file is missing.
    """
    vendored = MERMAID_VENDOR_PATH
    if not vendored.exists():
        raise FileNotFoundError(f"Missing Mermaid runtime asset: {vendored}")

    atlas_dir.mkdir(parents=True, exist_ok=True)
    # The asset lives in the atlas root, alongside the viewer page.
    copied_to = atlas_dir / MERMAID_ASSET_NAME
    shutil.copyfile(vendored, copied_to)
    return [copied_to]
|
|
932
|
+
|
|
933
|
+
|
|
934
|
+
# ---------------------------------------------------------------------------
|
|
935
|
+
# Main build
|
|
936
|
+
# ---------------------------------------------------------------------------
|
|
937
|
+
def build_atlas(
    root: Path,
    atlas_dir: Path,
    scan_roots: list[Path] | None = None,
    exclude_frags: list[str] | None = None,
    verbose: bool = False,
) -> dict[str, Any]:
    """Run one complete atlas build and write every artifact to ``atlas_dir``.

    Steps, in order: scan/index documents incrementally, build the graph,
    write wiki pages and provenance, persist the atlas state, then write the
    index JSON, graph JSON, markdown summary, viewer HTML and viewer assets.
    Progress is reported on stdout with an ``[atlas]`` prefix.

    Args:
        root: Project root.
        atlas_dir: Output directory (created if missing).
        scan_roots: Directories to scan; falls back to ``[root]`` when empty
            or ``None``.
        exclude_frags: Exclusion fragments; ``DEFAULT_EXCLUDE_FRAGS`` is used
            when ``None``.
        verbose: Also print incremental-build counters and copied assets.

    Returns:
        A dict of build results: timestamp, counts, stats, output directory
        and the wiki page/provenance/change details.
    """
    generated = _now_utc()
    active_excludes = DEFAULT_EXCLUDE_FRAGS if exclude_frags is None else exclude_frags
    active_roots = scan_roots or [root]

    def dump_json(target: Path, payload: dict[str, Any]) -> None:
        # Both JSON artifacts share the same serialisation settings.
        _write_text_lf(target, json.dumps(payload, ensure_ascii=True, indent=2, sort_keys=False))

    print(f"[atlas] Project root: {root}")
    print(f"[atlas] Output dir: {atlas_dir}")
    print(f"[atlas] Scan roots: {list(map(str, active_roots))}")

    atlas_dir.mkdir(parents=True, exist_ok=True)

    print("[atlas] Scanning markdown files...")
    docs, state, stats = build_docs_incremental(
        root=root,
        atlas_dir=atlas_dir,
        generated=generated,
        scan_roots=active_roots,
        exclude_frags=active_excludes,
    )
    doc_total = len(docs)
    print(f"[atlas] Indexed {doc_total} documents.")
    if verbose:
        reused = stats["reused_count"]
        reparsed = stats["reparsed_count"]
        removed = stats["removed_count"]
        print(f"[atlas] Incremental build: reused {reused} cached, reparsed {reparsed}, removed {removed}.")

    print("[atlas] Building graph...")
    graph = build_graph(docs)
    node_total = len(graph["nodes"])
    edge_total = len(graph["edges"])
    print(f"[atlas] Graph: {node_total} nodes, {edge_total} edges.")

    print("[atlas] Writing wiki pages and provenance...")
    wiki_result = write_wiki_pages_and_provenance(
        docs=docs,
        state=state,
        root=root,
        generated=generated,
        scan_roots=active_roots,
    )
    print(f"[atlas] Wiki pages: {wiki_result['page_count']}")

    # Persist the incremental state before emitting the derived artifacts.
    save_atlas_state(state, atlas_dir=atlas_dir)

    index_data = {"generated": generated, "count": doc_total, "documents": docs}
    dump_json(atlas_dir / "SDTK_DOC_INDEX.json", index_data)

    graph_out = {
        "generated": generated,
        "node_count": node_total,
        "edge_count": edge_total,
        "nodes": graph["nodes"],
        "edges": graph["edges"],
    }
    dump_json(atlas_dir / "SDTK_DOC_GRAPH.json", graph_out)

    _write_text_lf(
        atlas_dir / "SDTK_DOC_ATLAS_SUMMARY.md",
        build_summary(
            docs,
            graph,
            generated,
            stats=stats,
            root=root,
            scan_roots=active_roots,
            exclude_frags=active_excludes,
        ),
    )

    _write_text_lf(atlas_dir / "viewer.html", build_viewer(index_data, graph_out, generated))

    for asset_path in copy_viewer_assets(atlas_dir=atlas_dir):
        if verbose:
            print(f"[atlas] Wrote asset: {asset_path.name}")

    print(f"[atlas] Done. Output: {atlas_dir}")

    outcome: dict[str, Any] = {
        "generated": generated,
        "doc_count": doc_total,
        "node_count": node_total,
        "edge_count": edge_total,
        "stats": stats,
        "atlas_dir": str(atlas_dir),
    }
    # Wiki details are passed through unchanged, in this fixed key order.
    for passthrough in (
        "page_count",
        "pages_root",
        "page_index_path",
        "provenance_path",
        "changes_path",
        "changes",
    ):
        outcome[passthrough] = wiki_result[passthrough]
    return outcome
|
|
1030
|
+
|
|
1031
|
+
|
|
1032
|
+
# ---------------------------------------------------------------------------
|
|
1033
|
+
# CLI entry point
|
|
1034
|
+
# ---------------------------------------------------------------------------
|
|
1035
|
+
def main() -> int:
    """Command-line entry point: parse arguments and run one atlas build.

    Returns:
        ``0`` on success, ``2`` when the build raises ``FileNotFoundError``,
        and ``1`` when ``--project-root`` is not a directory or any other
        exception is raised. Errors are reported on stderr.
    """
    cli = argparse.ArgumentParser(
        description="SDTK-WIKI Builder -- build a local document graph, wiki pages, and viewer.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument("--project-root", required=True, help="Absolute path to the project root to scan.")
    cli.add_argument("--output-dir", required=True, help="Directory to write atlas artifacts into.")
    cli.add_argument(
        "--scan-root",
        dest="scan_roots",
        action="append",
        metavar="PATH",
        default=None,
        help="Explicit scan root (repeatable). Defaults to project root.",
    )
    cli.add_argument(
        "--exclude",
        dest="excludes",
        action="append",
        metavar="FRAG",
        default=None,
        help="Exclusion path fragment (repeatable). Defaults to standard set.",
    )
    cli.add_argument("--verbose", action="store_true", default=False, help="Show incremental build detail.")
    options = cli.parse_args()

    project_root = Path(options.project_root).resolve()
    if not project_root.is_dir():
        print(f"[atlas] ERROR: --project-root is not a directory: {project_root}", file=sys.stderr)
        return 1

    output_dir = Path(options.output_dir).resolve()
    # Unset or empty repeatable options are forwarded as None so that
    # build_atlas applies its own defaults.
    explicit_roots = [Path(entry).resolve() for entry in options.scan_roots] if options.scan_roots else None
    explicit_excludes = options.excludes or None

    try:
        outcome = build_atlas(
            root=project_root,
            atlas_dir=output_dir,
            scan_roots=explicit_roots,
            exclude_frags=explicit_excludes,
            verbose=options.verbose,
        )
        # Machine-readable result line on stdout, parsed by the Node CLI.
        print(f"[atlas:result] {json.dumps(outcome)}")
    except FileNotFoundError as err:
        print(f"[atlas] ERROR: {err}", file=sys.stderr)
        return 2
    except Exception as err:
        print(f"[atlas] ERROR: {err}", file=sys.stderr)
        return 1
    return 0
|
|
1107
|
+
|
|
1108
|
+
|
|
1109
|
+
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
|