sdtk-wiki-kit 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,141 +1,141 @@
1
- #!/usr/bin/env python3
1
+ #!/usr/bin/env python3
2
2
  """
3
3
  SDTK-WIKI Builder -- generic local-project edition.
4
-
5
- Scans markdown files under configured scan roots, builds a document index
6
- and graph, and generates a static local viewer.
7
-
8
- Usage:
9
- python build_atlas.py --project-root <path> --output-dir <path>
10
- [--scan-root <path> ...] [--exclude <frag> ...]
11
- [--verbose]
12
-
13
- Outputs (written to <output-dir>/):
14
- ATLAS_STATE.json - incremental scan/build state
15
- SDTK_DOC_INDEX.json - full document index
16
- SDTK_DOC_GRAPH.json - nodes + typed edges
17
- SDTK_DOC_ATLAS_SUMMARY.md - human-readable summary
18
- viewer.html - static local viewer (data embedded)
19
- vendor/mermaid.min.js - vendored viewer asset
20
- """
21
-
22
- from __future__ import annotations
23
-
24
- import argparse
25
- import hashlib
26
- import json
27
- import re
28
- import shutil
29
- import sys
30
- from datetime import datetime, timezone
31
- from pathlib import Path
32
- from typing import Any
33
-
4
+
5
+ Scans markdown files under configured scan roots, builds a document index
6
+ and graph, and generates a static local viewer.
7
+
8
+ Usage:
9
+ python build_atlas.py --project-root <path> --output-dir <path>
10
+ [--scan-root <path> ...] [--exclude <frag> ...]
11
+ [--verbose]
12
+
13
+ Outputs (written to <output-dir>/):
14
+ ATLAS_STATE.json - incremental scan/build state
15
+ SDTK_DOC_INDEX.json - full document index
16
+ SDTK_DOC_GRAPH.json - nodes + typed edges
17
+ SDTK_DOC_ATLAS_SUMMARY.md - human-readable summary
18
+ viewer.html - static local viewer (data embedded)
19
+ vendor/mermaid.min.js - vendored viewer asset
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import argparse
25
+ import hashlib
26
+ import json
27
+ import re
28
+ import shutil
29
+ import sys
30
+ from datetime import datetime, timezone
31
+ from pathlib import Path
32
+ from typing import Any
33
+
34
34
  ATLAS_STATE_VERSION = 6
35
35
  WIKI_PAGE_SCHEMA_VERSION = 1
36
36
  WIKI_PROVENANCE_SCHEMA_VERSION = 1
37
37
  MERMAID_VENDOR_PATH = Path(__file__).parent / "vendor" / "mermaid.min.js"
38
38
  MERMAID_ASSET_NAME = "mermaid.min.js"
39
39
  _VIEWER_TEMPLATE_PATH = Path(__file__).parent / "doc_atlas_viewer_template.html"
40
-
41
-
42
- def _json_for_inline_script(value: Any) -> str:
43
- return (
44
- json.dumps(value, ensure_ascii=True, separators=(",", ":"))
45
- .replace("</", "<\\/")
46
- .replace("<!--", "<\\!--")
47
- )
48
-
49
- # ---------------------------------------------------------------------------
50
- # Default consumer project exclude fragments
51
- # ---------------------------------------------------------------------------
40
+
41
+
42
+ def _json_for_inline_script(value: Any) -> str:
43
+ return (
44
+ json.dumps(value, ensure_ascii=True, separators=(",", ":"))
45
+ .replace("</", "<\\/")
46
+ .replace("<!--", "<\\!--")
47
+ )
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # Default consumer project exclude fragments
51
+ # ---------------------------------------------------------------------------
52
52
  DEFAULT_EXCLUDE_FRAGS: list[str] = [
53
53
  ".git",
54
54
  ".sdtk/wiki",
55
55
  ".sdtk/atlas",
56
56
  "node_modules",
57
- ".venv",
58
- "venv",
59
- "dist",
60
- "build",
61
- "coverage",
62
- ".next",
63
- ".turbo",
64
- ".cache",
65
- "__pycache__",
66
- ]
67
-
68
- # ---------------------------------------------------------------------------
69
- # Reference patterns
70
- # ---------------------------------------------------------------------------
71
- RE_BK = re.compile(r"\bBK-(\d{3,})\b")
72
- RE_KNOWLEDGE_ID = re.compile(r"\b(KD|KT|KP|KA|KR|KRB|KF)-(\d{4})\b")
73
- RE_REPO_PATH = re.compile(
74
- r"(?:^|[\s`(\[])([a-zA-Z0-9_\-]+(?:/[a-zA-Z0-9_\-. ]+)+\."
75
- r"(?:md|py|ps1|json|yaml|yml|html|txt))"
76
- )
77
- RE_WIKI_LINK = re.compile(r"\[\[([^\]]+)\]\]")
78
- RE_MARKDOWN_LINK = re.compile(r"(?<!!)\[[^\]]+\]\(([^)]+)\)")
79
- RE_SKILL_REF = re.compile(r"\b(sdtk-[a-z0-9][a-z0-9-]*)\b")
80
- RE_RELEASE_REF = re.compile(r"\b(?:sdtk-spec-kit@)?(0\.\d+\.\d+)\b")
81
-
82
-
83
- # ---------------------------------------------------------------------------
84
- # Generic doc-family classifier (project-scope, no maintainer assumptions)
85
- # ---------------------------------------------------------------------------
86
- def classify_family(rel: str) -> str:
87
- p = rel.replace("\\", "/").lower()
88
- name = Path(rel).name.lower()
89
- is_guide_path = p.startswith("guides/") or "/guides/" in p
90
- if p == "readme.md":
91
- return "root-readme"
92
- if "backlog" in name:
93
- return "backlog"
94
- if "skills" in p:
95
- return "skill"
96
- if "templates" in p:
97
- return "template"
98
- if "docs/database" in p or "database/" in p:
99
- return "database"
100
- if "docs/specs" in p or "specs/" in p:
101
- return "spec"
102
- if "docs/architecture" in p or "architecture/" in p:
103
- return "architecture"
104
- if "docs/api" in p or "api/" in p:
105
- return "api"
106
- if "docs/qa" in p or "qa/" in p:
107
- return "qa"
108
- if "docs/design" in p or "design/" in p:
109
- return "design"
110
- if "docs/dev" in p or "dev/" in p:
111
- return "dev"
112
- if "docs/product" in p or "product/" in p:
113
- return "product"
114
- if is_guide_path:
115
- return "guide"
116
- if "governance" in p:
117
- return "governance"
118
- return "other-markdown"
119
-
120
-
121
- def classify_role(rel: str) -> str:
122
- p = rel.replace("\\", "/").lower()
123
- if "governance" in p:
124
- return "governance"
125
- if "spec" in p or "architecture" in p:
126
- return "spec-artifact"
127
- if "skill" in p:
128
- return "skill"
129
- return "other"
130
-
131
-
132
- # ---------------------------------------------------------------------------
133
- # Scanner helpers
134
- # ---------------------------------------------------------------------------
135
- def _now_utc() -> str:
136
- return datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
137
-
138
-
57
+ ".venv",
58
+ "venv",
59
+ "dist",
60
+ "build",
61
+ "coverage",
62
+ ".next",
63
+ ".turbo",
64
+ ".cache",
65
+ "__pycache__",
66
+ ]
67
+
68
+ # ---------------------------------------------------------------------------
69
+ # Reference patterns
70
+ # ---------------------------------------------------------------------------
71
+ RE_BK = re.compile(r"\bBK-(\d{3,})\b")
72
+ RE_KNOWLEDGE_ID = re.compile(r"\b(KD|KT|KP|KA|KR|KRB|KF)-(\d{4})\b")
73
+ RE_REPO_PATH = re.compile(
74
+ r"(?:^|[\s`(\[])([a-zA-Z0-9_\-]+(?:/[a-zA-Z0-9_\-. ]+)+\."
75
+ r"(?:md|py|ps1|json|yaml|yml|html|txt))"
76
+ )
77
+ RE_WIKI_LINK = re.compile(r"\[\[([^\]]+)\]\]")
78
+ RE_MARKDOWN_LINK = re.compile(r"(?<!!)\[[^\]]+\]\(([^)]+)\)")
79
+ RE_SKILL_REF = re.compile(r"\b(sdtk-[a-z0-9][a-z0-9-]*)\b")
80
+ RE_RELEASE_REF = re.compile(r"\b(?:sdtk-spec-kit@)?(0\.\d+\.\d+)\b")
81
+
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # Generic doc-family classifier (project-scope, no maintainer assumptions)
85
+ # ---------------------------------------------------------------------------
86
+ def classify_family(rel: str) -> str:
87
+ p = rel.replace("\\", "/").lower()
88
+ name = Path(rel).name.lower()
89
+ is_guide_path = p.startswith("guides/") or "/guides/" in p
90
+ if p == "readme.md":
91
+ return "root-readme"
92
+ if "backlog" in name:
93
+ return "backlog"
94
+ if "skills" in p:
95
+ return "skill"
96
+ if "templates" in p:
97
+ return "template"
98
+ if "docs/database" in p or "database/" in p:
99
+ return "database"
100
+ if "docs/specs" in p or "specs/" in p:
101
+ return "spec"
102
+ if "docs/architecture" in p or "architecture/" in p:
103
+ return "architecture"
104
+ if "docs/api" in p or "api/" in p:
105
+ return "api"
106
+ if "docs/qa" in p or "qa/" in p:
107
+ return "qa"
108
+ if "docs/design" in p or "design/" in p:
109
+ return "design"
110
+ if "docs/dev" in p or "dev/" in p:
111
+ return "dev"
112
+ if "docs/product" in p or "product/" in p:
113
+ return "product"
114
+ if is_guide_path:
115
+ return "guide"
116
+ if "governance" in p:
117
+ return "governance"
118
+ return "other-markdown"
119
+
120
+
121
+ def classify_role(rel: str) -> str:
122
+ p = rel.replace("\\", "/").lower()
123
+ if "governance" in p:
124
+ return "governance"
125
+ if "spec" in p or "architecture" in p:
126
+ return "spec-artifact"
127
+ if "skill" in p:
128
+ return "skill"
129
+ return "other"
130
+
131
+
132
+ # ---------------------------------------------------------------------------
133
+ # Scanner helpers
134
+ # ---------------------------------------------------------------------------
135
+ def _now_utc() -> str:
136
+ return datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
137
+
138
+
139
139
  def _write_text_lf(path: Path, content: str) -> None:
140
140
  path.write_text(content, encoding="utf-8", newline="\n")
141
141
 
@@ -145,8 +145,8 @@ def _assert_inside(base: Path, target: Path) -> None:
145
145
  resolved_target = target.resolve()
146
146
  if resolved_target != resolved_base and resolved_base not in resolved_target.parents:
147
147
  raise ValueError(f"Refusing to write outside SDTK-WIKI workspace: {resolved_target}")
148
-
149
-
148
+
149
+
150
150
  def _is_excluded(
151
151
  path: Path,
152
152
  root: Path,
@@ -189,208 +189,208 @@ def _match_exclude(
189
189
  return frag
190
190
 
191
191
  return None
192
-
193
-
194
- def _extract_title(text: str) -> str:
195
- for line in text.splitlines():
196
- stripped = line.strip()
197
- if stripped.startswith("# "):
198
- return stripped[2:].strip()
199
- return ""
200
-
201
-
202
- def _extract_headings(text: str) -> list[str]:
203
- headings: list[str] = []
204
- for line in text.splitlines():
205
- stripped = line.strip()
206
- if not stripped.startswith("#"):
207
- continue
208
- level = len(stripped) - len(stripped.lstrip("#"))
209
- if 1 <= level <= 6 and len(stripped) > level and stripped[level] == " ":
210
- headings.append(stripped[level + 1:].strip())
211
- return headings
212
-
213
-
214
- def _parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
215
- lines = text.splitlines()
216
- if not lines or lines[0].strip() != "---":
217
- return {}, text
218
-
219
- fields: dict[str, Any] = {}
220
- current_list_key: str | None = None
221
- for idx in range(1, len(lines)):
222
- raw = lines[idx]
223
- stripped = raw.strip()
224
- if stripped in {"---", "..."}:
225
- body = "\n".join(lines[idx + 1:])
226
- if text.endswith("\n"):
227
- body += "\n"
228
- return fields, body
229
- if not stripped:
230
- current_list_key = None
231
- continue
232
- if stripped.startswith("- ") and current_list_key and isinstance(fields.get(current_list_key), list):
233
- fields[current_list_key].append(stripped[2:].strip().strip('"\''))
234
- continue
235
- if ":" not in raw:
236
- current_list_key = None
237
- continue
238
- key, value = raw.split(":", 1)
239
- key = key.strip()
240
- value = value.strip()
241
- if not key:
242
- current_list_key = None
243
- continue
244
- if not value:
245
- fields[key] = []
246
- current_list_key = key
247
- continue
248
- if value.startswith("[") and value.endswith("]"):
249
- inner = value[1:-1].strip()
250
- if inner:
251
- fields[key] = [part.strip().strip('"\'') for part in inner.split(",") if part.strip()]
252
- else:
253
- fields[key] = []
254
- current_list_key = None
255
- continue
256
- fields[key] = value.strip('"\'')
257
- current_list_key = None
258
-
259
- return {}, text
260
-
261
-
262
- def _normalize_internal_ref(raw: str) -> str:
263
- value = raw.strip()
264
- if not value:
265
- return ""
266
- value = value.split("|", 1)[0].strip()
267
- value = value.split("#", 1)[0].strip()
268
- value = value.replace("\\", "/")
269
- while value.startswith("./"):
270
- value = value[2:]
271
- if value.startswith("/"):
272
- value = value[1:]
273
- return value.strip()
274
-
275
-
276
- def _extract_references(text: str) -> tuple[list[str], list[str], list[str]]:
277
- issues = sorted(set(f"BK-{m}" for m in RE_BK.findall(text)))
278
- knowledge_ids = sorted(
279
- set(f"{m[0]}-{m[1]}" for m in RE_KNOWLEDGE_ID.findall(text))
280
- )
281
- raw_paths = RE_REPO_PATH.findall(text)
282
- paths: list[str] = []
283
- seen: set[str] = set()
284
- for rp in raw_paths:
285
- normalised = _normalize_internal_ref(rp)
286
- if normalised and normalised not in seen:
287
- seen.add(normalised)
288
- paths.append(normalised)
289
- return issues, knowledge_ids, paths
290
-
291
-
292
- def _extract_wiki_links(text: str) -> list[str]:
293
- links: list[str] = []
294
- seen: set[str] = set()
295
- for raw in RE_WIKI_LINK.findall(text):
296
- normalised = _normalize_internal_ref(raw)
297
- if normalised and normalised not in seen:
298
- seen.add(normalised)
299
- links.append(normalised)
300
- return links
301
-
302
-
303
- def _extract_markdown_links(text: str) -> list[str]:
304
- links: list[str] = []
305
- seen: set[str] = set()
306
- for raw in RE_MARKDOWN_LINK.findall(text):
307
- target = raw.strip().strip('<>')
308
- lower = target.lower()
309
- if not target or lower.startswith(("http://", "https://", "mailto:", "#")) or "://" in target:
310
- continue
311
- # Markdown links may include optional titles: [x](path.md "title").
312
- if ' "' in target:
313
- target = target.split(' "', 1)[0]
314
- if " '" in target:
315
- target = target.split(" '", 1)[0]
316
- normalised = _normalize_internal_ref(target)
317
- if normalised and normalised not in seen:
318
- seen.add(normalised)
319
- links.append(normalised)
320
- return links
321
-
322
-
323
- def _extract_skill_refs(text: str, path_refs: list[str], wiki_links: list[str]) -> list[str]:
324
- refs = set(match.lower() for match in RE_SKILL_REF.findall(text))
325
- for ref in path_refs + wiki_links:
326
- parts = [part for part in ref.split("/") if part]
327
- for marker in ("skills", "skills-claude"):
328
- if marker in parts:
329
- idx = parts.index(marker)
330
- if idx + 1 < len(parts):
331
- refs.add(parts[idx + 1].lower())
332
- return sorted(refs)
333
-
334
-
335
- def _extract_template_refs(path_refs: list[str], wiki_links: list[str]) -> list[str]:
336
- refs: set[str] = set()
337
- for ref in path_refs + wiki_links:
338
- norm = _normalize_internal_ref(ref)
339
- if "/templates/" in f"/{norm}":
340
- refs.add(norm)
341
- return sorted(refs)
342
-
343
-
344
- def _extract_release_refs(text: str) -> list[str]:
345
- return sorted(set(RE_RELEASE_REF.findall(text)))
346
-
347
-
348
- def _compute_file_hash(md_file: Path) -> str:
349
- content = md_file.read_bytes()
350
- return hashlib.sha256(content).hexdigest()
351
-
352
-
192
+
193
+
194
+ def _extract_title(text: str) -> str:
195
+ for line in text.splitlines():
196
+ stripped = line.strip()
197
+ if stripped.startswith("# "):
198
+ return stripped[2:].strip()
199
+ return ""
200
+
201
+
202
+ def _extract_headings(text: str) -> list[str]:
203
+ headings: list[str] = []
204
+ for line in text.splitlines():
205
+ stripped = line.strip()
206
+ if not stripped.startswith("#"):
207
+ continue
208
+ level = len(stripped) - len(stripped.lstrip("#"))
209
+ if 1 <= level <= 6 and len(stripped) > level and stripped[level] == " ":
210
+ headings.append(stripped[level + 1:].strip())
211
+ return headings
212
+
213
+
214
+ def _parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
215
+ lines = text.splitlines()
216
+ if not lines or lines[0].strip() != "---":
217
+ return {}, text
218
+
219
+ fields: dict[str, Any] = {}
220
+ current_list_key: str | None = None
221
+ for idx in range(1, len(lines)):
222
+ raw = lines[idx]
223
+ stripped = raw.strip()
224
+ if stripped in {"---", "..."}:
225
+ body = "\n".join(lines[idx + 1:])
226
+ if text.endswith("\n"):
227
+ body += "\n"
228
+ return fields, body
229
+ if not stripped:
230
+ current_list_key = None
231
+ continue
232
+ if stripped.startswith("- ") and current_list_key and isinstance(fields.get(current_list_key), list):
233
+ fields[current_list_key].append(stripped[2:].strip().strip('"\''))
234
+ continue
235
+ if ":" not in raw:
236
+ current_list_key = None
237
+ continue
238
+ key, value = raw.split(":", 1)
239
+ key = key.strip()
240
+ value = value.strip()
241
+ if not key:
242
+ current_list_key = None
243
+ continue
244
+ if not value:
245
+ fields[key] = []
246
+ current_list_key = key
247
+ continue
248
+ if value.startswith("[") and value.endswith("]"):
249
+ inner = value[1:-1].strip()
250
+ if inner:
251
+ fields[key] = [part.strip().strip('"\'') for part in inner.split(",") if part.strip()]
252
+ else:
253
+ fields[key] = []
254
+ current_list_key = None
255
+ continue
256
+ fields[key] = value.strip('"\'')
257
+ current_list_key = None
258
+
259
+ return {}, text
260
+
261
+
262
+ def _normalize_internal_ref(raw: str) -> str:
263
+ value = raw.strip()
264
+ if not value:
265
+ return ""
266
+ value = value.split("|", 1)[0].strip()
267
+ value = value.split("#", 1)[0].strip()
268
+ value = value.replace("\\", "/")
269
+ while value.startswith("./"):
270
+ value = value[2:]
271
+ if value.startswith("/"):
272
+ value = value[1:]
273
+ return value.strip()
274
+
275
+
276
+ def _extract_references(text: str) -> tuple[list[str], list[str], list[str]]:
277
+ issues = sorted(set(f"BK-{m}" for m in RE_BK.findall(text)))
278
+ knowledge_ids = sorted(
279
+ set(f"{m[0]}-{m[1]}" for m in RE_KNOWLEDGE_ID.findall(text))
280
+ )
281
+ raw_paths = RE_REPO_PATH.findall(text)
282
+ paths: list[str] = []
283
+ seen: set[str] = set()
284
+ for rp in raw_paths:
285
+ normalised = _normalize_internal_ref(rp)
286
+ if normalised and normalised not in seen:
287
+ seen.add(normalised)
288
+ paths.append(normalised)
289
+ return issues, knowledge_ids, paths
290
+
291
+
292
+ def _extract_wiki_links(text: str) -> list[str]:
293
+ links: list[str] = []
294
+ seen: set[str] = set()
295
+ for raw in RE_WIKI_LINK.findall(text):
296
+ normalised = _normalize_internal_ref(raw)
297
+ if normalised and normalised not in seen:
298
+ seen.add(normalised)
299
+ links.append(normalised)
300
+ return links
301
+
302
+
303
+ def _extract_markdown_links(text: str) -> list[str]:
304
+ links: list[str] = []
305
+ seen: set[str] = set()
306
+ for raw in RE_MARKDOWN_LINK.findall(text):
307
+ target = raw.strip().strip('<>')
308
+ lower = target.lower()
309
+ if not target or lower.startswith(("http://", "https://", "mailto:", "#")) or "://" in target:
310
+ continue
311
+ # Markdown links may include optional titles: [x](path.md "title").
312
+ if ' "' in target:
313
+ target = target.split(' "', 1)[0]
314
+ if " '" in target:
315
+ target = target.split(" '", 1)[0]
316
+ normalised = _normalize_internal_ref(target)
317
+ if normalised and normalised not in seen:
318
+ seen.add(normalised)
319
+ links.append(normalised)
320
+ return links
321
+
322
+
323
+ def _extract_skill_refs(text: str, path_refs: list[str], wiki_links: list[str]) -> list[str]:
324
+ refs = set(match.lower() for match in RE_SKILL_REF.findall(text))
325
+ for ref in path_refs + wiki_links:
326
+ parts = [part for part in ref.split("/") if part]
327
+ for marker in ("skills", "skills-claude"):
328
+ if marker in parts:
329
+ idx = parts.index(marker)
330
+ if idx + 1 < len(parts):
331
+ refs.add(parts[idx + 1].lower())
332
+ return sorted(refs)
333
+
334
+
335
+ def _extract_template_refs(path_refs: list[str], wiki_links: list[str]) -> list[str]:
336
+ refs: set[str] = set()
337
+ for ref in path_refs + wiki_links:
338
+ norm = _normalize_internal_ref(ref)
339
+ if "/templates/" in f"/{norm}":
340
+ refs.add(norm)
341
+ return sorted(refs)
342
+
343
+
344
+ def _extract_release_refs(text: str) -> list[str]:
345
+ return sorted(set(RE_RELEASE_REF.findall(text)))
346
+
347
+
348
+ def _compute_file_hash(md_file: Path) -> str:
349
+ content = md_file.read_bytes()
350
+ return hashlib.sha256(content).hexdigest()
351
+
352
+
353
353
  def _parse_doc_record(md_file: Path, root: Path) -> dict[str, Any]:
354
354
  rel = _display_scan_path(md_file, root)
355
355
  text = md_file.read_text(encoding="utf-8", errors="replace")
356
- frontmatter_fields, body_text = _parse_frontmatter(text)
357
- title = str(
358
- frontmatter_fields.get("title")
359
- or _extract_title(body_text)
360
- or md_file.stem.replace("_", " ").replace("-", " ")
361
- )
362
- headings = _extract_headings(body_text)
363
- issues, knowledge_ids, path_refs = _extract_references(text)
364
- wiki_links = _extract_wiki_links(text)
365
- markdown_links = _extract_markdown_links(text)
366
- path_refs = sorted(set(path_refs + markdown_links))
367
- family = classify_family(rel)
368
- role = classify_role(rel)
369
- skill_refs = _extract_skill_refs(text, path_refs, wiki_links)
370
- template_refs = _extract_template_refs(path_refs, wiki_links)
371
- release_refs = _extract_release_refs(text)
372
- return {
373
- "id": rel,
374
- "path": rel,
375
- "title": title,
376
- "family": family,
377
- "role": role,
378
- "trust_zone": "medium",
379
- "body_markdown": body_text,
380
- "issues": issues,
381
- "knowledge_ids": knowledge_ids,
382
- "headings": headings,
383
- "frontmatter_fields": frontmatter_fields,
384
- "skill_refs": skill_refs,
385
- "template_refs": template_refs,
386
- "release_refs": release_refs,
387
- "lane_refs": [],
388
- "wiki_links": wiki_links,
389
- "path_refs": path_refs,
390
- "outgoing_paths": path_refs,
391
- }
392
-
393
-
356
+ frontmatter_fields, body_text = _parse_frontmatter(text)
357
+ title = str(
358
+ frontmatter_fields.get("title")
359
+ or _extract_title(body_text)
360
+ or md_file.stem.replace("_", " ").replace("-", " ")
361
+ )
362
+ headings = _extract_headings(body_text)
363
+ issues, knowledge_ids, path_refs = _extract_references(text)
364
+ wiki_links = _extract_wiki_links(text)
365
+ markdown_links = _extract_markdown_links(text)
366
+ path_refs = sorted(set(path_refs + markdown_links))
367
+ family = classify_family(rel)
368
+ role = classify_role(rel)
369
+ skill_refs = _extract_skill_refs(text, path_refs, wiki_links)
370
+ template_refs = _extract_template_refs(path_refs, wiki_links)
371
+ release_refs = _extract_release_refs(text)
372
+ return {
373
+ "id": rel,
374
+ "path": rel,
375
+ "title": title,
376
+ "family": family,
377
+ "role": role,
378
+ "trust_zone": "medium",
379
+ "body_markdown": body_text,
380
+ "issues": issues,
381
+ "knowledge_ids": knowledge_ids,
382
+ "headings": headings,
383
+ "frontmatter_fields": frontmatter_fields,
384
+ "skill_refs": skill_refs,
385
+ "template_refs": template_refs,
386
+ "release_refs": release_refs,
387
+ "lane_refs": [],
388
+ "wiki_links": wiki_links,
389
+ "path_refs": path_refs,
390
+ "outgoing_paths": path_refs,
391
+ }
392
+
393
+
394
394
  def list_indexable_markdown_files(
395
395
  root: Path,
396
396
  scan_roots: list[Path],
@@ -413,11 +413,11 @@ def collect_indexable_markdown_files(
413
413
  if not scan_root.exists():
414
414
  print(f"[atlas] Warning: scan root does not exist, skipping: {scan_root}", file=sys.stderr)
415
415
  continue
416
- if scan_root.is_file() and scan_root.suffix.lower() == ".md":
417
- candidates = [scan_root]
418
- elif scan_root.is_dir():
419
- candidates = [p for p in sorted(scan_root.rglob("*.md")) if p.is_file()]
420
- else:
416
+ if scan_root.is_file() and scan_root.suffix.lower() == ".md":
417
+ candidates = [scan_root]
418
+ elif scan_root.is_dir():
419
+ candidates = [p for p in sorted(scan_root.rglob("*.md")) if p.is_file()]
420
+ else:
421
421
  candidates = []
422
422
 
423
423
  for md_file in candidates:
@@ -455,37 +455,37 @@ def collect_indexable_markdown_files(
455
455
  "skipped_count": len(skipped_files),
456
456
  "skipped_files": skipped_files,
457
457
  }
458
-
459
-
460
- # ---------------------------------------------------------------------------
461
- # Incremental build
462
- # ---------------------------------------------------------------------------
463
- def _empty_atlas_state() -> dict[str, Any]:
464
- return {"version": ATLAS_STATE_VERSION, "documents": {}}
465
-
466
-
467
- def _atlas_state_path(atlas_dir: Path) -> Path:
468
- return atlas_dir / "ATLAS_STATE.json"
469
-
470
-
471
- def load_atlas_state(atlas_dir: Path) -> dict[str, Any]:
472
- state_path = _atlas_state_path(atlas_dir)
473
- if not state_path.exists():
474
- return _empty_atlas_state()
475
- try:
476
- data = json.loads(state_path.read_text(encoding="utf-8"))
477
- except (OSError, json.JSONDecodeError):
478
- return _empty_atlas_state()
479
- if not isinstance(data, dict):
480
- return _empty_atlas_state()
481
- if data.get("version") != ATLAS_STATE_VERSION:
482
- return _empty_atlas_state()
483
- documents = data.get("documents")
484
- if not isinstance(documents, dict):
485
- return _empty_atlas_state()
486
- return {"version": ATLAS_STATE_VERSION, "generated": data.get("generated"), "documents": documents}
487
-
488
-
458
+
459
+
460
+ # ---------------------------------------------------------------------------
461
+ # Incremental build
462
+ # ---------------------------------------------------------------------------
463
+ def _empty_atlas_state() -> dict[str, Any]:
464
+ return {"version": ATLAS_STATE_VERSION, "documents": {}}
465
+
466
+
467
+ def _atlas_state_path(atlas_dir: Path) -> Path:
468
+ return atlas_dir / "ATLAS_STATE.json"
469
+
470
+
471
+ def load_atlas_state(atlas_dir: Path) -> dict[str, Any]:
472
+ state_path = _atlas_state_path(atlas_dir)
473
+ if not state_path.exists():
474
+ return _empty_atlas_state()
475
+ try:
476
+ data = json.loads(state_path.read_text(encoding="utf-8"))
477
+ except (OSError, json.JSONDecodeError):
478
+ return _empty_atlas_state()
479
+ if not isinstance(data, dict):
480
+ return _empty_atlas_state()
481
+ if data.get("version") != ATLAS_STATE_VERSION:
482
+ return _empty_atlas_state()
483
+ documents = data.get("documents")
484
+ if not isinstance(documents, dict):
485
+ return _empty_atlas_state()
486
+ return {"version": ATLAS_STATE_VERSION, "generated": data.get("generated"), "documents": documents}
487
+
488
+
489
489
  def save_atlas_state(state: dict[str, Any], atlas_dir: Path) -> Path:
490
490
  atlas_dir.mkdir(parents=True, exist_ok=True)
491
491
  state_path = _atlas_state_path(atlas_dir)
@@ -696,8 +696,8 @@ def write_wiki_pages_and_provenance(
696
696
  "changes_path": str(changes_path),
697
697
  "changes": change_set,
698
698
  }
699
-
700
-
699
+
700
+
701
701
  def build_docs_incremental(
702
702
  root: Path,
703
703
  atlas_dir: Path,
@@ -709,67 +709,67 @@ def build_docs_incremental(
709
709
  prior_documents = prior_state.get("documents", {})
710
710
  scan_result = collect_indexable_markdown_files(root, scan_roots, exclude_frags)
711
711
  current_files = scan_result["files"]
712
-
713
- current_rel_paths = {}
714
- for md_file in current_files:
715
- try:
716
- rel = md_file.relative_to(root).as_posix()
717
- except ValueError:
718
- rel = md_file.as_posix()
719
- current_rel_paths[rel] = md_file
720
-
721
- next_documents: dict[str, Any] = {}
722
- reused_count = 0
723
- reparsed_count = 0
724
-
725
- for rel, md_file in current_rel_paths.items():
726
- stats = md_file.stat()
727
- current_mtime = stats.st_mtime_ns
728
- prior_record = prior_documents.get(rel)
729
- prior_doc = prior_record.get("doc") if isinstance(prior_record, dict) else None
730
-
731
- if (
732
- isinstance(prior_record, dict)
733
- and isinstance(prior_doc, dict)
734
- and prior_record.get("mtime") == current_mtime
735
- ):
736
- next_documents[rel] = prior_record
737
- reused_count += 1
738
- continue
739
-
740
- current_hash = _compute_file_hash(md_file)
741
- if (
742
- isinstance(prior_record, dict)
743
- and isinstance(prior_doc, dict)
744
- and prior_record.get("hash") == current_hash
745
- ):
746
- next_documents[rel] = {
747
- "mtime": current_mtime,
748
- "hash": current_hash,
749
- "last_indexed": prior_record.get("last_indexed") or generated,
750
- "doc": prior_doc,
751
- }
752
- reused_count += 1
753
- continue
754
-
755
- next_documents[rel] = {
756
- "mtime": current_mtime,
757
- "hash": current_hash,
758
- "last_indexed": generated,
759
- "doc": _parse_doc_record(md_file, root=root),
760
- }
761
- reparsed_count += 1
762
-
763
- removed_count = len(set(prior_documents.keys()) - set(current_rel_paths.keys()))
764
- docs = sorted(
765
- [record["doc"] for record in next_documents.values()],
766
- key=lambda d: d["id"],
767
- )
768
- next_state = {
769
- "version": ATLAS_STATE_VERSION,
770
- "generated": generated,
771
- "documents": next_documents,
772
- }
712
+
713
+ current_rel_paths = {}
714
+ for md_file in current_files:
715
+ try:
716
+ rel = md_file.relative_to(root).as_posix()
717
+ except ValueError:
718
+ rel = md_file.as_posix()
719
+ current_rel_paths[rel] = md_file
720
+
721
+ next_documents: dict[str, Any] = {}
722
+ reused_count = 0
723
+ reparsed_count = 0
724
+
725
+ for rel, md_file in current_rel_paths.items():
726
+ stats = md_file.stat()
727
+ current_mtime = stats.st_mtime_ns
728
+ prior_record = prior_documents.get(rel)
729
+ prior_doc = prior_record.get("doc") if isinstance(prior_record, dict) else None
730
+
731
+ if (
732
+ isinstance(prior_record, dict)
733
+ and isinstance(prior_doc, dict)
734
+ and prior_record.get("mtime") == current_mtime
735
+ ):
736
+ next_documents[rel] = prior_record
737
+ reused_count += 1
738
+ continue
739
+
740
+ current_hash = _compute_file_hash(md_file)
741
+ if (
742
+ isinstance(prior_record, dict)
743
+ and isinstance(prior_doc, dict)
744
+ and prior_record.get("hash") == current_hash
745
+ ):
746
+ next_documents[rel] = {
747
+ "mtime": current_mtime,
748
+ "hash": current_hash,
749
+ "last_indexed": prior_record.get("last_indexed") or generated,
750
+ "doc": prior_doc,
751
+ }
752
+ reused_count += 1
753
+ continue
754
+
755
+ next_documents[rel] = {
756
+ "mtime": current_mtime,
757
+ "hash": current_hash,
758
+ "last_indexed": generated,
759
+ "doc": _parse_doc_record(md_file, root=root),
760
+ }
761
+ reparsed_count += 1
762
+
763
+ removed_count = len(set(prior_documents.keys()) - set(current_rel_paths.keys()))
764
+ docs = sorted(
765
+ [record["doc"] for record in next_documents.values()],
766
+ key=lambda d: d["id"],
767
+ )
768
+ next_state = {
769
+ "version": ATLAS_STATE_VERSION,
770
+ "generated": generated,
771
+ "documents": next_documents,
772
+ }
773
773
  build_stats = {
774
774
  "discovered_count": len(current_rel_paths),
775
775
  "scanned_count": scan_result["scanned_count"],
@@ -780,138 +780,138 @@ def build_docs_incremental(
780
780
  "reparsed_count": reparsed_count,
781
781
  "removed_count": removed_count,
782
782
  }
783
- return docs, next_state, build_stats
784
-
785
-
786
- # ---------------------------------------------------------------------------
787
- # Graph builder
788
- # ---------------------------------------------------------------------------
789
- def _build_doc_alias_map(docs: list[dict[str, Any]]) -> dict[str, set[str]]:
790
- alias_map: dict[str, set[str]] = {}
791
- for doc in docs:
792
- doc_id = doc["id"]
793
- path_obj = Path(doc_id)
794
- aliases = {
795
- doc_id,
796
- doc_id.lower(),
797
- path_obj.name,
798
- path_obj.name.lower(),
799
- path_obj.stem,
800
- path_obj.stem.lower(),
801
- }
802
- if doc_id.lower().endswith(".md"):
803
- no_ext = doc_id[:-3]
804
- no_ext_path = Path(no_ext)
805
- aliases.update({no_ext, no_ext.lower(), no_ext_path.name, no_ext_path.name.lower()})
806
- for alias in aliases:
807
- alias_map.setdefault(alias, set()).add(doc_id)
808
- return alias_map
809
-
810
-
811
- def _resolve_doc_reference(raw: str, alias_map: dict[str, set[str]]) -> str | None:
812
- normalised = _normalize_internal_ref(raw)
813
- if not normalised:
814
- return None
815
- candidates = [normalised, normalised.lower()]
816
- if not normalised.lower().endswith(".md"):
817
- candidates.extend([f"{normalised}.md", f"{normalised.lower()}.md"])
818
- for candidate in candidates:
819
- matches = alias_map.get(candidate)
820
- if matches and len(matches) == 1:
821
- return next(iter(matches))
822
- return None
823
-
824
-
825
- def build_graph(docs: list[dict[str, Any]]) -> dict[str, Any]:
826
- alias_map = _build_doc_alias_map(docs)
827
-
828
- nodes = [
829
- {
830
- "id": d["id"],
831
- "title": d["title"],
832
- "family": d["family"],
833
- "role": d["role"],
834
- "trust_zone": d.get("trust_zone", "medium"),
835
- }
836
- for d in docs
837
- ]
838
-
839
- edges: list[dict[str, Any]] = []
840
-
841
- for doc in docs:
842
- src = doc["id"]
843
-
844
- for issue in doc.get("issues", []):
845
- edges.append({"source": src, "target": issue, "type": "references_issue", "label": issue})
846
-
847
- for kid in doc.get("knowledge_ids", []):
848
- edges.append({"source": src, "target": kid, "type": "references_knowledge_object", "label": kid})
849
-
850
- for rp in doc.get("path_refs", doc.get("outgoing_paths", [])):
851
- target = _resolve_doc_reference(rp, alias_map)
852
- if target:
853
- edges.append({"source": src, "target": target, "type": "references_path", "label": rp})
854
-
855
- for wiki_ref in doc.get("wiki_links", []):
856
- target = _resolve_doc_reference(wiki_ref, alias_map)
857
- if target:
858
- edges.append({"source": src, "target": target, "type": "references_wiki_link", "label": wiki_ref})
859
-
860
- for skill_ref in doc.get("skill_refs", []):
861
- edges.append({"source": src, "target": f"__skill__{skill_ref}", "type": "references_skill", "label": skill_ref})
862
-
863
- for template_ref in doc.get("template_refs", []):
864
- edges.append({"source": src, "target": f"__template__{template_ref}", "type": "references_template", "label": template_ref})
865
-
866
- family_groups: dict[str, list[str]] = {}
867
- for doc in docs:
868
- family_groups.setdefault(doc["family"], []).append(doc["id"])
869
- for family, members in family_groups.items():
870
- if len(members) < 2:
871
- continue
872
- for mid in members:
873
- edges.append({"source": mid, "target": f"__family__{family}", "type": "same_family", "label": family})
874
-
875
- return {"nodes": nodes, "edges": edges}
876
-
877
-
878
- # ---------------------------------------------------------------------------
879
- # Summary markdown
880
- # ---------------------------------------------------------------------------
783
+ return docs, next_state, build_stats
784
+
785
+
786
+ # ---------------------------------------------------------------------------
787
+ # Graph builder
788
+ # ---------------------------------------------------------------------------
789
+ def _build_doc_alias_map(docs: list[dict[str, Any]]) -> dict[str, set[str]]:
790
+ alias_map: dict[str, set[str]] = {}
791
+ for doc in docs:
792
+ doc_id = doc["id"]
793
+ path_obj = Path(doc_id)
794
+ aliases = {
795
+ doc_id,
796
+ doc_id.lower(),
797
+ path_obj.name,
798
+ path_obj.name.lower(),
799
+ path_obj.stem,
800
+ path_obj.stem.lower(),
801
+ }
802
+ if doc_id.lower().endswith(".md"):
803
+ no_ext = doc_id[:-3]
804
+ no_ext_path = Path(no_ext)
805
+ aliases.update({no_ext, no_ext.lower(), no_ext_path.name, no_ext_path.name.lower()})
806
+ for alias in aliases:
807
+ alias_map.setdefault(alias, set()).add(doc_id)
808
+ return alias_map
809
+
810
+
811
def _resolve_doc_reference(raw: str, alias_map: dict[str, set[str]]) -> str | None:
    """Resolve a raw reference string to a single document id.

    The reference is first passed through ``_normalize_internal_ref``.  The
    normalised value is then looked up verbatim and lower-cased; when it does
    not already end in ``.md`` the same two forms with ``.md`` appended are
    tried as well.  The first candidate owned by exactly one document wins.

    Args:
        raw: Reference text as extracted from a document.
        alias_map: Alias-to-owning-ids mapping.

    Returns:
        The matching document id, or ``None`` when the reference normalises
        to something falsy, is unknown, or is ambiguous for every candidate.
    """
    ref = _normalize_internal_ref(raw)
    if not ref:
        return None

    lowered = ref.lower()
    if lowered.endswith(".md"):
        probes = (ref, lowered)
    else:
        probes = (ref, lowered, f"{ref}.md", f"{lowered}.md")

    for probe in probes:
        owners = alias_map.get(probe) or ()
        # Aliases shared by several documents are skipped rather than guessed.
        if len(owners) == 1:
            (only,) = owners
            return only
    return None
823
+
824
+
825
def build_graph(docs: list[dict[str, Any]]) -> dict[str, Any]:
    """Build the document graph (nodes plus typed edges) from indexed docs.

    One node is emitted per document.  Edges are emitted per document in this
    order: issues, knowledge ids, path references, wiki links, skill
    references, template references.  Path and wiki references only yield an
    edge when they resolve to a single known document.  After all documents,
    every family with at least two members links each member to a synthetic
    ``__family__<name>`` target.

    Issue, knowledge, skill, template and family targets are plain strings;
    no node is created for them here.

    Args:
        docs: Document records carrying ``id``, ``title``, ``family`` and
            ``role``; the reference lists are optional.

    Returns:
        ``{"nodes": [...], "edges": [...]}``.
    """
    alias_map = _build_doc_alias_map(docs)

    def _edge(source: str, target: str, kind: str, label: str) -> dict[str, Any]:
        return {"source": source, "target": target, "type": kind, "label": label}

    nodes: list[dict[str, Any]] = []
    for entry in docs:
        nodes.append(
            {
                "id": entry["id"],
                "title": entry["title"],
                "family": entry["family"],
                "role": entry["role"],
                "trust_zone": entry.get("trust_zone", "medium"),
            }
        )

    edges: list[dict[str, Any]] = []
    for entry in docs:
        origin = entry["id"]

        edges.extend(
            _edge(origin, issue, "references_issue", issue)
            for issue in entry.get("issues", [])
        )
        edges.extend(
            _edge(origin, kid, "references_knowledge_object", kid)
            for kid in entry.get("knowledge_ids", [])
        )

        # "outgoing_paths" is consulted only when "path_refs" is absent.
        for raw_path in entry.get("path_refs", entry.get("outgoing_paths", [])):
            resolved = _resolve_doc_reference(raw_path, alias_map)
            if resolved:
                edges.append(_edge(origin, resolved, "references_path", raw_path))

        for raw_link in entry.get("wiki_links", []):
            resolved = _resolve_doc_reference(raw_link, alias_map)
            if resolved:
                edges.append(_edge(origin, resolved, "references_wiki_link", raw_link))

        edges.extend(
            _edge(origin, f"__skill__{skill}", "references_skill", skill)
            for skill in entry.get("skill_refs", [])
        )
        edges.extend(
            _edge(origin, f"__template__{tpl}", "references_template", tpl)
            for tpl in entry.get("template_refs", [])
        )

    members_by_family: dict[str, list[str]] = {}
    for entry in docs:
        members_by_family.setdefault(entry["family"], []).append(entry["id"])

    for family, members in members_by_family.items():
        # A family with a single member gets no family edge.
        if len(members) >= 2:
            hub = f"__family__{family}"
            edges.extend(_edge(member, hub, "same_family", family) for member in members)

    return {"nodes": nodes, "edges": edges}
876
+
877
+
878
+ # ---------------------------------------------------------------------------
879
+ # Summary markdown
880
+ # ---------------------------------------------------------------------------
881
881
  def build_summary(
882
- docs: list[dict[str, Any]],
883
- graph: dict[str, Any],
884
- generated: str,
882
+ docs: list[dict[str, Any]],
883
+ graph: dict[str, Any],
884
+ generated: str,
885
885
  stats: dict[str, Any] | None,
886
- root: Path,
887
- scan_roots: list[Path],
888
- exclude_frags: list[str],
889
- ) -> str:
890
- family_counts: dict[str, int] = {}
891
- for d in docs:
892
- family_counts[d["family"]] = family_counts.get(d["family"], 0) + 1
893
-
894
- edge_type_counts: dict[str, int] = {}
895
- for e in graph["edges"]:
896
- et = e["type"]
897
- edge_type_counts[et] = edge_type_counts.get(et, 0) + 1
898
-
899
- lines: list[str] = [
886
+ root: Path,
887
+ scan_roots: list[Path],
888
+ exclude_frags: list[str],
889
+ ) -> str:
890
+ family_counts: dict[str, int] = {}
891
+ for d in docs:
892
+ family_counts[d["family"]] = family_counts.get(d["family"], 0) + 1
893
+
894
+ edge_type_counts: dict[str, int] = {}
895
+ for e in graph["edges"]:
896
+ et = e["type"]
897
+ edge_type_counts[et] = edge_type_counts.get(et, 0) + 1
898
+
899
+ lines: list[str] = [
900
900
  "# SDTK-WIKI Graph Summary",
901
- "",
902
- f"Generated: {generated}",
903
- f"Project root: {root}",
904
- "",
905
- "## Document Counts",
906
- "",
907
- f"Total documents indexed: {len(docs)}",
908
- "",
909
- "| Family | Count |",
910
- "|--------|-------|",
911
- ]
912
- for fam, cnt in sorted(family_counts.items(), key=lambda x: -x[1]):
913
- lines.append(f"| {fam} | {cnt} |")
914
-
901
+ "",
902
+ f"Generated: {generated}",
903
+ f"Project root: {root}",
904
+ "",
905
+ "## Document Counts",
906
+ "",
907
+ f"Total documents indexed: {len(docs)}",
908
+ "",
909
+ "| Family | Count |",
910
+ "|--------|-------|",
911
+ ]
912
+ for fam, cnt in sorted(family_counts.items(), key=lambda x: -x[1]):
913
+ lines.append(f"| {fam} | {cnt} |")
914
+
915
915
  if stats is not None:
916
916
  lines += [
917
917
  "",
@@ -936,107 +936,107 @@ def build_summary(
936
936
  ]
937
937
  for skipped in skipped_files:
938
938
  lines.append(f"| {skipped['path']} | {skipped['reason']} |")
939
-
940
- lines += [
941
- "",
942
- "## Graph Summary",
943
- "",
944
- f"Total nodes: {len(graph['nodes'])}",
945
- f"Total edges: {len(graph['edges'])}",
946
- "",
947
- "## Scan Roots",
948
- "",
949
- ]
950
- for sr in scan_roots:
951
- lines.append(f"- {sr}")
952
-
953
- lines += [
954
- "",
955
- "## Exclusions Applied",
956
- "",
957
- ]
958
- for frag in exclude_frags:
959
- lines.append(f"- {frag}")
960
-
961
- return "\n".join(lines) + "\n"
962
-
963
-
964
- # ---------------------------------------------------------------------------
965
- # Static viewer
966
- # ---------------------------------------------------------------------------
967
- _FAMILY_COLORS = {
968
- "governance": "#58a6ff",
969
- "guide": "#14b8a6",
970
- "backlog": "#d2a8ff",
971
- "spec": "#f0883e",
972
- "architecture": "#3fb950",
973
- "database": "#a371f7",
974
- "api": "#f778ba",
975
- "qa": "#79c0ff",
976
- "design": "#ffa657",
977
- "dev": "#56d364",
978
- "product": "#e3b341",
979
- "skill": "#58a6ff",
980
- "template": "#f0883e",
981
- "root-readme": "#e3b341",
982
- "other-markdown": "#8b949e",
983
- }
984
-
985
-
986
- def build_viewer(index: dict, graph: dict, generated: str) -> str:
987
- if not _VIEWER_TEMPLATE_PATH.exists():
988
- raise FileNotFoundError(f"Viewer template not found: {_VIEWER_TEMPLATE_PATH}")
989
- index_json = _json_for_inline_script(index)
990
- graph_json = _json_for_inline_script(graph)
991
- family_colors_json = _json_for_inline_script(_FAMILY_COLORS)
992
- template = _VIEWER_TEMPLATE_PATH.read_text(encoding="utf-8")
993
- return (
994
- template
995
- .replace("__ATLAS_GENERATED__", generated)
996
- .replace("__ATLAS_INDEX_JSON__", index_json)
997
- .replace("__ATLAS_GRAPH_JSON__", graph_json)
998
- .replace("__ATLAS_FAMILY_COLORS_JSON__", family_colors_json)
999
- )
1000
-
1001
-
1002
- def copy_viewer_assets(atlas_dir: Path) -> list[Path]:
1003
- if not MERMAID_VENDOR_PATH.exists():
1004
- raise FileNotFoundError(f"Missing Mermaid runtime asset: {MERMAID_VENDOR_PATH}")
1005
- atlas_dir.mkdir(parents=True, exist_ok=True)
1006
- # Copy mermaid to atlas root (same location the viewer template expects)
1007
- destination = atlas_dir / MERMAID_ASSET_NAME
1008
- shutil.copyfile(MERMAID_VENDOR_PATH, destination)
1009
- return [destination]
1010
-
1011
-
1012
- # ---------------------------------------------------------------------------
1013
- # Main build
1014
- # ---------------------------------------------------------------------------
1015
- def build_atlas(
1016
- root: Path,
1017
- atlas_dir: Path,
1018
- scan_roots: list[Path] | None = None,
1019
- exclude_frags: list[str] | None = None,
1020
- verbose: bool = False,
1021
- ) -> dict[str, Any]:
1022
- generated = _now_utc()
1023
- frags = exclude_frags if exclude_frags is not None else DEFAULT_EXCLUDE_FRAGS
1024
- roots = scan_roots if scan_roots else [root]
1025
-
1026
- print(f"[atlas] Project root: {root}")
1027
- print(f"[atlas] Output dir: {atlas_dir}")
1028
- print(f"[atlas] Scan roots: {[str(r) for r in roots]}")
1029
-
1030
- atlas_dir.mkdir(parents=True, exist_ok=True)
1031
-
1032
- print("[atlas] Scanning markdown files...")
1033
- docs, state, stats = build_docs_incremental(
1034
- root=root,
1035
- atlas_dir=atlas_dir,
1036
- generated=generated,
1037
- scan_roots=roots,
1038
- exclude_frags=frags,
1039
- )
939
+
940
+ lines += [
941
+ "",
942
+ "## Graph Summary",
943
+ "",
944
+ f"Total nodes: {len(graph['nodes'])}",
945
+ f"Total edges: {len(graph['edges'])}",
946
+ "",
947
+ "## Scan Roots",
948
+ "",
949
+ ]
950
+ for sr in scan_roots:
951
+ lines.append(f"- {sr}")
952
+
953
+ lines += [
954
+ "",
955
+ "## Exclusions Applied",
956
+ "",
957
+ ]
958
+ for frag in exclude_frags:
959
+ lines.append(f"- {frag}")
960
+
961
+ return "\n".join(lines) + "\n"
962
+
963
+
964
+ # ---------------------------------------------------------------------------
965
+ # Static viewer
966
+ # ---------------------------------------------------------------------------
967
# Hex colour assigned to each document family.  build_viewer() serialises this
# mapping and substitutes it for the __ATLAS_FAMILY_COLORS_JSON__ placeholder
# in the viewer template.
# Note that some families share a colour: governance/skill, spec/template and
# product/root-readme.
_FAMILY_COLORS = {
    "governance": "#58a6ff",
    "guide": "#14b8a6",
    "backlog": "#d2a8ff",
    "spec": "#f0883e",
    "architecture": "#3fb950",
    "database": "#a371f7",
    "api": "#f778ba",
    "qa": "#79c0ff",
    "design": "#ffa657",
    "dev": "#56d364",
    "product": "#e3b341",
    "skill": "#58a6ff",
    "template": "#f0883e",
    "root-readme": "#e3b341",
    "other-markdown": "#8b949e",
}
984
+
985
+
986
def build_viewer(index: dict, graph: dict, generated: str) -> str:
    """Render the static viewer HTML with the index and graph embedded.

    The viewer template is read from ``_VIEWER_TEMPLATE_PATH`` and its four
    placeholders (``__ATLAS_GENERATED__``, ``__ATLAS_INDEX_JSON__``,
    ``__ATLAS_GRAPH_JSON__``, ``__ATLAS_FAMILY_COLORS_JSON__``) are filled in.

    All placeholders are substituted in a single pass over the template, so
    text that was just inserted is never rescanned.  Chained ``str.replace``
    calls would expand a placeholder token that happens to occur inside the
    embedded data (for example a document that mentions
    ``__ATLAS_GRAPH_JSON__``) and corrupt the embedded JSON.

    Args:
        index: Document index payload to embed.
        graph: Graph payload to embed.
        generated: Generation timestamp string shown by the viewer.

    Returns:
        The complete viewer HTML.

    Raises:
        FileNotFoundError: If the viewer template file does not exist.
    """
    if not _VIEWER_TEMPLATE_PATH.exists():
        raise FileNotFoundError(f"Viewer template not found: {_VIEWER_TEMPLATE_PATH}")

    substitutions = {
        "__ATLAS_GENERATED__": generated,
        "__ATLAS_INDEX_JSON__": _json_for_inline_script(index),
        "__ATLAS_GRAPH_JSON__": _json_for_inline_script(graph),
        "__ATLAS_FAMILY_COLORS_JSON__": _json_for_inline_script(_FAMILY_COLORS),
    }
    template = _VIEWER_TEMPLATE_PATH.read_text(encoding="utf-8")

    placeholder_re = re.compile("|".join(re.escape(token) for token in substitutions))
    # A callable replacement keeps backslashes in the JSON payloads literal;
    # a string replacement would have them interpreted as regex escapes.
    return placeholder_re.sub(lambda match: substitutions[match.group(0)], template)
1000
+
1001
+
1002
def copy_viewer_assets(atlas_dir: Path) -> list[Path]:
    """Copy the vendored Mermaid runtime next to the generated viewer.

    Args:
        atlas_dir: Output directory for atlas artifacts; created if missing.

    Returns:
        A one-element list holding the path the asset was copied to.

    Raises:
        FileNotFoundError: If the vendored Mermaid asset is missing.
    """
    if not MERMAID_VENDOR_PATH.exists():
        raise FileNotFoundError(f"Missing Mermaid runtime asset: {MERMAID_VENDOR_PATH}")
    atlas_dir.mkdir(parents=True, exist_ok=True)
    destination = atlas_dir / MERMAID_ASSET_NAME
    # NOTE(review): MERMAID_ASSET_NAME is defined elsewhere; if it carries a
    # directory component (e.g. "vendor/..."), that directory must exist before
    # copying.  Creating the parent is a no-op when the asset sits directly in
    # atlas_dir.
    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(MERMAID_VENDOR_PATH, destination)
    return [destination]
1010
+
1011
+
1012
+ # ---------------------------------------------------------------------------
1013
+ # Main build
1014
+ # ---------------------------------------------------------------------------
1015
+ def build_atlas(
1016
+ root: Path,
1017
+ atlas_dir: Path,
1018
+ scan_roots: list[Path] | None = None,
1019
+ exclude_frags: list[str] | None = None,
1020
+ verbose: bool = False,
1021
+ ) -> dict[str, Any]:
1022
+ generated = _now_utc()
1023
+ frags = exclude_frags if exclude_frags is not None else DEFAULT_EXCLUDE_FRAGS
1024
+ roots = scan_roots if scan_roots else [root]
1025
+
1026
+ print(f"[atlas] Project root: {root}")
1027
+ print(f"[atlas] Output dir: {atlas_dir}")
1028
+ print(f"[atlas] Scan roots: {[str(r) for r in roots]}")
1029
+
1030
+ atlas_dir.mkdir(parents=True, exist_ok=True)
1031
+
1032
+ print("[atlas] Scanning markdown files...")
1033
+ docs, state, stats = build_docs_incremental(
1034
+ root=root,
1035
+ atlas_dir=atlas_dir,
1036
+ generated=generated,
1037
+ scan_roots=roots,
1038
+ exclude_frags=frags,
1039
+ )
1040
1040
  print(f"[atlas] Indexed {len(docs)} documents.")
1041
1041
  print(
1042
1042
  f"[atlas] Scan coverage: scanned {stats.get('scanned_count', len(docs))}, "
@@ -1050,7 +1050,7 @@ def build_atlas(
1050
1050
  )
1051
1051
  for skipped in stats.get("skipped_files", []):
1052
1052
  print(f"[atlas] Skipped markdown: {skipped['path']} ({skipped['reason']})")
1053
-
1053
+
1054
1054
  print("[atlas] Building graph...")
1055
1055
  graph = build_graph(docs)
1056
1056
  print(f"[atlas] Graph: {len(graph['nodes'])} nodes, {len(graph['edges'])} edges.")
@@ -1063,45 +1063,45 @@ def build_atlas(
1063
1063
  scan_roots=roots,
1064
1064
  )
1065
1065
  print(f"[atlas] Wiki pages: {wiki_result['page_count']}")
1066
-
1067
- index_data = {
1068
- "generated": generated,
1069
- "count": len(docs),
1070
- "documents": docs,
1071
- }
1072
-
1073
- save_atlas_state(state, atlas_dir=atlas_dir)
1074
-
1075
- index_path = atlas_dir / "SDTK_DOC_INDEX.json"
1076
- _write_text_lf(index_path, json.dumps(index_data, ensure_ascii=True, indent=2, sort_keys=False))
1077
-
1078
- graph_out = {
1079
- "generated": generated,
1080
- "node_count": len(graph["nodes"]),
1081
- "edge_count": len(graph["edges"]),
1082
- "nodes": graph["nodes"],
1083
- "edges": graph["edges"],
1084
- }
1085
- graph_path = atlas_dir / "SDTK_DOC_GRAPH.json"
1086
- _write_text_lf(graph_path, json.dumps(graph_out, ensure_ascii=True, indent=2, sort_keys=False))
1087
-
1088
- summary_text = build_summary(docs, graph, generated, stats=stats, root=root, scan_roots=roots, exclude_frags=frags)
1089
- summary_path = atlas_dir / "SDTK_DOC_ATLAS_SUMMARY.md"
1090
- _write_text_lf(summary_path, summary_text)
1091
-
1092
- viewer_html = build_viewer(index_data, graph_out, generated)
1093
- viewer_path = atlas_dir / "viewer.html"
1094
- _write_text_lf(viewer_path, viewer_html)
1095
-
1096
- for asset_path in copy_viewer_assets(atlas_dir=atlas_dir):
1097
- if verbose:
1098
- print(f"[atlas] Wrote asset: {asset_path.name}")
1099
-
1100
- print(f"[atlas] Done. Output: {atlas_dir}")
1101
- return {
1102
- "generated": generated,
1103
- "doc_count": len(docs),
1104
- "node_count": len(graph["nodes"]),
1066
+
1067
+ index_data = {
1068
+ "generated": generated,
1069
+ "count": len(docs),
1070
+ "documents": docs,
1071
+ }
1072
+
1073
+ save_atlas_state(state, atlas_dir=atlas_dir)
1074
+
1075
+ index_path = atlas_dir / "SDTK_DOC_INDEX.json"
1076
+ _write_text_lf(index_path, json.dumps(index_data, ensure_ascii=True, indent=2, sort_keys=False))
1077
+
1078
+ graph_out = {
1079
+ "generated": generated,
1080
+ "node_count": len(graph["nodes"]),
1081
+ "edge_count": len(graph["edges"]),
1082
+ "nodes": graph["nodes"],
1083
+ "edges": graph["edges"],
1084
+ }
1085
+ graph_path = atlas_dir / "SDTK_DOC_GRAPH.json"
1086
+ _write_text_lf(graph_path, json.dumps(graph_out, ensure_ascii=True, indent=2, sort_keys=False))
1087
+
1088
+ summary_text = build_summary(docs, graph, generated, stats=stats, root=root, scan_roots=roots, exclude_frags=frags)
1089
+ summary_path = atlas_dir / "SDTK_DOC_ATLAS_SUMMARY.md"
1090
+ _write_text_lf(summary_path, summary_text)
1091
+
1092
+ viewer_html = build_viewer(index_data, graph_out, generated)
1093
+ viewer_path = atlas_dir / "viewer.html"
1094
+ _write_text_lf(viewer_path, viewer_html)
1095
+
1096
+ for asset_path in copy_viewer_assets(atlas_dir=atlas_dir):
1097
+ if verbose:
1098
+ print(f"[atlas] Wrote asset: {asset_path.name}")
1099
+
1100
+ print(f"[atlas] Done. Output: {atlas_dir}")
1101
+ return {
1102
+ "generated": generated,
1103
+ "doc_count": len(docs),
1104
+ "node_count": len(graph["nodes"]),
1105
1105
  "edge_count": len(graph["edges"]),
1106
1106
  "stats": stats,
1107
1107
  "atlas_dir": str(atlas_dir),
@@ -1112,84 +1112,84 @@ def build_atlas(
1112
1112
  "changes_path": wiki_result["changes_path"],
1113
1113
  "changes": wiki_result["changes"],
1114
1114
  }
1115
-
1116
-
1117
- # ---------------------------------------------------------------------------
1118
- # CLI entry point
1119
- # ---------------------------------------------------------------------------
1120
- def main() -> int:
1121
- parser = argparse.ArgumentParser(
1115
+
1116
+
1117
+ # ---------------------------------------------------------------------------
1118
+ # CLI entry point
1119
+ # ---------------------------------------------------------------------------
1120
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the atlas builder.

    Args:
        argv: Argument list to parse.  ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so existing ``main()`` callers are
            unaffected; passing a list allows programmatic invocation.

    Returns:
        Process exit code: ``0`` on success, ``2`` when the build raised
        ``FileNotFoundError``, ``1`` when ``--project-root`` is not a
        directory or any other exception was raised.
    """
    parser = argparse.ArgumentParser(
        description="SDTK-WIKI Builder -- build a local document graph, wiki pages, and viewer.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--project-root",
        required=True,
        help="Absolute path to the project root to scan.",
    )
    parser.add_argument(
        "--output-dir",
        required=True,
        help="Directory to write atlas artifacts into.",
    )
    parser.add_argument(
        "--scan-root",
        dest="scan_roots",
        action="append",
        metavar="PATH",
        default=None,
        help="Explicit scan root (repeatable). Defaults to project root.",
    )
    parser.add_argument(
        "--exclude",
        dest="excludes",
        action="append",
        metavar="FRAG",
        default=None,
        help="Exclusion path fragment (repeatable). Defaults to standard set.",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False,
        help="Show incremental build detail.",
    )

    args = parser.parse_args(argv)

    root = Path(args.project_root).resolve()
    if not root.is_dir():
        print(f"[atlas] ERROR: --project-root is not a directory: {root}", file=sys.stderr)
        return 1

    atlas_dir = Path(args.output_dir).resolve()

    # None (rather than an empty list) lets build_atlas apply its own defaults.
    scan_roots: list[Path] | None = None
    if args.scan_roots:
        scan_roots = [Path(sr).resolve() for sr in args.scan_roots]

    excludes: list[str] | None = args.excludes if args.excludes else None

    try:
        result = build_atlas(
            root=root,
            atlas_dir=atlas_dir,
            scan_roots=scan_roots,
            exclude_frags=excludes,
            verbose=args.verbose,
        )
        # Print JSON summary to stdout for Node CLI to parse
        print(f"[atlas:result] {json.dumps(result)}")
        return 0
    except FileNotFoundError as e:
        print(f"[atlas] ERROR: {e}", file=sys.stderr)
        return 2
    except Exception as e:
        # Top-level boundary: report the failure and turn it into an exit code.
        print(f"[atlas] ERROR: {e}", file=sys.stderr)
        return 1
1192
+
1193
+
1194
+ if __name__ == "__main__":
1195
+ sys.exit(main())