sdtk-wiki-kit 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +186 -13
- package/assets/atlas/build_atlas.py +164 -79
- package/package.json +1 -1
- package/src/commands/help.js +38 -3
- package/src/commands/lint.js +2 -1
- package/src/commands/operations.js +345 -0
- package/src/commands/search.js +89 -0
- package/src/commands/wiki.js +83 -9
- package/src/index.js +35 -1
- package/src/lib/wiki-compile.js +694 -6
- package/src/lib/wiki-extract.js +637 -0
- package/src/lib/wiki-flags.js +8 -0
- package/src/lib/wiki-lint.js +179 -2
- package/src/lib/wiki-search.js +175 -0
package/README.md
CHANGED
|
@@ -28,6 +28,7 @@ Implemented in the Foundation/Beta package:
|
|
|
28
28
|
| Capability | Command |
|
|
29
29
|
|---|---|
|
|
30
30
|
| Initialize SDTK-WIKI workspace | `sdtk-wiki init` |
|
|
31
|
+
| Build a personal-brain without timestamp variables | `sdtk-wiki ingest`, `sdtk-wiki compile --mode safe --apply`, `sdtk-wiki query` |
|
|
31
32
|
| Build graph, viewer, generated pages, and provenance | `sdtk-wiki atlas build` |
|
|
32
33
|
| Open local viewer | `sdtk-wiki atlas open` |
|
|
33
34
|
| Watch markdown sources and rebuild | `sdtk-wiki atlas watch` |
|
|
@@ -36,13 +37,18 @@ Implemented in the Foundation/Beta package:
|
|
|
36
37
|
| Run non-destructive wiki lint | `sdtk-wiki lint` |
|
|
37
38
|
| Run stale-page prune dry-run report | `sdtk-wiki wiki prune --dry-run` |
|
|
38
39
|
| Generate local discovery plan from gap evidence | `sdtk-wiki wiki discover --plan` |
|
|
39
|
-
| Generate
|
|
40
|
-
|
|
|
41
|
-
|
|
|
40
|
+
| Generate local discovery plan with beginner facade | `sdtk-wiki discover --plan` |
|
|
41
|
+
| Run report-first maintenance cycle | `sdtk-wiki maintain --mode safe` |
|
|
42
|
+
| Generate semantic extraction dry-run report | `sdtk-wiki wiki extract --dry-run` |
|
|
43
|
+
| Generate compile dry-run preview and JSON sidecar | `sdtk-wiki wiki compile --dry-run` |
|
|
44
|
+
| Apply an approved compile JSON sidecar | `sdtk-wiki wiki compile --apply --yes` |
|
|
45
|
+
| Search generated personal-brain pages locally | `sdtk-wiki search` |
|
|
46
|
+
| Query generated personal-brain pages locally | `sdtk-wiki query` |
|
|
47
|
+
| Ask grounded questions over built graph | `sdtk-wiki ask` with `wiki.ask` entitlement/runtime preconditions |
|
|
48
|
+
| Save one redacted query record after successful Ask | `sdtk-wiki ask --save-query` with `wiki.ask` entitlement/runtime preconditions |
|
|
42
49
|
|
|
43
50
|
Not implemented in the Foundation/Beta runtime:
|
|
44
51
|
|
|
45
|
-
- `sdtk-wiki wiki compile --apply`
|
|
46
52
|
- automatic web discovery or web fetch
|
|
47
53
|
- automatic source ingest from the web
|
|
48
54
|
- destructive prune/delete/archive
|
|
@@ -60,6 +66,7 @@ Not implemented in the Foundation/Beta runtime:
|
|
|
60
66
|
| `.sdtk/wiki/raw` | metadata-only raw/source registry |
|
|
61
67
|
| `.sdtk/wiki/provenance` | source/build/ingest provenance |
|
|
62
68
|
| `.sdtk/wiki/reports` | lint, prune, discover, and compile preview reports |
|
|
69
|
+
| `.sdtk/wiki/personal-brain` | generated semantic personal-brain pages from explicit apply |
|
|
63
70
|
| `.sdtk/wiki/queries` | opt-in redacted Ask query records |
|
|
64
71
|
| `.sdtk/atlas` | legacy Atlas compatibility output, readable only |
|
|
65
72
|
|
|
@@ -88,6 +95,41 @@ Open the viewer:
|
|
|
88
95
|
sdtk-wiki atlas open --project-path .
|
|
89
96
|
```
|
|
90
97
|
|
|
98
|
+
## Simple Personal-Brain Flow
|
|
99
|
+
|
|
100
|
+
Use this when you want the shortest local second-brain path and do not need to
|
|
101
|
+
inspect timestamped JSON files:
|
|
102
|
+
|
|
103
|
+
```powershell
|
|
104
|
+
sdtk-wiki init --no-open
|
|
105
|
+
sdtk-wiki ingest .\source-md
|
|
106
|
+
sdtk-wiki compile --mode safe --apply
|
|
107
|
+
sdtk-wiki query "multi-agent"
|
|
108
|
+
sdtk-wiki lint
|
|
109
|
+
sdtk-wiki discover --plan
|
|
110
|
+
sdtk-wiki maintain --mode safe
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Behavior:
|
|
114
|
+
|
|
115
|
+
- `ingest` runs local semantic extraction and writes a report.
|
|
116
|
+
- `compile --mode safe --apply` creates or refreshes the compile preview/JSON
|
|
117
|
+
sidecar internally, then applies the sidecar contract.
|
|
118
|
+
- `query` is deterministic local search over `.sdtk/wiki/personal-brain`; it
|
|
119
|
+
does not use premium Ask, LLM/RAG, web fetch, or query history.
|
|
120
|
+
- `discover` and `maintain` are report-first and do not apply, delete, archive,
|
|
121
|
+
fetch web sources, or mutate `.sdtk/atlas`.
|
|
122
|
+
|
|
123
|
+
Interactive viewer flow:
|
|
124
|
+
|
|
125
|
+
```powershell
|
|
126
|
+
sdtk-wiki init
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Bare `sdtk-wiki init` may open/start the viewer and keep the terminal session
|
|
130
|
+
active. Use `sdtk-wiki init --no-open` for automation, CI, and local
|
|
131
|
+
non-interactive validation.
|
|
132
|
+
|
|
91
133
|
## Command Reference
|
|
92
134
|
|
|
93
135
|
### Init
|
|
@@ -128,6 +170,20 @@ Safety:
|
|
|
128
170
|
- no generated page updates
|
|
129
171
|
- no graph/viewer rebuild side effects
|
|
130
172
|
|
|
173
|
+
### Beginner Ingest Operation
|
|
174
|
+
|
|
175
|
+
```powershell
|
|
176
|
+
sdtk-wiki ingest <source-root> [--project-path <path>]
|
|
177
|
+
sdtk-wiki ingest --source-root <source-root> [--project-path <path>]
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Runs semantic extraction over a local Markdown source root and writes the latest
|
|
181
|
+
`semantic-extraction-dry-run-*.json` report under `.sdtk/wiki/reports`.
|
|
182
|
+
|
|
183
|
+
This is the beginner-friendly facade for `sdtk-wiki wiki extract --dry-run`.
|
|
184
|
+
It does not mutate source files, personal-brain pages, raw/provenance state,
|
|
185
|
+
graph outputs, web sources, Ask state, or `.sdtk/atlas`.
|
|
186
|
+
|
|
131
187
|
### Lint
|
|
132
188
|
|
|
133
189
|
```powershell
|
|
@@ -164,14 +220,49 @@ Safety:
|
|
|
164
220
|
- no compile/apply
|
|
165
221
|
- no prune/delete/archive
|
|
166
222
|
|
|
223
|
+
Beginner facade:
|
|
224
|
+
|
|
225
|
+
```powershell
|
|
226
|
+
sdtk-wiki discover --plan [--project-path <path>]
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
This uses the same local-only discovery plan behavior and report-first safety
|
|
230
|
+
boundary.
|
|
231
|
+
|
|
232
|
+
### Semantic Extraction Dry-Run
|
|
233
|
+
|
|
234
|
+
```powershell
|
|
235
|
+
sdtk-wiki wiki extract --project-path <path> --source-root <path> --dry-run
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
Reads local Markdown sources and writes a semantic extraction JSON report under
|
|
239
|
+
`.sdtk/wiki/reports`. The report can identify local source records, GitHub
|
|
240
|
+
tool candidates, concept candidates, relations, comparisons, syntheses, and
|
|
241
|
+
source-quality findings.
|
|
242
|
+
|
|
243
|
+
Safety:
|
|
244
|
+
|
|
245
|
+
- local source roots only
|
|
246
|
+
- no web fetch
|
|
247
|
+
- no page generation
|
|
248
|
+
- no graph/viewer rebuild side effects
|
|
249
|
+
- no raw/provenance mutation
|
|
250
|
+
- no `.sdtk/atlas` mutation
|
|
251
|
+
|
|
167
252
|
### Compile Dry-Run Preview
|
|
168
253
|
|
|
169
254
|
```powershell
|
|
170
255
|
sdtk-wiki wiki compile --plan <path> --project-path <path> --dry-run
|
|
171
256
|
```
|
|
172
257
|
|
|
173
|
-
Reads a local markdown
|
|
174
|
-
|
|
258
|
+
Reads a local structured markdown plan, JSON operation plan, or
|
|
259
|
+
`sdtk_wiki_semantic_extraction` JSON report and writes both:
|
|
260
|
+
|
|
261
|
+
- `.sdtk/wiki/reports/compile-dry-run-preview-YYYY-MM-DD.md`
|
|
262
|
+
- `.sdtk/wiki/reports/compile-apply-plan-YYYY-MM-DD.json`
|
|
263
|
+
|
|
264
|
+
The markdown report is for human review. The JSON sidecar is the only supported
|
|
265
|
+
source of truth for explicit apply.
|
|
175
266
|
|
|
176
267
|
Supported operation types:
|
|
177
268
|
|
|
@@ -180,9 +271,68 @@ Supported operation types:
|
|
|
180
271
|
- `add_relation`
|
|
181
272
|
- `add_source_ref`
|
|
182
273
|
|
|
183
|
-
Unknown operation types are reported as `unsupported_operation`.
|
|
184
|
-
|
|
185
|
-
|
|
274
|
+
Unknown operation types are reported as `unsupported_operation`. Dry-run does
|
|
275
|
+
not modify wiki pages, raw sources, provenance, or `.sdtk/atlas`.
|
|
276
|
+
|
|
277
|
+
### Beginner Compile Operation
|
|
278
|
+
|
|
279
|
+
```powershell
|
|
280
|
+
sdtk-wiki compile --mode safe [--project-path <path>]
|
|
281
|
+
sdtk-wiki compile --mode safe --apply [--project-path <path>]
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
The beginner facade hides timestamped JSON paths:
|
|
285
|
+
|
|
286
|
+
- without `--apply`, it finds the latest semantic extraction report and writes
|
|
287
|
+
the compile preview plus JSON sidecar
|
|
288
|
+
- with `--apply`, it finds the latest sidecar or creates one from the latest
|
|
289
|
+
extraction report, then applies through the same sidecar-only contract
|
|
290
|
+
|
|
291
|
+
`safe` is the only R1 mode. Auto mode, destructive cleanup, web fetch, Ask, raw
|
|
292
|
+
mutation, provenance mutation, source mutation, and `.sdtk/atlas` mutation are
|
|
293
|
+
not performed.
|
|
294
|
+
|
|
295
|
+
### Compile Apply
|
|
296
|
+
|
|
297
|
+
```powershell
|
|
298
|
+
sdtk-wiki wiki compile --plan <compile-apply-plan-json> --project-path <path> --apply --yes
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
Applies only a `record_type: "sdtk_wiki_compile_apply_plan"` JSON sidecar
|
|
302
|
+
generated by compile dry-run. Markdown plans and raw semantic extraction JSON
|
|
303
|
+
are rejected for apply.
|
|
304
|
+
|
|
305
|
+
Apply behavior:
|
|
306
|
+
|
|
307
|
+
- requires `--apply --yes`
|
|
308
|
+
- writes only under `.sdtk/wiki/personal-brain`
|
|
309
|
+
- create-only or same-content no-op
|
|
310
|
+
- no overwrite with different content
|
|
311
|
+
- no delete, archive, rewrite, or reorder
|
|
312
|
+
- no raw/provenance descriptor mutation
|
|
313
|
+
- no `.sdtk/atlas` mutation
|
|
314
|
+
|
|
315
|
+
### Local Search
|
|
316
|
+
|
|
317
|
+
```powershell
|
|
318
|
+
sdtk-wiki search --project-path <path> "<query>"
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
Searches generated personal-brain Markdown pages under
|
|
322
|
+
`.sdtk/wiki/personal-brain/**/*.md`.
|
|
323
|
+
|
|
324
|
+
Search is deterministic, read-only, and non-premium. It does not require
|
|
325
|
+
`wiki.ask` entitlement, does not call an LLM/RAG runtime, does not write query
|
|
326
|
+
history, and does not mutate project files.
|
|
327
|
+
|
|
328
|
+
Beginner query facade:
|
|
329
|
+
|
|
330
|
+
```powershell
|
|
331
|
+
sdtk-wiki query --project-path <path> "<query>"
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
`query` is the simple local search command. It is not premium Ask and does not
|
|
335
|
+
require `wiki.ask`.
|
|
186
336
|
|
|
187
337
|
### Ask
|
|
188
338
|
|
|
@@ -190,7 +340,9 @@ provenance, or `.sdtk/atlas`.
|
|
|
190
340
|
sdtk-wiki ask --question "<text>" [--project-path <path>] [--json] [--source <id-or-path>] [--max-sources <n>] [--save-query]
|
|
191
341
|
```
|
|
192
342
|
|
|
193
|
-
Native `sdtk-wiki ask` is the canonical Q&A command for
|
|
343
|
+
Native `sdtk-wiki ask` is implemented as the canonical Q&A command for
|
|
344
|
+
capability `wiki.ask`, but it is not a free local search command. It requires
|
|
345
|
+
valid `wiki.ask` entitlement and runtime preconditions.
|
|
194
346
|
|
|
195
347
|
Preconditions:
|
|
196
348
|
|
|
@@ -227,8 +379,8 @@ Run report-first maintenance checks:
|
|
|
227
379
|
|
|
228
380
|
```powershell
|
|
229
381
|
sdtk-wiki lint --project-path .
|
|
230
|
-
sdtk-wiki
|
|
231
|
-
sdtk-wiki
|
|
382
|
+
sdtk-wiki discover --plan --project-path .
|
|
383
|
+
sdtk-wiki maintain --mode safe --project-path .
|
|
232
384
|
```
|
|
233
385
|
|
|
234
386
|
Preview a compile plan without applying it:
|
|
@@ -237,6 +389,23 @@ Preview a compile plan without applying it:
|
|
|
237
389
|
sdtk-wiki wiki compile --plan <local-plan.md-or-json> --project-path . --dry-run
|
|
238
390
|
```
|
|
239
391
|
|
|
392
|
+
Build a personal-brain from local Markdown sources and search it:
|
|
393
|
+
|
|
394
|
+
```powershell
|
|
395
|
+
sdtk-wiki ingest docs
|
|
396
|
+
sdtk-wiki compile --mode safe --apply
|
|
397
|
+
sdtk-wiki query "multi-agent"
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
Advanced/audit workflow with explicit report files:
|
|
401
|
+
|
|
402
|
+
```powershell
|
|
403
|
+
sdtk-wiki wiki extract --project-path . --source-root docs --dry-run
|
|
404
|
+
sdtk-wiki wiki compile --project-path . --plan .sdtk/wiki/reports/semantic-extraction-dry-run-<stamp>.json --dry-run
|
|
405
|
+
sdtk-wiki wiki compile --project-path . --plan .sdtk/wiki/reports/compile-apply-plan-<date>.json --apply --yes
|
|
406
|
+
sdtk-wiki search --project-path . "multi-agent"
|
|
407
|
+
```
|
|
408
|
+
|
|
240
409
|
Ask and save an opt-in redacted query record:
|
|
241
410
|
|
|
242
411
|
```powershell
|
|
@@ -244,6 +413,10 @@ sdtk-wiki atlas build --project-path .
|
|
|
244
413
|
sdtk-wiki ask --project-path . --question "Which docs describe the deployment path?" --save-query
|
|
245
414
|
```
|
|
246
415
|
|
|
416
|
+
This flow requires valid `wiki.ask` entitlement/runtime preconditions. Use
|
|
417
|
+
`sdtk-wiki search` for non-premium local validation of generated
|
|
418
|
+
personal-brain pages.
|
|
419
|
+
|
|
247
420
|
## Foundation/Beta Boundaries
|
|
248
421
|
|
|
249
422
|
This release is local-first and report-first. It is a foundation for a
|
|
@@ -252,10 +425,10 @@ second-brain workflow, not a fully autonomous second brain.
|
|
|
252
425
|
Do not claim the Foundation/Beta runtime includes:
|
|
253
426
|
|
|
254
427
|
- web fetch/discover
|
|
255
|
-
- compile `--apply`
|
|
256
428
|
- destructive prune/delete/archive
|
|
257
429
|
- query list/show/delete
|
|
258
430
|
- default full prompt/full answer query persistence
|
|
431
|
+
- premium Ask without valid `wiki.ask` entitlement/runtime preconditions
|
|
259
432
|
- `.sdtk/atlas` as canonical storage
|
|
260
433
|
|
|
261
434
|
See `products/sdtk-wiki/governance/SDTK_WIKI_USAGE_GUIDE.md` for the fuller
|
|
@@ -147,20 +147,48 @@ def _assert_inside(base: Path, target: Path) -> None:
|
|
|
147
147
|
raise ValueError(f"Refusing to write outside SDTK-WIKI workspace: {resolved_target}")
|
|
148
148
|
|
|
149
149
|
|
|
150
|
-
def _is_excluded(
|
|
151
|
-
path: Path,
|
|
152
|
-
root: Path,
|
|
153
|
-
exclude_frags: list[str],
|
|
154
|
-
) -> bool:
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
150
|
+
def _is_excluded(
    path: Path,
    root: Path,
    exclude_frags: list[str],
) -> bool:
    """Tell whether *path* is hit by any of the exclude fragments.

    Boolean convenience wrapper around ``_match_exclude``: the answer is
    ``True`` exactly when that helper reports a matching fragment.
    """
    matched = _match_exclude(path=path, root=root, exclude_frags=exclude_frags)
    return matched is not None
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _display_scan_path(path: Path, root: Path) -> str:
|
|
159
|
+
try:
|
|
160
|
+
return path.relative_to(root).as_posix()
|
|
161
|
+
except ValueError:
|
|
162
|
+
return path.as_posix()
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _normalise_exclude_fragment(frag: str) -> list[str]:
|
|
166
|
+
norm_frag = frag.replace("\\", "/").strip("/").lower()
|
|
167
|
+
return [part for part in norm_frag.split("/") if part and part != "."]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _match_exclude(
    path: Path,
    root: Path,
    exclude_frags: list[str],
) -> str | None:
    """Return the first exclude fragment that matches *path*, else ``None``.

    Matching is case-insensitive and works on whole path segments of the
    display path (see ``_display_scan_path``). A single-segment fragment
    matches when it equals any segment of the path; a multi-segment fragment
    matches when its segments appear as a contiguous run. The fragment is
    returned exactly as supplied by the caller, not in normalised form.
    """
    shown = _display_scan_path(path, root).lower()
    path_segments = [seg for seg in shown.split("/") if seg not in ("", ".")]

    for frag in exclude_frags:
        wanted = _normalise_exclude_fragment(frag)
        width = len(wanted)
        if width == 0:
            # Fragment normalised to nothing (e.g. "" or "/"): it cannot match.
            continue

        if width == 1:
            if wanted[0] in path_segments:
                return frag
            continue

        # Slide a window of `width` segments across the path.
        last_start = len(path_segments) - width
        for start in range(last_start + 1):
            if path_segments[start : start + width] == wanted:
                return frag

    return None
|
|
164
192
|
|
|
165
193
|
|
|
166
194
|
def _extract_title(text: str) -> str:
|
|
@@ -322,9 +350,9 @@ def _compute_file_hash(md_file: Path) -> str:
|
|
|
322
350
|
return hashlib.sha256(content).hexdigest()
|
|
323
351
|
|
|
324
352
|
|
|
325
|
-
def _parse_doc_record(md_file: Path, root: Path) -> dict[str, Any]:
|
|
326
|
-
rel = md_file
|
|
327
|
-
text = md_file.read_text(encoding="utf-8", errors="replace")
|
|
353
|
+
def _parse_doc_record(md_file: Path, root: Path) -> dict[str, Any]:
|
|
354
|
+
rel = _display_scan_path(md_file, root)
|
|
355
|
+
text = md_file.read_text(encoding="utf-8", errors="replace")
|
|
328
356
|
frontmatter_fields, body_text = _parse_frontmatter(text)
|
|
329
357
|
title = str(
|
|
330
358
|
frontmatter_fields.get("title")
|
|
@@ -363,39 +391,70 @@ def _parse_doc_record(md_file: Path, root: Path) -> dict[str, Any]:
|
|
|
363
391
|
}
|
|
364
392
|
|
|
365
393
|
|
|
366
|
-
def list_indexable_markdown_files(
|
|
367
|
-
root: Path,
|
|
368
|
-
scan_roots: list[Path],
|
|
369
|
-
exclude_frags: list[str],
|
|
370
|
-
) -> list[Path]:
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
394
|
+
def list_indexable_markdown_files(
    root: Path,
    scan_roots: list[Path],
    exclude_frags: list[str],
) -> list[Path]:
    """Return only the indexable markdown files found under *scan_roots*.

    Delegates to ``collect_indexable_markdown_files`` and keeps just its
    ``"files"`` entry, discarding the scan statistics.
    """
    scan = collect_indexable_markdown_files(root, scan_roots, exclude_frags)
    return scan["files"]
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def collect_indexable_markdown_files(
    root: Path,
    scan_roots: list[Path],
    exclude_frags: list[str],
) -> dict[str, Any]:
    """Scan *scan_roots* for markdown files and report what was kept or skipped.

    Each scan root may be a single ``.md`` file or a directory that is walked
    recursively for ``*.md`` files. Missing scan roots produce a warning on
    stderr and are ignored. A candidate is skipped when it matches an exclude
    fragment or when the same display path was already collected from an
    earlier scan root.

    Returns a dict with:
        ``files``          -- kept paths, sorted by their POSIX string form
        ``scanned_count``  -- every candidate examined, including skipped ones
        ``indexed_count``  -- number of kept files
        ``skipped_count``  -- number of skipped candidates
        ``skipped_files``  -- ``{"path", "reason"}`` records for each skip
    """
    files: list[Path] = []
    seen_paths: set[str] = set()
    skipped_files: list[dict[str, str]] = []
    scanned_count = 0

    for scan_root in scan_roots:
        if not scan_root.exists():
            print(f"[atlas] Warning: scan root does not exist, skipping: {scan_root}", file=sys.stderr)
            continue
        if scan_root.is_file() and scan_root.suffix.lower() == ".md":
            candidates = [scan_root]
        elif scan_root.is_dir():
            candidates = [p for p in sorted(scan_root.rglob("*.md")) if p.is_file()]
        else:
            candidates = []

        for md_file in candidates:
            scanned_count += 1
            # One path rendering serves both the skip report and the
            # de-duplication key, so the two can never disagree.
            display_path = _display_scan_path(md_file, root)
            matched_exclude = _match_exclude(md_file, root=root, exclude_frags=exclude_frags)
            if matched_exclude is not None:
                skipped_files.append(
                    {
                        "path": display_path,
                        "reason": f"exclude:{matched_exclude}",
                    }
                )
                continue
            if display_path in seen_paths:
                # Already collected via another (overlapping) scan root.
                skipped_files.append(
                    {
                        "path": display_path,
                        "reason": "duplicate_scan_root",
                    }
                )
                continue
            seen_paths.add(display_path)
            files.append(md_file)

    files.sort(key=lambda p: p.as_posix())
    return {
        "files": files,
        "scanned_count": scanned_count,
        "indexed_count": len(files),
        "skipped_count": len(skipped_files),
        "skipped_files": skipped_files,
    }
|
|
399
458
|
|
|
400
459
|
|
|
401
460
|
# ---------------------------------------------------------------------------
|
|
@@ -639,16 +698,17 @@ def write_wiki_pages_and_provenance(
|
|
|
639
698
|
}
|
|
640
699
|
|
|
641
700
|
|
|
642
|
-
def build_docs_incremental(
|
|
643
|
-
root: Path,
|
|
644
|
-
atlas_dir: Path,
|
|
645
|
-
generated: str,
|
|
646
|
-
scan_roots: list[Path],
|
|
647
|
-
exclude_frags: list[str],
|
|
648
|
-
) -> tuple[list[dict[str, Any]], dict[str, Any], dict[str,
|
|
649
|
-
prior_state = load_atlas_state(atlas_dir)
|
|
650
|
-
prior_documents = prior_state.get("documents", {})
|
|
651
|
-
|
|
701
|
+
def build_docs_incremental(
|
|
702
|
+
root: Path,
|
|
703
|
+
atlas_dir: Path,
|
|
704
|
+
generated: str,
|
|
705
|
+
scan_roots: list[Path],
|
|
706
|
+
exclude_frags: list[str],
|
|
707
|
+
) -> tuple[list[dict[str, Any]], dict[str, Any], dict[str, Any]]:
|
|
708
|
+
prior_state = load_atlas_state(atlas_dir)
|
|
709
|
+
prior_documents = prior_state.get("documents", {})
|
|
710
|
+
scan_result = collect_indexable_markdown_files(root, scan_roots, exclude_frags)
|
|
711
|
+
current_files = scan_result["files"]
|
|
652
712
|
|
|
653
713
|
current_rel_paths = {}
|
|
654
714
|
for md_file in current_files:
|
|
@@ -710,12 +770,16 @@ def build_docs_incremental(
|
|
|
710
770
|
"generated": generated,
|
|
711
771
|
"documents": next_documents,
|
|
712
772
|
}
|
|
713
|
-
build_stats = {
|
|
714
|
-
"discovered_count": len(current_rel_paths),
|
|
715
|
-
"
|
|
716
|
-
"
|
|
717
|
-
"
|
|
718
|
-
|
|
773
|
+
build_stats = {
|
|
774
|
+
"discovered_count": len(current_rel_paths),
|
|
775
|
+
"scanned_count": scan_result["scanned_count"],
|
|
776
|
+
"indexed_count": len(current_rel_paths),
|
|
777
|
+
"skipped_count": scan_result["skipped_count"],
|
|
778
|
+
"skipped_files": scan_result["skipped_files"],
|
|
779
|
+
"reused_count": reused_count,
|
|
780
|
+
"reparsed_count": reparsed_count,
|
|
781
|
+
"removed_count": removed_count,
|
|
782
|
+
}
|
|
719
783
|
return docs, next_state, build_stats
|
|
720
784
|
|
|
721
785
|
|
|
@@ -814,11 +878,11 @@ def build_graph(docs: list[dict[str, Any]]) -> dict[str, Any]:
|
|
|
814
878
|
# ---------------------------------------------------------------------------
|
|
815
879
|
# Summary markdown
|
|
816
880
|
# ---------------------------------------------------------------------------
|
|
817
|
-
def build_summary(
|
|
881
|
+
def build_summary(
|
|
818
882
|
docs: list[dict[str, Any]],
|
|
819
883
|
graph: dict[str, Any],
|
|
820
884
|
generated: str,
|
|
821
|
-
stats: dict[str,
|
|
885
|
+
stats: dict[str, Any] | None,
|
|
822
886
|
root: Path,
|
|
823
887
|
scan_roots: list[Path],
|
|
824
888
|
exclude_frags: list[str],
|
|
@@ -848,16 +912,30 @@ def build_summary(
|
|
|
848
912
|
for fam, cnt in sorted(family_counts.items(), key=lambda x: -x[1]):
|
|
849
913
|
lines.append(f"| {fam} | {cnt} |")
|
|
850
914
|
|
|
851
|
-
if stats is not None:
|
|
852
|
-
lines += [
|
|
853
|
-
"",
|
|
854
|
-
"## Incremental Build",
|
|
855
|
-
"",
|
|
856
|
-
f"Discovered markdown docs: {stats['discovered_count']}",
|
|
857
|
-
f"
|
|
858
|
-
f"
|
|
859
|
-
f"
|
|
860
|
-
|
|
915
|
+
if stats is not None:
|
|
916
|
+
lines += [
|
|
917
|
+
"",
|
|
918
|
+
"## Incremental Build",
|
|
919
|
+
"",
|
|
920
|
+
f"Discovered markdown docs: {stats['discovered_count']}",
|
|
921
|
+
f"Scanned markdown candidates: {stats.get('scanned_count', stats['discovered_count'])}",
|
|
922
|
+
f"Indexed markdown docs: {stats.get('indexed_count', stats['discovered_count'])}",
|
|
923
|
+
f"Skipped markdown docs: {stats.get('skipped_count', 0)}",
|
|
924
|
+
f"Reused cached docs: {stats['reused_count']}",
|
|
925
|
+
f"Reparsed docs: {stats['reparsed_count']}",
|
|
926
|
+
f"Removed stale docs: {stats['removed_count']}",
|
|
927
|
+
]
|
|
928
|
+
skipped_files = stats.get("skipped_files") or []
|
|
929
|
+
if skipped_files:
|
|
930
|
+
lines += [
|
|
931
|
+
"",
|
|
932
|
+
"## Skipped Markdown Files",
|
|
933
|
+
"",
|
|
934
|
+
"| Path | Reason |",
|
|
935
|
+
"|------|--------|",
|
|
936
|
+
]
|
|
937
|
+
for skipped in skipped_files:
|
|
938
|
+
lines.append(f"| {skipped['path']} | {skipped['reason']} |")
|
|
861
939
|
|
|
862
940
|
lines += [
|
|
863
941
|
"",
|
|
@@ -959,12 +1037,19 @@ def build_atlas(
|
|
|
959
1037
|
scan_roots=roots,
|
|
960
1038
|
exclude_frags=frags,
|
|
961
1039
|
)
|
|
962
|
-
print(f"[atlas] Indexed {len(docs)} documents.")
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
1040
|
+
print(f"[atlas] Indexed {len(docs)} documents.")
|
|
1041
|
+
print(
|
|
1042
|
+
f"[atlas] Scan coverage: scanned {stats.get('scanned_count', len(docs))}, "
|
|
1043
|
+
f"indexed {stats.get('indexed_count', len(docs))}, "
|
|
1044
|
+
f"skipped {stats.get('skipped_count', 0)}."
|
|
1045
|
+
)
|
|
1046
|
+
if verbose:
|
|
1047
|
+
print(
|
|
1048
|
+
f"[atlas] Incremental build: reused {stats['reused_count']} cached, "
|
|
1049
|
+
f"reparsed {stats['reparsed_count']}, removed {stats['removed_count']}."
|
|
1050
|
+
)
|
|
1051
|
+
for skipped in stats.get("skipped_files", []):
|
|
1052
|
+
print(f"[atlas] Skipped markdown: {skipped['path']} ({skipped['reason']})")
|
|
968
1053
|
|
|
969
1054
|
print("[atlas] Building graph...")
|
|
970
1055
|
graph = build_graph(docs)
|