agent-wiki-cli 0.3.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. agent_wiki_cli-0.3.28.dist-info/METADATA +425 -0
  2. agent_wiki_cli-0.3.28.dist-info/RECORD +47 -0
  3. agent_wiki_cli-0.3.28.dist-info/WHEEL +5 -0
  4. agent_wiki_cli-0.3.28.dist-info/entry_points.txt +2 -0
  5. agent_wiki_cli-0.3.28.dist-info/licenses/LICENSE +21 -0
  6. agent_wiki_cli-0.3.28.dist-info/top_level.txt +1 -0
  7. llm_wiki_cli/__init__.py +7 -0
  8. llm_wiki_cli/cli.py +231 -0
  9. llm_wiki_cli/commands/__init__.py +1 -0
  10. llm_wiki_cli/commands/bootstrap_cmd.py +1072 -0
  11. llm_wiki_cli/commands/bump_cmd.py +55 -0
  12. llm_wiki_cli/commands/context_cmd.py +427 -0
  13. llm_wiki_cli/commands/extract_cmd.py +745 -0
  14. llm_wiki_cli/commands/generate_prompt_cmd.py +89 -0
  15. llm_wiki_cli/commands/hook_cmd.py +161 -0
  16. llm_wiki_cli/commands/init_cmd.py +92 -0
  17. llm_wiki_cli/commands/lint_cmd.py +294 -0
  18. llm_wiki_cli/commands/migrate_cmd.py +892 -0
  19. llm_wiki_cli/commands/release_cmd.py +163 -0
  20. llm_wiki_cli/commands/status_cmd.py +70 -0
  21. llm_wiki_cli/commands/sync_cmd.py +521 -0
  22. llm_wiki_cli/commands/trigger_cmd.py +205 -0
  23. llm_wiki_cli/commands/uninstall_cmd.py +221 -0
  24. llm_wiki_cli/commands/upgrade_cmd.py +196 -0
  25. llm_wiki_cli/config.py +318 -0
  26. llm_wiki_cli/extractors/__init__.py +46 -0
  27. llm_wiki_cli/extractors/common.py +90 -0
  28. llm_wiki_cli/extractors/go_extractor.py +143 -0
  29. llm_wiki_cli/extractors/go_scripts/go.mod +3 -0
  30. llm_wiki_cli/extractors/go_scripts/main.go +668 -0
  31. llm_wiki_cli/extractors/python_extractor.py +346 -0
  32. llm_wiki_cli/extractors/rust_extractor.py +143 -0
  33. llm_wiki_cli/extractors/rust_scripts/Cargo.lock +110 -0
  34. llm_wiki_cli/extractors/rust_scripts/Cargo.toml +11 -0
  35. llm_wiki_cli/extractors/rust_scripts/src/main.rs +803 -0
  36. llm_wiki_cli/extractors/ts_extractor.py +206 -0
  37. llm_wiki_cli/extractors/ts_scripts/extract.js +485 -0
  38. llm_wiki_cli/extractors/ts_scripts/package.json +10 -0
  39. llm_wiki_cli/services/__init__.py +0 -0
  40. llm_wiki_cli/services/circuit_breaker.py +79 -0
  41. llm_wiki_cli/services/io.py +47 -0
  42. llm_wiki_cli/services/lockfile.py +60 -0
  43. llm_wiki_cli/services/packages.py +173 -0
  44. llm_wiki_cli/services/paths.py +31 -0
  45. llm_wiki_cli/services/schema.py +214 -0
  46. llm_wiki_cli/services/secure_file.py +22 -0
  47. llm_wiki_cli/services/versioning.py +193 -0
@@ -0,0 +1,892 @@
1
+ """Legacy wiki migration command.
2
+
3
+ `llm-wiki migrate` reconciles pages generated by older llm-wiki versions with
4
+ the current collision-aware naming rules. Active canonical pages are
5
+ regenerated from source, old active pages are archived, and previous page
6
+ content is preserved under a Legacy Notes section.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import os
13
+ import re
14
+ import sys
15
+ from dataclasses import dataclass, field
16
+ from datetime import datetime, timezone
17
+ from pathlib import Path
18
+
19
+ from .bootstrap_cmd import (
20
+ _build_relationships,
21
+ _generate_docker_md,
22
+ _generate_entity_md,
23
+ _generate_index_md,
24
+ _generate_module_md,
25
+ build_entity_page_map,
26
+ build_module_page_map,
27
+ )
28
+ from .extract_cmd import get_docker_inventory, get_inventory_result, print_inventory_failures
29
+ from .sync_cmd import MANIFEST_FILENAME, MANIFEST_VERSION, SyncManifest
30
+ from ..config import DEFAULT_WIKI_DIR, validate_path
31
+ from ..services.io import read_md, write_md
32
+ from ..services.paths import normalize_source_path
33
+
34
# HTML comment written right after the "## Legacy Notes" heading; everything
# after it in a page is treated as previously preserved legacy content.
LEGACY_MARKER = "<!-- llm-wiki-migrate:legacy-notes -->"
# Wiki sub-directories whose top-level ``*.md`` pages are scanned and migrated.
_MANAGED_DIRS = ("entities", "modules", "infrastructure")
# Inline markdown link: group 1 is "[label](", group 2 the URL, group 3 ")".
_LINK_RE = re.compile(r"(\[[^\]]+\]\()([^)]+)(\))")
# First-level markdown heading ("# Title"); group 1 is the title text.
_HEADING_RE = re.compile(r"^#\s+(.+?)\s*$", re.MULTILINE)
# "**Location:** `path[:line]`" line; group 1 is the raw location value.
_LOCATION_RE = re.compile(r"^\*\*Location:\*\*\s*`?(.+?)`?\s*$", re.MULTILINE)
# "**Path:** `path`" line; group 1 is the raw source path.
_PATH_RE = re.compile(r"^\*\*Path:\*\*\s*`?(.+?)`?\s*$", re.MULTILINE)
40
+
41
+
42
@dataclass(frozen=True)
class ExistingPage:
    """Immutable snapshot of a wiki page as it was found on disk.

    Instances are produced by ``_read_existing_page`` for both active pages
    and pages that already live under a ``legacy/migrate-*`` archive.
    """

    # Name of the directory that directly contains the page.
    kind: str
    # Filesystem path the page was read from.
    path: Path
    # POSIX-style path relative to the wiki (or archive) root it was read under.
    rel: str
    # File name without the ``.md`` suffix.
    stem: str
    # Full text of the page at read time.
    content: str
    # Text of the first ``# `` heading, if the page has one.
    heading: str | None = None
    # Normalized path parsed from a ``**Location:**`` line, if present.
    location_path: str | None = None
    # Trailing ``:<line>`` number parsed from the ``**Location:**`` line.
    location_line: int | None = None
    # Normalized path parsed from a ``**Path:**`` line, if present.
    source_path: str | None = None
    # True when the page was read from an existing legacy archive.
    archived: bool = False
56
+
57
+
58
@dataclass(frozen=True)
class TargetPage:
    """Immutable description of a page the migration wants to exist.

    Targets are produced by ``_build_targets`` from the current source and
    docker inventories.
    """

    # One of "modules", "entities" or "infrastructure".
    kind: str
    # Page name without directory or ``.md`` suffix.
    stem: str
    # Wiki-relative path of the page, e.g. ``modules/<stem>.md``.
    rel: str
    # Freshly generated markdown for the page.
    content: str
    # Inventory key (source or docker file) the page was generated from.
    source_path: str | None = None
    # Class name for entity pages; left unset for other kinds.
    entity_name: str | None = None
    # Value of the class record's "line" entry for entity pages, if any.
    line: int | None = None
69
+
70
+
71
@dataclass
class MigrationPlan:
    """Mutable container for everything a migration run intends to do.

    The same plan object drives both real runs and dry-run previews.
    """

    # Directory name used under ``legacy/`` for pages archived by this plan.
    archive_name: str
    # Every canonical page generated from the current inventory.
    targets: list[TargetPage]
    # Target ``rel`` -> existing pages that were matched to that target.
    matches: dict[str, list[ExistingPage]] = field(default_factory=dict)
    # Active pages that matched no target.
    unmatched: list[ExistingPage] = field(default_factory=list)
    # Old wiki-relative path -> replacement path used when rewriting links.
    link_map: dict[str, str] = field(default_factory=dict)
    # Full text that ``index.md`` should contain after migration.
    index_content: str = ""
    # Manifest to save with the final chunk, when one was built.
    manifest: SyncManifest | None = None
82
+
83
+
84
@dataclass(frozen=True)
class MigrationChunk:
    """One slice of the pending migration work, sized by ``--chunk-size``."""

    # 1-based position of this chunk within the chunk plan.
    number: int
    # Number of chunks in the plan this chunk belongs to.
    total: int
    # Canonical pages to write in this chunk.
    targets: list[TargetPage]
    # Unmatched active pages to archive and remove in this chunk.
    unmatched: list[ExistingPage]
    # When True the chunk also refreshes the index, links and manifest.
    include_finalizers: bool = False

    @property
    def page_operations(self) -> int:
        """Return how many page-level operations this chunk carries."""
        writes = len(self.targets)
        archives = len(self.unmatched)
        return writes + archives
97
+
98
+
99
def _normalize_source_path(value: str | None, src_dir: str) -> str | None:
    """Delegate to the shared ``normalize_source_path`` service helper.

    Used on paths scraped from page markdown so they can be compared with
    the inventory keys; the normalization rules live in ``services.paths``.
    """
    normalized = normalize_source_path(value, src_dir)
    return normalized
102
+
103
+
104
+ def _page_rel(path: Path, wiki_dir: Path) -> str:
105
+ try:
106
+ return path.relative_to(wiki_dir).as_posix()
107
+ except ValueError:
108
+ return path.resolve().relative_to(wiki_dir.resolve()).as_posix()
109
+
110
+
111
+ def _legacy_gitignore_pattern(wiki_dir: Path) -> str:
112
+ """Return the project-root-relative gitignore pattern for legacy archives."""
113
+ cwd = Path.cwd().resolve()
114
+ try:
115
+ rel = wiki_dir.resolve().relative_to(cwd).as_posix()
116
+ except ValueError:
117
+ rel = wiki_dir.as_posix()
118
+ rel = rel.strip("/")
119
+ return f"{rel}/legacy/" if rel else "legacy/"
120
+
121
+
122
+ def _gitignore_has_pattern(content: str, pattern: str) -> bool:
123
+ wanted = pattern.strip().lstrip("/").rstrip("/")
124
+ for raw_line in content.splitlines():
125
+ line = raw_line.strip()
126
+ if not line or line.startswith("#"):
127
+ continue
128
+ if line.lstrip("/").rstrip("/") == wanted:
129
+ return True
130
+ return False
131
+
132
+
133
def _legacy_gitignore_needs_write(wiki_dir: Path) -> bool:
    """Return True when ``.gitignore`` is missing or lacks the legacy pattern.

    The ``.gitignore`` path is relative, so it is looked up in the current
    working directory.
    """
    gitignore = Path(".gitignore")
    if gitignore.exists():
        existing = read_md(gitignore)
        return not _gitignore_has_pattern(existing, _legacy_gitignore_pattern(wiki_dir))
    return True
138
+
139
+
140
def _ensure_legacy_gitignore(wiki_dir: Path, dry_run: bool) -> bool:
    """Add the legacy-archive pattern to ``.gitignore`` when it is missing.

    Args:
        wiki_dir: Wiki root used to derive the ignore pattern.
        dry_run: When True the change is announced but nothing is written.

    Returns:
        False if the pattern is already present, otherwise True (also for a
        dry run, where the write is only previewed).
    """
    pattern = _legacy_gitignore_pattern(wiki_dir)
    gitignore = Path(".gitignore")
    current = read_md(gitignore) if gitignore.exists() else ""
    if _gitignore_has_pattern(current, pattern):
        return False

    print(f" GITIGNORE add {pattern}")
    if not dry_run:
        new_text = f"# LLM Wiki migration archives\n{pattern}\n"
        if current.strip():
            # Keep existing entries and separate the new block by a blank line.
            new_text = current.rstrip("\n") + "\n\n" + new_text
        write_md(gitignore, new_text)
    return True
162
+
163
+
164
+ def _split_location(value: str) -> tuple[str, int | None]:
165
+ """Split a legacy Location value into path and optional line number."""
166
+ location = value.strip()
167
+ path_part, sep, line_part = location.rpartition(":")
168
+ if sep and line_part.isdigit():
169
+ return path_part, int(line_part)
170
+ return location, None
171
+
172
+
173
def _read_existing_page(
    path: Path,
    wiki_dir: Path,
    src_dir: str,
    *,
    archived: bool = False,
) -> ExistingPage:
    """Load a page from disk and extract the metadata used for matching.

    Args:
        path: Markdown file to read.
        wiki_dir: Root that ``rel`` is computed against.
        src_dir: Source directory used to normalize scraped paths.
        archived: Stored verbatim on the resulting record.

    Returns:
        An ``ExistingPage`` whose ``kind`` is the name of the file's parent
        directory and whose heading, location and source path come from the
        first matching line of each type (``None`` when absent).
    """
    text = read_md(path)
    rel = _page_rel(path, wiki_dir)

    def first_capture(pattern: re.Pattern[str]) -> str | None:
        found = pattern.search(text)
        return found.group(1) if found else None

    raw_heading = first_capture(_HEADING_RE)
    raw_location = first_capture(_LOCATION_RE)
    raw_source = first_capture(_PATH_RE)

    location_path: str | None = None
    location_line: int | None = None
    if raw_location is not None:
        unnormalized, location_line = _split_location(raw_location)
        location_path = _normalize_source_path(unnormalized, src_dir)

    source_path: str | None = None
    if raw_source is not None:
        source_path = _normalize_source_path(raw_source, src_dir)

    return ExistingPage(
        kind=path.parent.name,
        path=path,
        rel=rel,
        stem=path.stem,
        content=text,
        heading=None if raw_heading is None else raw_heading.strip(),
        location_path=location_path,
        location_line=location_line,
        source_path=source_path,
        archived=archived,
    )
208
+
209
+
210
def _active_managed_pages(wiki_dir: Path, src_dir: str) -> list[ExistingPage]:
    """Read every top-level ``*.md`` file in the managed wiki directories.

    Directories are visited in ``_MANAGED_DIRS`` order and files in sorted
    order; managed directories that do not exist are skipped.
    """
    collected: list[ExistingPage] = []
    for managed in _MANAGED_DIRS:
        folder = wiki_dir / managed
        if folder.exists():
            collected.extend(
                _read_existing_page(md_path, wiki_dir, src_dir)
                for md_path in sorted(folder.glob("*.md"))
                if md_path.is_file()
            )
    return collected
220
+
221
+
222
+ def _legacy_archive_roots(wiki_dir: Path) -> list[Path]:
223
+ legacy_dir = wiki_dir / "legacy"
224
+ if not legacy_dir.exists():
225
+ return []
226
+ return sorted(
227
+ (path for path in legacy_dir.glob("migrate-*") if path.is_dir()),
228
+ key=lambda path: path.name,
229
+ reverse=True,
230
+ )
231
+
232
+
233
def _archived_managed_pages(wiki_dir: Path, src_dir: str) -> list[ExistingPage]:
    """Read managed pages stored in existing ``legacy/migrate-*`` archives.

    Each page's ``rel`` is computed against its archive root rather than the
    wiki root, and every record is flagged ``archived=True``. Archives are
    visited in the order returned by ``_legacy_archive_roots``.
    """
    collected: list[ExistingPage] = []
    for root in _legacy_archive_roots(wiki_dir):
        for managed in _MANAGED_DIRS:
            folder = root / managed
            if not folder.exists():
                continue
            collected.extend(
                _read_existing_page(md_path, root, src_dir, archived=True)
                for md_path in sorted(folder.glob("*.md"))
                if md_path.is_file()
            )
    return collected
246
+
247
+
248
def _additional_doc_entries(wiki_dir: Path) -> list[str]:
    """Collect wiki-relative paths of markdown files outside the managed areas.

    Skipped: anything under ``legacy``, files named ``index.md`` or
    ``log.md`` at any depth, and files whose top-level directory is a managed
    directory, ``workflows`` or ``legacy``.
    """
    if not wiki_dir.exists():
        return []

    skipped_roots = {*_MANAGED_DIRS, "workflows", "legacy"}
    reserved_names = {"index.md", "log.md"}
    entries: list[str] = []
    for md_path in sorted(wiki_dir.rglob("*.md")):
        if not md_path.is_file():
            continue
        if _is_legacy_path(md_path, wiki_dir):
            continue
        rel = _page_rel(md_path, wiki_dir)
        if md_path.name in reserved_names:
            continue
        leading = Path(rel).parts[:1]
        if leading and leading[0] in skipped_roots:
            continue
        entries.append(rel)
    return entries
265
+
266
+
267
def _append_additional_docs_index(index_content: str, wiki_dir: Path) -> str:
    """Append an "Additional Docs" bullet list to the generated index.

    The index is returned unchanged when no additional documents exist.
    Otherwise trailing whitespace is trimmed from the index and one bullet
    per document is added, labelled with its path minus the suffix.
    """
    extra_docs = _additional_doc_entries(wiki_dir)
    if not extra_docs:
        return index_content

    def bullet(rel: str) -> str:
        label = str(Path(rel).with_suffix("")).replace("\\", "/")
        return f"- [{label}]({rel})"

    header = [index_content.rstrip(), "", "## Additional Docs", ""]
    return "\n".join(header + [bullet(rel) for rel in extra_docs])
277
+
278
+
279
def _build_targets(
    wiki_dir: Path,
    src_dir: str,
    inventory: dict,
    docker_inventory: dict,
) -> tuple[list[TargetPage], str, SyncManifest]:
    """Generate every canonical page, the index text and the sync manifest.

    Targets are emitted per inventory file (the module page first, then one
    entity page per class), followed by one infrastructure page per docker
    inventory entry.

    Returns:
        ``(targets, index_content, manifest)`` where ``index_content``
        already includes the "Additional Docs" section when applicable.
    """
    module_pages = build_module_page_map(inventory)
    entity_pages = build_entity_page_map(inventory)
    relationships = _build_relationships(inventory, module_pages)

    targets: list[TargetPage] = []
    entity_names: list[str] = []
    module_entries: list[dict] = []
    infra_entries: list[dict] = []

    for source_file, file_data in inventory.items():
        module_stem = module_pages[source_file]
        classes = file_data.get("classes", [])
        # Class name -> entity page stem, limited to classes of this file.
        local_entities = {
            cls["name"]: entity_pages[(cls["name"], source_file)] for cls in classes
        }

        module_entries.append(
            {
                "name": module_stem,
                "path": source_file,
                "docstring": file_data.get("module_docstring", ""),
            }
        )
        module_markdown = _generate_module_md(source_file, file_data, local_entities)
        targets.append(
            TargetPage(
                kind="modules",
                stem=module_stem,
                rel=f"modules/{module_stem}.md",
                content=module_markdown,
                source_path=source_file,
            )
        )

        for cls in classes:
            entity_stem = entity_pages[(cls["name"], source_file)]
            entity_names.append(entity_stem)
            entity_markdown = _generate_entity_md(
                cls, source_file, relationships, module_stem
            )
            targets.append(
                TargetPage(
                    kind="entities",
                    stem=entity_stem,
                    rel=f"entities/{entity_stem}.md",
                    content=entity_markdown,
                    source_path=source_file,
                    entity_name=cls["name"],
                    line=cls.get("line"),
                )
            )

    for docker_file, docker_info in docker_inventory.items():
        # Path separators and dots all become underscores in the page name.
        infra_stem = re.sub(r"[\\/.]", "_", docker_file)
        infra_entries.append({"name": infra_stem, "type": docker_info.get("type", "")})
        targets.append(
            TargetPage(
                kind="infrastructure",
                stem=infra_stem,
                rel=f"infrastructure/{infra_stem}.md",
                content=_generate_docker_md(docker_file, docker_info, module_pages),
                source_path=docker_file,
            )
        )

    workflows = _list_workflows(wiki_dir)
    base_index = _generate_index_md(
        entity_names,
        module_entries,
        workflows or None,
        infra_entries or None,
    )
    manifest = SyncManifest.build_from_inventory(
        inventory,
        src_dir,
        entity_pages,
        module_pages,
    )
    full_index = _append_additional_docs_index(base_index, wiki_dir)
    return targets, full_index, manifest
352
+
353
+
354
+ def _list_workflows(wiki_dir: Path) -> list[dict]:
355
+ workflow_dir = wiki_dir / "workflows"
356
+ if not workflow_dir.exists():
357
+ return []
358
+ return [{"name": path.stem, "entry": ""} for path in sorted(workflow_dir.glob("*.md"))]
359
+
360
+
361
+ def _unique(values: list[TargetPage]) -> TargetPage | None:
362
+ return values[0] if len(values) == 1 else None
363
+
364
+
365
+ def _build_match_lookups(targets: list[TargetPage]) -> dict[str, dict]:
366
+ lookups: dict[str, dict] = {
367
+ "by_rel": {target.rel: target for target in targets},
368
+ "entities_by_path_name": {},
369
+ "entities_by_name": {},
370
+ "modules_by_source": {},
371
+ "modules_by_source_stem": {},
372
+ "modules_by_page_stem": {},
373
+ "infra_by_source": {},
374
+ "infra_by_page_stem": {},
375
+ }
376
+
377
+ for target in targets:
378
+ if target.kind == "entities" and target.entity_name:
379
+ lookups["entities_by_path_name"].setdefault(
380
+ (target.source_path, target.entity_name), []
381
+ ).append(target)
382
+ lookups["entities_by_name"].setdefault(target.entity_name, []).append(target)
383
+ elif target.kind == "modules":
384
+ lookups["modules_by_source"][target.source_path] = target
385
+ lookups["modules_by_source_stem"].setdefault(
386
+ Path(target.source_path or "").stem, []
387
+ ).append(target)
388
+ lookups["modules_by_page_stem"].setdefault(target.stem, []).append(target)
389
+ elif target.kind == "infrastructure":
390
+ lookups["infra_by_source"][target.source_path] = target
391
+ lookups["infra_by_page_stem"].setdefault(target.stem, []).append(target)
392
+ return lookups
393
+
394
+
395
def _match_existing_page(page: ExistingPage, lookups: dict[str, dict]) -> TargetPage | None:
    """Find the canonical target an existing page corresponds to, if any.

    An identical relative path always wins. Otherwise the strategy depends on
    the page kind, and a fallback only counts when it identifies exactly one
    target:

    * entities: (location path, name), then name alone; the name is the page
      heading or, failing that, the text after the last ``_`` in the stem.
    * modules: source path, then page stem, then source-file stem.
    * infrastructure: source path, then page stem.
    """
    same_path = lookups["by_rel"].get(page.rel) if page.rel in lookups["by_rel"] else None
    if page.rel in lookups["by_rel"]:
        return same_path

    kind = page.kind
    if kind == "entities":
        entity_name = page.heading or page.stem.rsplit("_", 1)[-1]
        if page.location_path:
            located = _unique(
                lookups["entities_by_path_name"].get((page.location_path, entity_name), [])
            )
            if located:
                return located
        return _unique(lookups["entities_by_name"].get(entity_name, []))

    if kind == "modules":
        by_source = lookups["modules_by_source"]
        if page.source_path and page.source_path in by_source:
            return by_source[page.source_path]
        by_page_stem = _unique(lookups["modules_by_page_stem"].get(page.stem, []))
        if by_page_stem:
            return by_page_stem
        return _unique(lookups["modules_by_source_stem"].get(page.stem, []))

    if kind == "infrastructure":
        by_source = lookups["infra_by_source"]
        if page.source_path and page.source_path in by_source:
            return by_source[page.source_path]
        return _unique(lookups["infra_by_page_stem"].get(page.stem, []))

    return None
423
+
424
+
425
def _build_migration_plan(wiki_dir: Path, src_dir: str) -> MigrationPlan:
    """Scan sources and the wiki and compute the full migration plan.

    Exits the process with status 1 (after printing the failures) when the
    source inventory could not be built. The archive name embeds the current
    UTC time, so it is specific to this invocation.
    """
    result = get_inventory_result(src_dir, deep=True)
    if result.failed:
        print_inventory_failures(result)
        sys.exit(1)

    targets, index_content, manifest = _build_targets(
        wiki_dir,
        src_dir,
        result.inventory,
        get_docker_inventory(src_dir),
    )
    lookups = _build_match_lookups(targets)
    canonical_rels = {target.rel for target in targets}
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    plan = MigrationPlan(
        archive_name=f"migrate-{stamp}",
        targets=targets,
        index_content=index_content,
        manifest=manifest,
    )

    active_pages = _active_managed_pages(wiki_dir, src_dir)
    active_rels = {page.rel for page in active_pages}

    for page in active_pages:
        matched = _match_existing_page(page, lookups)
        if matched:
            plan.matches.setdefault(matched.rel, []).append(page)
            if page.rel != matched.rel:
                # Active pages overwrite any earlier mapping for the same path.
                plan.link_map[page.rel] = matched.rel
        elif page.rel not in canonical_rels:
            plan.unmatched.append(page)
            plan.link_map.setdefault(page.rel, f"legacy/{plan.archive_name}/{page.rel}")

    for page in _archived_managed_pages(wiki_dir, src_dir):
        if page.rel in active_rels:
            # An active page with the same relative path takes precedence.
            continue
        matched = _match_existing_page(page, lookups)
        if not matched:
            # Unmatched archived pages keep pointing at their archive copy.
            plan.link_map.setdefault(page.rel, _page_rel(page.path, wiki_dir))
            continue
        plan.matches.setdefault(matched.rel, []).append(page)
        if page.rel != matched.rel:
            plan.link_map.setdefault(page.rel, matched.rel)

    return plan
473
+
474
+
475
def _existing_legacy_payload(page: ExistingPage, generated_content: str) -> str:
    """Return the part of ``page`` worth preserving as legacy notes.

    * ``""`` when the page is empty or equals the generated content
      (both compared after stripping whitespace);
    * the text after the first legacy marker when the page contains one;
    * otherwise the whole stripped page.
    """
    stripped = page.content.strip()
    if not stripped:
        return ""
    if stripped == generated_content.strip():
        return ""
    if LEGACY_MARKER not in page.content:
        return stripped
    _, _, after_marker = page.content.partition(LEGACY_MARKER)
    return after_marker.strip()
482
+
483
+
484
+ def _split_legacy_sections(payload: str) -> list[str]:
485
+ payload = payload.strip()
486
+ if not payload:
487
+ return []
488
+ if not payload.startswith("### From "):
489
+ return [payload]
490
+ return [
491
+ section.strip()
492
+ for section in re.split(r"(?=^### From )", payload, flags=re.MULTILINE)
493
+ if section.strip()
494
+ ]
495
+
496
+
497
def _merge_legacy_notes(target: TargetPage, pages: list[ExistingPage]) -> str:
    """Return the target content with preserved notes from ``pages`` appended.

    Each page contributes either its existing ``### From`` sections or, when
    it has none, one new section titled with the page's relative path.
    Duplicate sections are kept once, in first-seen order. When nothing needs
    preserving, the generated content is returned unchanged.
    """
    # dict keys give ordered de-duplication of the normalized sections.
    ordered_sections: dict[str, None] = {}
    for page in pages:
        payload = _existing_legacy_payload(page, target.content)
        if not payload:
            continue
        if payload.startswith("### From "):
            pieces = _split_legacy_sections(payload)
        else:
            pieces = [f"### From `{page.rel}`\n\n{payload}"]
        for piece in pieces:
            ordered_sections.setdefault(piece.strip(), None)

    if not ordered_sections:
        return target.content

    notes = "\n\n".join(ordered_sections).rstrip()
    generated = target.content.rstrip()
    return f"{generated}\n\n## Legacy Notes\n\n{LEGACY_MARKER}\n\n{notes}\n"
526
+
527
+
528
def _archive_page(page: ExistingPage, wiki_dir: Path, archive_root: Path, dry_run: bool) -> None:
    """Copy a page's content to the same relative path under ``archive_root``.

    The operation is always printed; in a dry run nothing is written. The
    original file is left in place.
    """
    destination = archive_root / page.rel
    shown_destination = _page_rel(destination, wiki_dir)
    print(f" ARCHIVE {page.rel} -> {shown_destination}")
    if not dry_run:
        destination.parent.mkdir(parents=True, exist_ok=True)
        write_md(destination, page.content)
535
+
536
+
537
+ def _remove_old_page(page: ExistingPage, dry_run: bool) -> None:
538
+ if dry_run:
539
+ return
540
+ page.path.unlink(missing_ok=True)
541
+
542
+
543
def _write_page(wiki_dir: Path, rel: str, content: str, dry_run: bool) -> None:
    """Write ``content`` to ``wiki_dir / rel`` unless the file already matches.

    Prints SKIP for an unchanged file and WRITE otherwise; a dry run prints
    the same lines but does not touch the filesystem.
    """
    destination = wiki_dir / rel
    unchanged = destination.exists() and read_md(destination) == content
    if unchanged:
        print(f" SKIP unchanged {rel}")
        return

    print(f" WRITE {rel}")
    if not dry_run:
        destination.parent.mkdir(parents=True, exist_ok=True)
        write_md(destination, content)
554
+
555
+
556
+ def _is_legacy_path(path: Path, wiki_dir: Path) -> bool:
557
+ try:
558
+ return path.relative_to(wiki_dir).parts[:1] == ("legacy",)
559
+ except ValueError:
560
+ try:
561
+ return path.resolve().relative_to(wiki_dir.resolve()).parts[:1] == ("legacy",)
562
+ except ValueError:
563
+ return False
564
+
565
+
566
def _active_markdown_pages(wiki_dir: Path) -> list[Path]:
    """Return every markdown file in the wiki except those under ``legacy``.

    The search is recursive and the result is sorted; a missing wiki
    directory yields an empty list.
    """
    if not wiki_dir.exists():
        return []
    active: list[Path] = []
    for md_path in sorted(wiki_dir.rglob("*.md")):
        if md_path.is_file() and not _is_legacy_path(md_path, wiki_dir):
            active.append(md_path)
    return active
573
+
574
+
575
def _rewrite_links_in_content(content: str, page: Path, wiki_dir: Path, link_map: dict[str, str]) -> str:
    """Rewrite relative markdown links in ``content`` according to ``link_map``.

    Args:
        content: Markdown text of ``page``.
        page: Location of the page; links are resolved against its directory.
        wiki_dir: Wiki root that ``link_map`` paths are relative to.
        link_map: Old wiki-relative path -> new wiki-relative path.

    Returns:
        The text with every mapped link replaced by a path relative to the
        page's directory (any ``#anchor`` is kept). Links that are external,
        anchor-only, outside the wiki or not in the map are left as they are.
    """
    if not link_map:
        return content
    resolved_root = wiki_dir.resolve()
    skipped_prefixes = ("http://", "https://", "mailto:", "#")

    def rewrite(found: re.Match[str]) -> str:
        untouched = found.group(0)
        opener, href, closer = found.groups()
        if href.startswith(skipped_prefixes):
            return untouched

        link_path, hash_sep, fragment = href.partition("#")
        if not link_path:
            return untouched

        try:
            absolute = (page.parent / link_path).resolve()
            old_rel = absolute.relative_to(resolved_root).as_posix()
        except ValueError:
            # The link points outside the wiki.
            return untouched

        new_rel = link_map.get(old_rel)
        if not new_rel:
            return untouched

        try:
            rewritten = os.path.relpath(wiki_dir / new_rel, start=page.parent)
            rewritten = rewritten.replace(os.sep, "/")
        except ValueError:
            return untouched
        if hash_sep:
            rewritten += f"#{fragment}"
        return f"{opener}{rewritten}{closer}"

    return _LINK_RE.sub(rewrite, content)
608
+
609
+
610
def _rewrite_active_links(wiki_dir: Path, link_map: dict[str, str], dry_run: bool) -> int:
    """Apply ``link_map`` to every active markdown page.

    Returns:
        The number of pages whose content changed. In a dry run the pages
        are counted and reported but not written.
    """
    changed_pages = 0
    for md_path in _active_markdown_pages(wiki_dir):
        before = read_md(md_path)
        after = _rewrite_links_in_content(before, md_path, wiki_dir, link_map)
        if after == before:
            continue
        changed_pages += 1
        print(f" REWRITE links: {_page_rel(md_path, wiki_dir)}")
        if not dry_run:
            write_md(md_path, after)
    return changed_pages
621
+
622
+
623
def _should_archive_matched_page(page: ExistingPage, target: TargetPage) -> bool:
    """Decide whether a matched page must be copied into the archive.

    Already-archived pages are never archived again. A page at a different
    path than its target always is. A page at the target's own path is
    archived only when it has no legacy marker yet and still carries content
    that differs from the generated page.
    """
    if page.archived:
        return False
    relocated = page.rel != target.rel
    already_merged = LEGACY_MARKER in page.content
    return relocated or (
        not already_merged and bool(_existing_legacy_payload(page, target.content))
    )
631
+
632
+
633
def _matched_archive_count(plan: MigrationPlan) -> int:
    """Count matched pages across the plan that qualify for archiving."""
    target_for = {target.rel: target for target in plan.targets}
    total = 0
    for rel, pages in plan.matches.items():
        target = target_for[rel]
        total += len([page for page in pages if _should_archive_matched_page(page, target)])
    return total
640
+
641
+
642
def _target_needs_apply(wiki_dir: Path, target: TargetPage, matched_pages: list[ExistingPage]) -> bool:
    """Return True while the target still has outstanding migration work.

    Work is outstanding when a matched page needs archiving, when a
    non-archived matched page at a different path still exists on disk, or
    when the file at the target path is missing or differs from the merged
    content.
    """
    for page in matched_pages:
        if _should_archive_matched_page(page, target):
            return True
    for page in matched_pages:
        if not page.archived and page.rel != target.rel and page.path.exists():
            return True

    destination = wiki_dir / target.rel
    expected = _merge_legacy_notes(target, matched_pages)
    if not destination.exists():
        return True
    return read_md(destination) != expected
651
+
652
+
653
def _manifest_payload(manifest: SyncManifest) -> str:
    """Serialize the manifest version and sources as indented, key-sorted JSON."""
    document = {"version": MANIFEST_VERSION, "sources": manifest.sources}
    return json.dumps(document, indent=2, sort_keys=True)
659
+
660
+
661
def _manifest_needs_write(wiki_dir: Path, manifest: SyncManifest | None) -> bool:
    """Return True when the manifest file is missing or differs from ``manifest``.

    Without a manifest there is nothing to write, so the answer is False.
    The on-disk text is compared with the output of ``_manifest_payload``.
    """
    if manifest is None:
        return False
    manifest_path = wiki_dir / MANIFEST_FILENAME
    if not manifest_path.exists():
        return True
    on_disk = manifest_path.read_text(encoding="utf-8")
    return on_disk != _manifest_payload(manifest)
666
+
667
+
668
def _pending_link_rewrite_count(wiki_dir: Path, link_map: dict[str, str]) -> int:
    """Count active pages whose links would change under ``link_map``.

    Nothing is written; an empty map short-circuits to zero without reading
    any page.
    """
    if not link_map:
        return 0

    def would_change(md_path: Path) -> bool:
        before = read_md(md_path)
        return _rewrite_links_in_content(before, md_path, wiki_dir, link_map) != before

    return sum(1 for md_path in _active_markdown_pages(wiki_dir) if would_change(md_path))
677
+
678
+
679
def _finalizers_pending(wiki_dir: Path, plan: MigrationPlan) -> bool:
    """Return True when any end-of-migration step still has work to do.

    Checked in order: the index file, the manifest, pending link rewrites,
    and the ``.gitignore`` entry (the last only when legacy archives exist
    or are planned).
    """
    index_path = wiki_dir / "index.md"
    if not index_path.exists() or read_md(index_path) != plan.index_content:
        return True
    if _manifest_needs_write(wiki_dir, plan.manifest):
        return True
    if _pending_link_rewrite_count(wiki_dir, plan.link_map) > 0:
        return True
    return _legacy_archive_ignore_applicable(wiki_dir, plan) and _legacy_gitignore_needs_write(wiki_dir)
688
+
689
+
690
def _pending_targets(wiki_dir: Path, plan: MigrationPlan) -> list[TargetPage]:
    """Return the plan's targets that still need to be applied, in plan order."""
    pending: list[TargetPage] = []
    for target in plan.targets:
        matched_pages = plan.matches.get(target.rel, [])
        if _target_needs_apply(wiki_dir, target, matched_pages):
            pending.append(target)
    return pending
696
+
697
+
698
def _build_chunks(wiki_dir: Path, plan: MigrationPlan, chunk_size: int) -> list[MigrationChunk]:
    """Split the currently pending work into chunks of at most ``chunk_size``.

    Pending targets come first, followed by unmatched pages that still exist
    on disk. Finalizers, when pending, ride on the last chunk; if only
    finalizers are pending a single empty chunk carries them.

    Raises:
        ValueError: If ``chunk_size`` is smaller than one.
    """
    if chunk_size < 1:
        raise ValueError("--chunk-size must be greater than zero")

    units: list[tuple[str, TargetPage | ExistingPage]] = []
    units += [("target", target) for target in _pending_targets(wiki_dir, plan)]
    units += [("unmatched", page) for page in plan.unmatched if page.path.exists()]

    finalizers_pending = _finalizers_pending(wiki_dir, plan)
    if not units:
        if finalizers_pending:
            return [MigrationChunk(1, 1, [], [], include_finalizers=True)]
        return []

    total = -(-len(units) // chunk_size)  # ceiling division
    chunks: list[MigrationChunk] = []
    for number, start in enumerate(range(0, len(units), chunk_size), start=1):
        batch = units[start:start + chunk_size]
        chunks.append(
            MigrationChunk(
                number=number,
                total=total,
                targets=[item for label, item in batch if label == "target"],
                unmatched=[item for label, item in batch if label == "unmatched"],
                include_finalizers=finalizers_pending and number == total,
            )
        )
    return chunks
727
+
728
+
729
+ def _chunk_link_map(plan: MigrationPlan, chunk: MigrationChunk) -> dict[str, str]:
730
+ if chunk.include_finalizers:
731
+ return plan.link_map
732
+
733
+ rels: set[str] = {page.rel for page in chunk.unmatched}
734
+ for target in chunk.targets:
735
+ rels.update(page.rel for page in plan.matches.get(target.rel, []))
736
+ return {
737
+ old_rel: new_rel
738
+ for old_rel, new_rel in plan.link_map.items()
739
+ if old_rel in rels
740
+ }
741
+
742
+
743
def _planned_archive_count(plan: MigrationPlan) -> int:
    """Return how many pages the plan would archive (matched plus unmatched)."""
    matched = _matched_archive_count(plan)
    unmatched = len(plan.unmatched)
    return matched + unmatched
745
+
746
+
747
def _legacy_archive_ignore_applicable(wiki_dir: Path, plan: MigrationPlan) -> bool:
    """Return True when a legacy archive exists already or the plan creates one."""
    if _legacy_archive_roots(wiki_dir):
        return True
    return _planned_archive_count(plan) > 0
749
+
750
+
751
def _chunk_has_archive_work(plan: MigrationPlan, chunk: MigrationChunk) -> bool:
    """Return True when executing ``chunk`` would archive at least one page."""
    if chunk.unmatched:
        return True
    return any(
        _should_archive_matched_page(page, target)
        for target in chunk.targets
        for page in plan.matches.get(target.rel, [])
    )
761
+
762
+
763
+ def _print_chunk_plan(chunks: list[MigrationChunk], chunk_size: int) -> None:
764
+ print(f"\nMigration chunk plan (max {chunk_size} pending page operation(s) per chunk):")
765
+ if not chunks:
766
+ print(" No pending migration changes.")
767
+ return
768
+
769
+ for chunk in chunks:
770
+ finalizers = " + final index/link/manifest refresh" if chunk.include_finalizers else ""
771
+ print(
772
+ f" {chunk.number}/{chunk.total}: "
773
+ f"{len(chunk.targets)} canonical page(s), "
774
+ f"{len(chunk.unmatched)} unmatched archive(s)"
775
+ f"{finalizers}"
776
+ )
777
+
778
+
779
def _apply_chunk(wiki_dir: Path, plan: MigrationPlan, chunk: MigrationChunk, dry_run: bool) -> None:
    """Execute (or preview) one chunk of the migration plan.

    Steps, in order: ensure the ``.gitignore`` entry when archives are
    involved; archive matched pages and write the chunk's canonical pages;
    archive and remove the chunk's unmatched pages; write the index (final
    chunk only); rewrite links; save the manifest (final chunk only, never
    in a dry run).
    """
    archive_root = wiki_dir / "legacy" / plan.archive_name
    finalizing = chunk.include_finalizers

    needs_ignore = _chunk_has_archive_work(plan, chunk) or (
        finalizing and _legacy_archive_ignore_applicable(wiki_dir, plan)
    )
    if needs_ignore:
        _ensure_legacy_gitignore(wiki_dir, dry_run)

    for target in chunk.targets:
        matched_pages = plan.matches.get(target.rel, [])
        for page in matched_pages:
            if _should_archive_matched_page(page, target):
                _archive_page(page, wiki_dir, archive_root, dry_run)
                # A page that already sits at the target path is overwritten
                # below, so only pages at other paths are removed.
                if page.rel != target.rel:
                    _remove_old_page(page, dry_run)
        merged = _merge_legacy_notes(target, matched_pages)
        _write_page(wiki_dir, target.rel, merged, dry_run)

    for page in chunk.unmatched:
        _archive_page(page, wiki_dir, archive_root, dry_run)
        _remove_old_page(page, dry_run)
        print(f" UNMATCHED archived: {page.rel}")

    if finalizing:
        _write_page(wiki_dir, "index.md", plan.index_content, dry_run)
    else:
        print(" DEFER index/manifest refresh until the final chunk")

    link_map = _chunk_link_map(plan, chunk)
    rewritten = _rewrite_active_links(wiki_dir, link_map, dry_run)
    if link_map:
        print(f" Link mappings: {len(link_map)} path(s); pages rewritten: {rewritten}")

    if finalizing:
        if dry_run:
            print(" DRY-RUN: manifest refresh skipped")
        elif plan.manifest is not None:
            plan.manifest.save(wiki_dir)
            print(f" WRITE {wiki_dir / '.llm-wiki-manifest.json'}")
816
+
817
+
818
def _apply_plan(wiki_dir: Path, plan: MigrationPlan, dry_run: bool) -> None:
    """Apply the whole plan in one pass.

    The plan is wrapped in a single chunk holding all targets and unmatched
    pages, with finalizers enabled.
    """
    everything = MigrationChunk(1, 1, plan.targets, plan.unmatched, include_finalizers=True)
    _apply_chunk(wiki_dir, plan, everything, dry_run)
827
+
828
+
829
def run(args) -> None:
    """Entry point for the ``migrate`` command.

    Reads ``src_dir``, ``wiki_dir``, ``dry_run``, ``chunk_size``, ``chunk``
    and ``plan_chunks`` from ``args`` (each falls back to a default when the
    attribute is absent), builds the migration plan and then either:

    * prints the chunk plan only (``plan_chunks``, or a dry run without an
      explicit chunk),
    * applies a single chunk (when ``chunk_size`` is given), or
    * applies the whole plan at once.

    Exits with status 1 when ``--chunk``/``--plan-chunks`` are used without
    ``--chunk-size``, when the wiki directory does not exist, or when the
    requested chunk number is outside the current chunk plan.
    """
    src_dir = getattr(args, "src_dir", ".")
    wiki_dir = Path(getattr(args, "wiki_dir", DEFAULT_WIKI_DIR))
    dry_run = getattr(args, "dry_run", False)
    chunk_size = getattr(args, "chunk_size", None)
    chunk_number = getattr(args, "chunk", None)
    plan_chunks = getattr(args, "plan_chunks", False)
    validate_path(src_dir, "--src-dir")
    validate_path(str(wiki_dir), "--wiki-dir")

    if chunk_size is None and (chunk_number is not None or plan_chunks):
        print("Error: --chunk and --plan-chunks require --chunk-size.", file=sys.stderr)
        sys.exit(1)

    if not wiki_dir.exists():
        print(f"Error: Directory {wiki_dir} does not exist.", file=sys.stderr)
        sys.exit(1)

    print(f"{'Planning' if dry_run else 'Migrating'} wiki at: {wiki_dir}")
    print(f"Source directory: {src_dir}")

    plan = _build_migration_plan(wiki_dir, src_dir)
    if dry_run:
        print("DRY-RUN: no files will be modified.")

    if chunk_size is not None:
        chunks = _build_chunks(wiki_dir, plan, chunk_size)
        _print_chunk_plan(chunks, chunk_size)

        if plan_chunks or (dry_run and chunk_number is None):
            print("PLAN: no files modified.")
            return

        if not chunks:
            print("\nNo pending migration changes.")
            return

        # Default to the first chunk only when no chunk was requested. Using
        # ``chunk_number or 1`` here would silently turn an explicit
        # ``--chunk 0`` into chunk 1 instead of rejecting it below.
        selected_number = 1 if chunk_number is None else chunk_number
        if selected_number < 1 or selected_number > len(chunks):
            print(
                f"Error: --chunk must be between 1 and {len(chunks)} for the current plan.",
                file=sys.stderr,
            )
            sys.exit(1)

        chunk = chunks[selected_number - 1]
        print(f"\nApplying migration chunk {chunk.number}/{chunk.total}:")
        _apply_chunk(wiki_dir, plan, chunk, dry_run)
        print(
            "\nMigration chunk complete: "
            f"{chunk.page_operations} page operation(s)."
        )
        if not chunk.include_finalizers:
            print("Run chunked migrate again after reviewing or committing this chunk.")
        return

    _apply_plan(wiki_dir, plan, dry_run)

    print(
        "\nMigration complete: "
        f"{len(plan.targets)} canonical page(s), "
        f"{_matched_archive_count(plan)} archived source page(s), "
        f"{len(plan.unmatched)} unmatched archived page(s)."
    )