agent-wiki-cli 0.3.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_wiki_cli-0.3.28.dist-info/METADATA +425 -0
- agent_wiki_cli-0.3.28.dist-info/RECORD +47 -0
- agent_wiki_cli-0.3.28.dist-info/WHEEL +5 -0
- agent_wiki_cli-0.3.28.dist-info/entry_points.txt +2 -0
- agent_wiki_cli-0.3.28.dist-info/licenses/LICENSE +21 -0
- agent_wiki_cli-0.3.28.dist-info/top_level.txt +1 -0
- llm_wiki_cli/__init__.py +7 -0
- llm_wiki_cli/cli.py +231 -0
- llm_wiki_cli/commands/__init__.py +1 -0
- llm_wiki_cli/commands/bootstrap_cmd.py +1072 -0
- llm_wiki_cli/commands/bump_cmd.py +55 -0
- llm_wiki_cli/commands/context_cmd.py +427 -0
- llm_wiki_cli/commands/extract_cmd.py +745 -0
- llm_wiki_cli/commands/generate_prompt_cmd.py +89 -0
- llm_wiki_cli/commands/hook_cmd.py +161 -0
- llm_wiki_cli/commands/init_cmd.py +92 -0
- llm_wiki_cli/commands/lint_cmd.py +294 -0
- llm_wiki_cli/commands/migrate_cmd.py +892 -0
- llm_wiki_cli/commands/release_cmd.py +163 -0
- llm_wiki_cli/commands/status_cmd.py +70 -0
- llm_wiki_cli/commands/sync_cmd.py +521 -0
- llm_wiki_cli/commands/trigger_cmd.py +205 -0
- llm_wiki_cli/commands/uninstall_cmd.py +221 -0
- llm_wiki_cli/commands/upgrade_cmd.py +196 -0
- llm_wiki_cli/config.py +318 -0
- llm_wiki_cli/extractors/__init__.py +46 -0
- llm_wiki_cli/extractors/common.py +90 -0
- llm_wiki_cli/extractors/go_extractor.py +143 -0
- llm_wiki_cli/extractors/go_scripts/go.mod +3 -0
- llm_wiki_cli/extractors/go_scripts/main.go +668 -0
- llm_wiki_cli/extractors/python_extractor.py +346 -0
- llm_wiki_cli/extractors/rust_extractor.py +143 -0
- llm_wiki_cli/extractors/rust_scripts/Cargo.lock +110 -0
- llm_wiki_cli/extractors/rust_scripts/Cargo.toml +11 -0
- llm_wiki_cli/extractors/rust_scripts/src/main.rs +803 -0
- llm_wiki_cli/extractors/ts_extractor.py +206 -0
- llm_wiki_cli/extractors/ts_scripts/extract.js +485 -0
- llm_wiki_cli/extractors/ts_scripts/package.json +10 -0
- llm_wiki_cli/services/__init__.py +0 -0
- llm_wiki_cli/services/circuit_breaker.py +79 -0
- llm_wiki_cli/services/io.py +47 -0
- llm_wiki_cli/services/lockfile.py +60 -0
- llm_wiki_cli/services/packages.py +173 -0
- llm_wiki_cli/services/paths.py +31 -0
- llm_wiki_cli/services/schema.py +214 -0
- llm_wiki_cli/services/secure_file.py +22 -0
- llm_wiki_cli/services/versioning.py +193 -0
|
@@ -0,0 +1,892 @@
|
|
|
1
|
+
"""Legacy wiki migration command.
|
|
2
|
+
|
|
3
|
+
`llm-wiki migrate` reconciles pages generated by older llm-wiki versions with
|
|
4
|
+
the current collision-aware naming rules. Active canonical pages are
|
|
5
|
+
regenerated from source, old active pages are archived, and previous page
|
|
6
|
+
content is preserved under a Legacy Notes section.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import sys
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
from .bootstrap_cmd import (
|
|
20
|
+
_build_relationships,
|
|
21
|
+
_generate_docker_md,
|
|
22
|
+
_generate_entity_md,
|
|
23
|
+
_generate_index_md,
|
|
24
|
+
_generate_module_md,
|
|
25
|
+
build_entity_page_map,
|
|
26
|
+
build_module_page_map,
|
|
27
|
+
)
|
|
28
|
+
from .extract_cmd import get_docker_inventory, get_inventory_result, print_inventory_failures
|
|
29
|
+
from .sync_cmd import MANIFEST_FILENAME, MANIFEST_VERSION, SyncManifest
|
|
30
|
+
from ..config import DEFAULT_WIKI_DIR, validate_path
|
|
31
|
+
from ..services.io import read_md, write_md
|
|
32
|
+
from ..services.paths import normalize_source_path
|
|
33
|
+
|
|
34
|
+
# HTML comment written right under the "## Legacy Notes" heading; everything
# after it in a page is treated as previously preserved legacy content.
LEGACY_MARKER = "<!-- llm-wiki-migrate:legacy-notes -->"

# Wiki sub-directories whose pages this command scans and regenerates.
_MANAGED_DIRS = ("entities", "modules", "infrastructure")

# Inline markdown link, captured as ("[label](", "url", ")").
_LINK_RE = re.compile(r"(\[[^\]]+\]\()([^)]+)(\))")

# First-level "# Heading" line; group 1 is the heading text.
_HEADING_RE = re.compile(r"^#\s+(.+?)\s*$", re.MULTILINE)

# "**Location:** `path:line`" metadata line; backticks are optional.
_LOCATION_RE = re.compile(r"^\*\*Location:\*\*\s*`?(.+?)`?\s*$", re.MULTILINE)

# "**Path:** `path`" metadata line; backticks are optional.
_PATH_RE = re.compile(r"^\*\*Path:\*\*\s*`?(.+?)`?\s*$", re.MULTILINE)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(frozen=True)
class ExistingPage:
    """Immutable snapshot of a wiki page found on disk before migration."""

    # Name of the directory that directly contains the page file.
    kind: str
    # Filesystem location the page was read from.
    path: Path
    # POSIX-style path of the page relative to the root it was scanned under.
    rel: str
    # File name without the ".md" suffix.
    stem: str
    # Full markdown text of the page.
    content: str
    # Text of the first "# " heading, when the page has one.
    heading: str | None = None
    # Source path parsed from a "**Location:**" line, if present.
    location_path: str | None = None
    # Line number parsed from the same "**Location:**" line, if present.
    location_line: int | None = None
    # Source path parsed from a "**Path:**" line, if present.
    source_path: str | None = None
    # True when the page was read from a legacy archive, not the live wiki.
    archived: bool = False
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass(frozen=True)
class TargetPage:
    """Immutable description of a canonical page built from the source inventory."""

    # Managed directory the page belongs to ("modules", "entities", ...).
    kind: str
    # Page file name without the ".md" suffix.
    stem: str
    # Wiki-relative POSIX path, e.g. "modules/<stem>.md".
    rel: str
    # Freshly generated markdown for the page.
    content: str
    # Inventory path of the source file the page documents.
    source_path: str | None = None
    # Class name, set only for entity pages.
    entity_name: str | None = None
    # Line number reported by the inventory for the entity, if any.
    line: int | None = None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class MigrationPlan:
    """Everything a migration run intends to do; used by apply and dry-run alike."""

    # Directory name under "legacy/" that this run archives into.
    archive_name: str
    # Canonical pages generated from the current inventory.
    targets: list[TargetPage]
    # Target rel -> existing pages whose content feeds that target.
    matches: dict[str, list[ExistingPage]] = field(default_factory=dict)
    # Active pages that correspond to no canonical target.
    unmatched: list[ExistingPage] = field(default_factory=list)
    # Old wiki-relative path -> new wiki-relative path, for link rewriting.
    link_map: dict[str, str] = field(default_factory=dict)
    # Regenerated index.md text.
    index_content: str = ""
    # Sync manifest rebuilt from the inventory, when available.
    manifest: SyncManifest | None = None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass(frozen=True)
class MigrationChunk:
    """One size-limited slice of the migration work that is still pending."""

    # 1-based position of this chunk.
    number: int
    # Total number of chunks in the plan.
    total: int
    # Canonical pages to write in this chunk.
    targets: list[TargetPage]
    # Unmatched pages to archive in this chunk.
    unmatched: list[ExistingPage]
    # True when this chunk also runs the final index/link/manifest steps.
    include_finalizers: bool = False

    @property
    def page_operations(self) -> int:
        """Number of page-level operations (targets plus unmatched) in the chunk."""
        canonical = len(self.targets)
        leftovers = len(self.unmatched)
        return canonical + leftovers
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _normalize_source_path(value: str | None, src_dir: str) -> str | None:
    """Delegate to the shared ``normalize_source_path`` helper.

    Kept as a module-local alias so callers in this file have one place to
    normalize source paths parsed out of page markdown.
    """
    normalized = normalize_source_path(value, src_dir)
    return normalized
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _page_rel(path: Path, wiki_dir: Path) -> str:
    """Return ``path`` relative to ``wiki_dir`` as a POSIX-style string.

    The paths are compared as given first; if that fails (for example one is
    absolute and the other relative) both are resolved and compared again.
    A ``ValueError`` from the second attempt propagates to the caller.
    """
    try:
        relative = path.relative_to(wiki_dir)
    except ValueError:
        relative = path.resolve().relative_to(wiki_dir.resolve())
    return relative.as_posix()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _legacy_gitignore_pattern(wiki_dir: Path) -> str:
    """Return the project-root-relative gitignore pattern for legacy archives.

    The wiki directory is expressed relative to the current working
    directory when possible; otherwise its path is used as given, with
    leading and trailing slashes removed.

    Returns:
        ``"<wiki>/legacy/"``, or ``"legacy/"`` when the wiki directory is the
        current working directory itself.
    """
    cwd = Path.cwd().resolve()
    try:
        rel = wiki_dir.resolve().relative_to(cwd).as_posix()
    except ValueError:
        rel = wiki_dir.as_posix()
    rel = rel.strip("/")
    # An empty relative path is rendered as "." by pathlib; treat it like
    # the empty string so the result is "legacy/" rather than "./legacy/".
    if rel in ("", "."):
        return "legacy/"
    return f"{rel}/legacy/"
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _gitignore_has_pattern(content: str, pattern: str) -> bool:
    """Report whether ``content`` already lists ``pattern``.

    Blank lines and ``#`` comments are skipped. Lines are compared after
    trimming whitespace and any leading or trailing slashes, so ``/a/b`` and
    ``a/b/`` are considered the same entry.
    """

    def _canonical(text: str) -> str:
        return text.lstrip("/").rstrip("/")

    wanted = _canonical(pattern.strip())
    entries = (raw.strip() for raw in content.splitlines())
    return any(
        bool(entry) and not entry.startswith("#") and _canonical(entry) == wanted
        for entry in entries
    )
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _legacy_gitignore_needs_write(wiki_dir: Path) -> bool:
    """Return True when ``.gitignore`` is missing or lacks the legacy pattern.

    ``.gitignore`` is looked up relative to the current working directory.
    """
    gitignore = Path(".gitignore")
    if gitignore.exists():
        existing = read_md(gitignore)
        wanted = _legacy_gitignore_pattern(wiki_dir)
        return not _gitignore_has_pattern(existing, wanted)
    return True
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _ensure_legacy_gitignore(wiki_dir: Path, dry_run: bool) -> bool:
    """Make sure the legacy archive directory is listed in ``.gitignore``.

    The planned addition is always printed; the file is only modified when
    ``dry_run`` is false. Existing content is kept and the new entry is
    appended after a blank line.

    Returns:
        True when an entry had to be added (also in dry-run mode), False when
        the pattern was already present.
    """
    pattern = _legacy_gitignore_pattern(wiki_dir)
    gitignore = Path(".gitignore")
    current = read_md(gitignore) if gitignore.exists() else ""
    if _gitignore_has_pattern(current, pattern):
        return False

    print(f" GITIGNORE add {pattern}")
    if not dry_run:
        entry = f"# LLM Wiki migration archives\n{pattern}\n"
        # Separate the new entry from existing rules with one blank line.
        lead = (current.rstrip("\n") + "\n\n") if current.strip() else ""
        write_md(gitignore, lead + entry)
    return True
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _split_location(value: str) -> tuple[str, int | None]:
    """Split a legacy Location value into path and optional line number.

    The text after the last ``:`` is treated as a line number only when it
    consists solely of decimal digits; otherwise the whole (stripped) value
    is returned as the path.

    Returns:
        ``(path, line)`` where ``line`` is ``None`` if no numeric suffix exists.
    """
    location = value.strip()
    path_part, sep, line_part = location.rpartition(":")
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits that int() cannot parse.
    if sep and line_part.isdecimal():
        return path_part, int(line_part)
    return location, None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _read_existing_page(
    path: Path,
    wiki_dir: Path,
    src_dir: str,
    *,
    archived: bool = False,
) -> ExistingPage:
    """Load a page from disk and parse the metadata used for matching.

    Args:
        path: Markdown file to read.
        wiki_dir: Root that ``rel`` is computed against.
        src_dir: Source directory passed to path normalization.
        archived: Recorded on the result as-is.

    Returns:
        An ``ExistingPage`` whose ``kind`` is the parent directory name and
        whose heading/location/path fields are filled from the first
        matching line of each type, when present.
    """
    text = read_md(path)
    rel = _page_rel(path, wiki_dir)

    heading: str | None = None
    found_heading = _HEADING_RE.search(text)
    if found_heading is not None:
        heading = found_heading.group(1).strip()

    location_path: str | None = None
    location_line: int | None = None
    found_location = _LOCATION_RE.search(text)
    if found_location is not None:
        raw_path, location_line = _split_location(found_location.group(1))
        location_path = _normalize_source_path(raw_path, src_dir)

    source_path: str | None = None
    found_path = _PATH_RE.search(text)
    if found_path is not None:
        source_path = _normalize_source_path(found_path.group(1), src_dir)

    return ExistingPage(
        kind=path.parent.name,
        path=path,
        rel=rel,
        stem=path.stem,
        content=text,
        heading=heading,
        location_path=location_path,
        location_line=location_line,
        source_path=source_path,
        archived=archived,
    )
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _active_managed_pages(wiki_dir: Path, src_dir: str) -> list[ExistingPage]:
    """Read every ``*.md`` file directly inside the managed wiki directories.

    Directories are visited in ``_MANAGED_DIRS`` order and files in sorted
    order; missing directories are skipped.
    """
    found: list[ExistingPage] = []
    for directory in (wiki_dir / name for name in _MANAGED_DIRS):
        if directory.exists():
            found.extend(
                _read_existing_page(md_file, wiki_dir, src_dir)
                for md_file in sorted(directory.glob("*.md"))
                if md_file.is_file()
            )
    return found
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _legacy_archive_roots(wiki_dir: Path) -> list[Path]:
    """List ``legacy/migrate-*`` directories, ordered by name descending.

    Returns an empty list when the wiki has no ``legacy`` directory.
    """
    legacy_dir = wiki_dir / "legacy"
    if not legacy_dir.exists():
        return []
    roots = [entry for entry in legacy_dir.glob("migrate-*") if entry.is_dir()]
    roots.sort(key=lambda entry: entry.name, reverse=True)
    return roots
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _archived_managed_pages(wiki_dir: Path, src_dir: str) -> list[ExistingPage]:
    """Read managed pages stored in earlier ``legacy/migrate-*`` archives.

    Archives are visited in the order ``_legacy_archive_roots`` returns them.
    Each page's ``rel`` is computed against its archive root, not the wiki
    root, and the page is flagged ``archived=True``.
    """
    found: list[ExistingPage] = []
    for archive_root in _legacy_archive_roots(wiki_dir):
        existing_dirs = (
            archive_root / name
            for name in _MANAGED_DIRS
            if (archive_root / name).exists()
        )
        for directory in existing_dirs:
            for md_file in sorted(directory.glob("*.md")):
                if not md_file.is_file():
                    continue
                found.append(
                    _read_existing_page(md_file, archive_root, src_dir, archived=True)
                )
    return found
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _additional_doc_entries(wiki_dir: Path) -> list[str]:
    """Collect wiki-relative paths of markdown files the tool does not manage.

    Skipped: anything under ``legacy``, files named ``index.md`` or
    ``log.md``, and files whose top-level directory is a managed directory or
    ``workflows``. Results follow the sorted recursive listing order.
    """
    if not wiki_dir.exists():
        return []

    skipped_roots = set(_MANAGED_DIRS) | {"workflows", "legacy"}
    skipped_names = {"index.md", "log.md"}

    extras: list[str] = []
    for md_file in sorted(wiki_dir.rglob("*.md")):
        if not md_file.is_file() or _is_legacy_path(md_file, wiki_dir):
            continue
        rel = _page_rel(md_file, wiki_dir)
        if md_file.name in skipped_names:
            continue
        top_level = Path(rel).parts[:1]
        if top_level and top_level[0] in skipped_roots:
            continue
        extras.append(rel)
    return extras
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _append_additional_docs_index(index_content: str, wiki_dir: Path) -> str:
    """Append an "Additional Docs" bullet list to the generated index.

    The index is returned unchanged when there are no extra documents.
    Otherwise trailing whitespace is trimmed from the index before the
    section is added, and the result has no trailing newline.
    """
    extra_docs = _additional_doc_entries(wiki_dir)
    if not extra_docs:
        return index_content

    def _bullet(rel: str) -> str:
        # Label is the path without its suffix, always with forward slashes.
        label = str(Path(rel).with_suffix("")).replace("\\", "/")
        return f"- [{label}]({rel})"

    header = [index_content.rstrip(), "", "## Additional Docs", ""]
    return "\n".join(header + [_bullet(rel) for rel in extra_docs])
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _build_targets(
    wiki_dir: Path,
    src_dir: str,
    inventory: dict,
    docker_inventory: dict,
) -> tuple[list[TargetPage], str, SyncManifest]:
    """Generate every canonical page, the index text, and the sync manifest.

    For each inventory file one module page is produced, followed by one
    entity page per entry in its ``"classes"`` list. Infrastructure pages for
    the docker inventory come last.

    Returns:
        ``(targets, index_content, manifest)`` where ``index_content`` already
        includes the "Additional Docs" section when applicable.
    """
    module_pages = build_module_page_map(inventory)
    entity_pages = build_entity_page_map(inventory)
    relationships = _build_relationships(inventory, module_pages)

    targets: list[TargetPage] = []
    entity_names: list[str] = []
    module_entries: list[dict] = []
    infra_entries: list[dict] = []

    for filepath, file_data in inventory.items():
        module_page = module_pages[filepath]

        # Class name -> entity page name, limited to this file.
        entities_in_file = {}
        for cls in file_data.get("classes", []):
            entities_in_file[cls["name"]] = entity_pages[(cls["name"], filepath)]

        module_entries.append(
            {
                "name": module_page,
                "path": filepath,
                "docstring": file_data.get("module_docstring", ""),
            }
        )
        module_target = TargetPage(
            kind="modules",
            stem=module_page,
            rel=f"modules/{module_page}.md",
            content=_generate_module_md(filepath, file_data, entities_in_file),
            source_path=filepath,
        )
        targets.append(module_target)

        for cls in file_data.get("classes", []):
            entity_page = entity_pages[(cls["name"], filepath)]
            entity_names.append(entity_page)
            entity_target = TargetPage(
                kind="entities",
                stem=entity_page,
                rel=f"entities/{entity_page}.md",
                content=_generate_entity_md(cls, filepath, relationships, module_page),
                source_path=filepath,
                entity_name=cls["name"],
                line=cls.get("line"),
            )
            targets.append(entity_target)

    for docker_file, docker_info in docker_inventory.items():
        # Flatten the file path into a page name: separators and dots become "_".
        page_name = docker_file.replace("\\", "/").replace("/", "_").replace(".", "_")
        infra_entries.append({"name": page_name, "type": docker_info.get("type", "")})
        infra_target = TargetPage(
            kind="infrastructure",
            stem=page_name,
            rel=f"infrastructure/{page_name}.md",
            content=_generate_docker_md(docker_file, docker_info, module_pages),
            source_path=docker_file,
        )
        targets.append(infra_target)

    workflows = _list_workflows(wiki_dir)
    # Empty workflow/infra lists are passed to the index generator as None.
    index_md = _generate_index_md(
        entity_names,
        module_entries,
        workflows or None,
        infra_entries or None,
    )
    manifest = SyncManifest.build_from_inventory(
        inventory,
        src_dir,
        entity_pages,
        module_pages,
    )
    return targets, _append_additional_docs_index(index_md, wiki_dir), manifest
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _list_workflows(wiki_dir: Path) -> list[dict]:
    """Return index entries for the markdown files in ``workflows/``.

    Each entry is ``{"name": <file stem>, "entry": ""}``, in sorted file
    order. Only regular files are listed, matching the other page scanners in
    this module; a directory whose name happens to end in ``.md`` is ignored.
    Returns an empty list when the directory does not exist.
    """
    workflow_dir = wiki_dir / "workflows"
    if not workflow_dir.exists():
        return []
    return [
        {"name": path.stem, "entry": ""}
        for path in sorted(workflow_dir.glob("*.md"))
        if path.is_file()
    ]
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _unique(values: list[TargetPage]) -> TargetPage | None:
    """Return the sole element of ``values``, or None if it has zero or several."""
    if len(values) != 1:
        return None
    return values[0]
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _build_match_lookups(targets: list[TargetPage]) -> dict[str, dict]:
    """Index the canonical targets by the keys used to match old pages.

    Returns a dict of lookup tables:
        ``by_rel``                  rel -> target
        ``entities_by_path_name``   (source_path, entity_name) -> [targets]
        ``entities_by_name``        entity_name -> [targets]
        ``modules_by_source``       source_path -> target
        ``modules_by_source_stem``  stem of source_path -> [targets]
        ``modules_by_page_stem``    page stem -> [targets]
        ``infra_by_source``         source_path -> target
        ``infra_by_page_stem``      page stem -> [targets]
    """
    entities_by_path_name: dict = {}
    entities_by_name: dict = {}
    modules_by_source: dict = {}
    modules_by_source_stem: dict = {}
    modules_by_page_stem: dict = {}
    infra_by_source: dict = {}
    infra_by_page_stem: dict = {}

    for target in targets:
        kind = target.kind
        if kind == "entities" and target.entity_name:
            path_key = (target.source_path, target.entity_name)
            entities_by_path_name.setdefault(path_key, []).append(target)
            entities_by_name.setdefault(target.entity_name, []).append(target)
        elif kind == "modules":
            modules_by_source[target.source_path] = target
            source_stem = Path(target.source_path or "").stem
            modules_by_source_stem.setdefault(source_stem, []).append(target)
            modules_by_page_stem.setdefault(target.stem, []).append(target)
        elif kind == "infrastructure":
            infra_by_source[target.source_path] = target
            infra_by_page_stem.setdefault(target.stem, []).append(target)

    return {
        "by_rel": {target.rel: target for target in targets},
        "entities_by_path_name": entities_by_path_name,
        "entities_by_name": entities_by_name,
        "modules_by_source": modules_by_source,
        "modules_by_source_stem": modules_by_source_stem,
        "modules_by_page_stem": modules_by_page_stem,
        "infra_by_source": infra_by_source,
        "infra_by_page_stem": infra_by_page_stem,
    }
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def _match_existing_page(page: ExistingPage, lookups: dict[str, dict]) -> TargetPage | None:
    """Find the canonical target that an existing page corresponds to.

    A page whose ``rel`` equals a target's ``rel`` always matches it.
    Otherwise matching depends on the page kind and only succeeds when the
    candidate is unambiguous:

    * entities: by (location path, name), then by name alone. The name is
      the page heading, or the text after the last ``_`` in the file stem.
    * modules: by source path, then page stem, then source-file stem.
    * infrastructure: by source path, then page stem.

    Returns None when nothing matches or the page kind is not managed.
    """
    by_rel = lookups["by_rel"]
    if page.rel in by_rel:
        return by_rel[page.rel]

    kind = page.kind

    if kind == "entities":
        name = page.heading or page.stem.rsplit("_", 1)[-1]
        if page.location_path:
            keyed = lookups["entities_by_path_name"].get((page.location_path, name), [])
            exact = _unique(keyed)
            if exact:
                return exact
        return _unique(lookups["entities_by_name"].get(name, []))

    if kind == "modules":
        by_source = lookups["modules_by_source"]
        if page.source_path and page.source_path in by_source:
            return by_source[page.source_path]
        canonical = _unique(lookups["modules_by_page_stem"].get(page.stem, []))
        if canonical:
            return canonical
        return _unique(lookups["modules_by_source_stem"].get(page.stem, []))

    if kind == "infrastructure":
        by_source = lookups["infra_by_source"]
        if page.source_path and page.source_path in by_source:
            return by_source[page.source_path]
        return _unique(lookups["infra_by_page_stem"].get(page.stem, []))

    return None
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def _build_migration_plan(wiki_dir: Path, src_dir: str) -> MigrationPlan:
    """Scan the source tree and the wiki, then compute the migration plan.

    Exits the process with status 1 (after printing the failures) when the
    inventory extraction reports a failure.

    Active pages are matched first; an active page with no target is recorded
    as unmatched and mapped to its future archive location. Pages from older
    archives only contribute when no active page has the same ``rel``, and
    never overwrite a link mapping set by an active page.
    """
    result = get_inventory_result(src_dir, deep=True)
    if result.failed:
        print_inventory_failures(result)
        sys.exit(1)

    targets, index_content, manifest = _build_targets(
        wiki_dir,
        src_dir,
        result.inventory,
        get_docker_inventory(src_dir),
    )
    lookups = _build_match_lookups(targets)
    canonical_rels = {target.rel for target in targets}

    stamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    plan = MigrationPlan(
        archive_name=f"migrate-{stamp}",
        targets=targets,
        index_content=index_content,
        manifest=manifest,
    )

    active_pages = _active_managed_pages(wiki_dir, src_dir)
    active_rels = {page.rel for page in active_pages}

    for page in active_pages:
        target = _match_existing_page(page, lookups)
        if not target:
            if page.rel not in canonical_rels:
                plan.unmatched.append(page)
                plan.link_map.setdefault(page.rel, f"legacy/{plan.archive_name}/{page.rel}")
            continue
        plan.matches.setdefault(target.rel, []).append(page)
        if page.rel != target.rel:
            plan.link_map[page.rel] = target.rel

    for page in _archived_managed_pages(wiki_dir, src_dir):
        if page.rel in active_rels:
            # An active page with this rel has already been handled above.
            continue
        target = _match_existing_page(page, lookups)
        if not target:
            plan.link_map.setdefault(page.rel, _page_rel(page.path, wiki_dir))
            continue
        plan.matches.setdefault(target.rel, []).append(page)
        if page.rel != target.rel:
            plan.link_map.setdefault(page.rel, target.rel)

    return plan
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def _existing_legacy_payload(page: ExistingPage, generated_content: str) -> str:
    """Return the part of an old page worth keeping as legacy notes.

    Returns ``""`` when the page is blank or identical (ignoring surrounding
    whitespace) to the generated content. If the page already carries the
    legacy marker, only the text after the first marker is returned;
    otherwise the whole stripped page is.
    """
    body = page.content.strip()
    if not body:
        return ""
    if body == generated_content.strip():
        return ""
    _, marker, after_marker = page.content.partition(LEGACY_MARKER)
    return after_marker.strip() if marker else body
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def _split_legacy_sections(payload: str) -> list[str]:
    """Break a legacy-notes payload into its ``### From ...`` sections.

    A blank payload yields ``[]``. A payload that does not begin with
    ``### From `` is returned as a single stripped element. Otherwise it is
    cut at every line starting with ``### From `` and empty pieces dropped.
    """
    text = payload.strip()
    if not text:
        return []
    if not text.startswith("### From "):
        return [text]
    pieces = re.split(r"(?=^### From )", text, flags=re.MULTILINE)
    trimmed = (piece.strip() for piece in pieces)
    return [piece for piece in trimmed if piece]
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def _merge_legacy_notes(target: TargetPage, pages: list[ExistingPage]) -> str:
    """Return the target content with preserved notes from ``pages`` appended.

    Each page contributes either its existing ``### From`` sections or a new
    section headed by its own ``rel``. Identical sections are kept once, in
    first-seen order. When nothing needs preserving the generated content is
    returned untouched.
    """
    # dict used as an insertion-ordered set of section texts.
    ordered_sections: dict[str, None] = {}
    for page in pages:
        payload = _existing_legacy_payload(page, target.content)
        if not payload:
            continue
        if payload.startswith("### From "):
            candidates = _split_legacy_sections(payload)
        else:
            candidates = [f"### From `{page.rel}`\n\n{payload}"]
        for section in candidates:
            ordered_sections.setdefault(section.strip(), None)

    if not ordered_sections:
        return target.content

    notes = "\n\n".join(ordered_sections).rstrip()
    return f"{target.content.rstrip()}\n\n## Legacy Notes\n\n{LEGACY_MARKER}\n\n{notes}\n"
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _archive_page(page: ExistingPage, wiki_dir: Path, archive_root: Path, dry_run: bool) -> None:
    """Copy a page's content into the archive, keeping its relative path.

    The operation is always announced; nothing is written in dry-run mode.
    """
    destination = archive_root / page.rel
    shown = _page_rel(destination, wiki_dir)
    print(f" ARCHIVE {page.rel} -> {shown}")
    if not dry_run:
        destination.parent.mkdir(parents=True, exist_ok=True)
        write_md(destination, page.content)
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def _remove_old_page(page: ExistingPage, dry_run: bool) -> None:
    """Delete the page file unless this is a dry run; a missing file is fine."""
    if not dry_run:
        page.path.unlink(missing_ok=True)
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
def _write_page(wiki_dir: Path, rel: str, content: str, dry_run: bool) -> None:
    """Write ``content`` to ``wiki_dir / rel`` unless it is already up to date.

    Prints SKIP when the file exists with identical content, WRITE otherwise.
    In dry-run mode the WRITE line is printed but no file is touched.
    """
    destination = wiki_dir / rel
    up_to_date = destination.exists() and read_md(destination) == content
    if up_to_date:
        print(f" SKIP unchanged {rel}")
        return

    print(f" WRITE {rel}")
    if not dry_run:
        destination.parent.mkdir(parents=True, exist_ok=True)
        write_md(destination, content)
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def _is_legacy_path(path: Path, wiki_dir: Path) -> bool:
    """Return True when ``path`` lies under ``wiki_dir / "legacy"``.

    The paths are compared as given, then again after resolving both. A path
    that is not inside ``wiki_dir`` at all is reported as not legacy.
    """
    try:
        relative = path.relative_to(wiki_dir)
    except ValueError:
        try:
            relative = path.resolve().relative_to(wiki_dir.resolve())
        except ValueError:
            return False
    return relative.parts[:1] == ("legacy",)
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def _active_markdown_pages(wiki_dir: Path) -> list[Path]:
    """Return every markdown file in the wiki outside ``legacy/``, sorted."""
    if not wiki_dir.exists():
        return []
    active: list[Path] = []
    for md_file in sorted(wiki_dir.rglob("*.md")):
        if md_file.is_file() and not _is_legacy_path(md_file, wiki_dir):
            active.append(md_file)
    return active
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def _rewrite_links_in_content(content: str, page: Path, wiki_dir: Path, link_map: dict[str, str]) -> str:
    """Retarget relative markdown links in ``content`` according to ``link_map``.

    Args:
        content: Markdown text of ``page``.
        page: Path of the page containing the links; links resolve against
            its parent directory.
        wiki_dir: Wiki root; ``link_map`` keys and values are relative to it.
        link_map: Old wiki-relative path -> new wiki-relative path.

    Links starting with ``http://``, ``https://``, ``mailto:`` or ``#``, links
    that resolve outside the wiki, and links with no mapping are left exactly
    as written. A ``#anchor`` suffix is carried over to the new link.
    """
    if not link_map:
        return content

    wiki_root = wiki_dir.resolve()
    page_dir = page.parent

    def _retarget(match: re.Match[str]) -> str:
        untouched = match.group(0)
        opener, url, closer = match.groups()
        if url.startswith(("http://", "https://", "mailto:", "#")):
            return untouched

        base_url, hash_sep, anchor = url.partition("#")
        if not base_url:
            return untouched

        try:
            old_rel = (page_dir / base_url).resolve().relative_to(wiki_root).as_posix()
        except ValueError:
            # Link points outside the wiki.
            return untouched

        new_rel = link_map.get(old_rel)
        if not new_rel:
            return untouched

        try:
            rebuilt = os.path.relpath(wiki_dir / new_rel, start=page_dir).replace(os.sep, "/")
        except ValueError:
            return untouched
        if hash_sep:
            rebuilt = f"{rebuilt}#{anchor}"
        return f"{opener}{rebuilt}{closer}"

    return _LINK_RE.sub(_retarget, content)
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
def _rewrite_active_links(wiki_dir: Path, link_map: dict[str, str], dry_run: bool) -> int:
    """Apply ``link_map`` to every active markdown page.

    Each page whose content would change is reported; the change is only
    written when ``dry_run`` is false.

    Returns:
        The number of pages whose links changed (or would change).
    """
    changed = 0
    for md_file in _active_markdown_pages(wiki_dir):
        before = read_md(md_file)
        after = _rewrite_links_in_content(before, md_file, wiki_dir, link_map)
        if after == before:
            continue
        changed += 1
        print(f" REWRITE links: {_page_rel(md_file, wiki_dir)}")
        if not dry_run:
            write_md(md_file, after)
    return changed
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def _should_archive_matched_page(page: ExistingPage, target: TargetPage) -> bool:
    """Decide whether a matched page must be copied to the archive.

    * Pages that already live in an archive are never archived again.
    * A page stored under a different path than its target always is.
    * A page at the target path is archived only if it has no legacy marker
      yet and holds content that differs from the generated page.
    """
    if page.archived:
        return False
    if page.rel != target.rel:
        return True
    already_merged = LEGACY_MARKER in page.content
    return (not already_merged) and bool(_existing_legacy_payload(page, target.content))
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def _matched_archive_count(plan: MigrationPlan) -> int:
    """Count matched pages across the plan that will be archived."""
    by_rel = {target.rel: target for target in plan.targets}
    total = 0
    for rel, pages in plan.matches.items():
        target = by_rel[rel]
        total += len([page for page in pages if _should_archive_matched_page(page, target)])
    return total
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def _target_needs_apply(wiki_dir: Path, target: TargetPage, matched_pages: list[ExistingPage]) -> bool:
    """Return True while a target still has work left to do.

    Work is pending when a matched page must be archived, when a
    non-archived matched page still exists under a different path, or when
    the file at the target path is missing or differs from the merged
    content.
    """
    for page in matched_pages:
        if _should_archive_matched_page(page, target):
            return True

    for page in matched_pages:
        if page.archived or page.rel == target.rel:
            continue
        if page.path.exists():
            return True

    destination = wiki_dir / target.rel
    expected = _merge_legacy_notes(target, matched_pages)
    if not destination.exists():
        return True
    return read_md(destination) != expected
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
def _manifest_payload(manifest: SyncManifest) -> str:
    """Serialize the manifest to JSON (2-space indent, sorted keys)."""
    document = {"version": MANIFEST_VERSION, "sources": manifest.sources}
    return json.dumps(document, indent=2, sort_keys=True)
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def _manifest_needs_write(wiki_dir: Path, manifest: SyncManifest | None) -> bool:
    """Return True when the manifest file is missing or differs from ``manifest``.

    With no manifest to write the answer is always False.
    """
    if manifest is None:
        return False
    manifest_path = wiki_dir / MANIFEST_FILENAME
    if not manifest_path.exists():
        return True
    on_disk = manifest_path.read_text(encoding="utf-8")
    return on_disk != _manifest_payload(manifest)
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
def _pending_link_rewrite_count(wiki_dir: Path, link_map: dict[str, str]) -> int:
    """Count active pages whose links would change under ``link_map``.

    Nothing is written; an empty map short-circuits to 0 without reading files.
    """
    if not link_map:
        return 0
    pending = 0
    for md_file in _active_markdown_pages(wiki_dir):
        text = read_md(md_file)
        if _rewrite_links_in_content(text, md_file, wiki_dir, link_map) == text:
            continue
        pending += 1
    return pending
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
def _finalizers_pending(wiki_dir: Path, plan: MigrationPlan) -> bool:
    """Return True when any end-of-migration step still has work to do.

    Checked in order: index.md content, the manifest file, outstanding link
    rewrites, and the ``.gitignore`` entry (only relevant when archives exist
    or are planned).
    """
    index_path = wiki_dir / "index.md"
    if not index_path.exists() or read_md(index_path) != plan.index_content:
        return True
    if _manifest_needs_write(wiki_dir, plan.manifest):
        return True
    if _pending_link_rewrite_count(wiki_dir, plan.link_map) > 0:
        return True
    return _legacy_archive_ignore_applicable(wiki_dir, plan) and _legacy_gitignore_needs_write(wiki_dir)
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
def _pending_targets(wiki_dir: Path, plan: MigrationPlan) -> list[TargetPage]:
    """Return the plan's targets that still need applying, in plan order."""
    pending: list[TargetPage] = []
    for target in plan.targets:
        matched = plan.matches.get(target.rel, [])
        if _target_needs_apply(wiki_dir, target, matched):
            pending.append(target)
    return pending
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
def _build_chunks(wiki_dir: Path, plan: MigrationPlan, chunk_size: int) -> list[MigrationChunk]:
    """Split the pending work into chunks of at most ``chunk_size`` operations.

    Pending targets come first, followed by unmatched pages that still exist
    on disk. Finalizers, when pending, are attached to the last chunk; if
    only finalizers remain a single empty chunk carries them.

    Raises:
        ValueError: if ``chunk_size`` is less than 1.
    """
    if chunk_size < 1:
        raise ValueError("--chunk-size must be greater than zero")

    units: list[tuple[str, TargetPage | ExistingPage]] = [
        ("target", target) for target in _pending_targets(wiki_dir, plan)
    ]
    units += [("unmatched", page) for page in plan.unmatched if page.path.exists()]

    finalizers_pending = _finalizers_pending(wiki_dir, plan)
    if not units:
        if finalizers_pending:
            return [MigrationChunk(1, 1, [], [], include_finalizers=True)]
        return []

    # Ceiling division without importing math.
    total = -(-len(units) // chunk_size)
    chunks: list[MigrationChunk] = []
    for number, start in enumerate(range(0, len(units), chunk_size), start=1):
        window = units[start:start + chunk_size]
        chunks.append(
            MigrationChunk(
                number=number,
                total=total,
                targets=[unit for kind, unit in window if kind == "target"],
                unmatched=[unit for kind, unit in window if kind == "unmatched"],
                include_finalizers=finalizers_pending and number == total,
            )
        )
    return chunks
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
def _chunk_link_map(plan: MigrationPlan, chunk: MigrationChunk) -> dict[str, str]:
    """Return the link mappings that apply to ``chunk``.

    A chunk that includes the finalizers gets the plan's complete link map
    (the same dict object, not a copy). Any other chunk gets a new dict
    restricted to the old paths it touches: its unmatched pages plus the
    pages matched to each of its targets. Entries keep the order they have
    in ``plan.link_map``.
    """
    if chunk.include_finalizers:
        return plan.link_map

    touched: set[str] = set()
    for page in chunk.unmatched:
        touched.add(page.rel)
    for target in chunk.targets:
        for page in plan.matches.get(target.rel, []):
            touched.add(page.rel)

    subset: dict[str, str] = {}
    for old_rel, new_rel in plan.link_map.items():
        if old_rel in touched:
            subset[old_rel] = new_rel
    return subset
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
def _planned_archive_count(plan: MigrationPlan) -> int:
    """Return how many pages the plan intends to archive.

    This is the count reported by ``_matched_archive_count`` plus the
    number of unmatched pages in the plan.
    """
    matched_total = _matched_archive_count(plan)
    unmatched_total = len(plan.unmatched)
    return matched_total + unmatched_total
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
def _legacy_archive_ignore_applicable(wiki_dir: Path, plan: MigrationPlan) -> bool:
    """Tell whether the legacy-archive gitignore handling is relevant.

    True when ``_legacy_archive_roots`` finds anything for ``wiki_dir``, or
    otherwise when the plan intends to archive at least one page.
    """
    if _legacy_archive_roots(wiki_dir):
        return True
    return _planned_archive_count(plan) > 0
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
def _chunk_has_archive_work(plan: MigrationPlan, chunk: MigrationChunk) -> bool:
    """Tell whether applying ``chunk`` will archive at least one page.

    Any unmatched page in the chunk counts. Otherwise the chunk has archive
    work when ``_should_archive_matched_page`` accepts at least one page
    matched to one of the chunk's targets.
    """
    if chunk.unmatched:
        return True

    for target in chunk.targets:
        for page in plan.matches.get(target.rel, []):
            if _should_archive_matched_page(page, target):
                return True
    return False
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def _print_chunk_plan(chunks: list[MigrationChunk], chunk_size: int) -> None:
    """Print a human-readable summary of the chunk plan to stdout.

    Emits a header naming ``chunk_size``, then one line per chunk with its
    position, the number of canonical pages and unmatched archives it
    holds, and a marker on the chunk that also runs the finalizers. An
    empty plan prints a single "nothing pending" line instead.
    """
    header = f"\nMigration chunk plan (max {chunk_size} pending page operation(s) per chunk):"
    print(header)

    if chunks:
        for entry in chunks:
            suffix = ""
            if entry.include_finalizers:
                suffix = " + final index/link/manifest refresh"
            position = f" {entry.number}/{entry.total}: "
            pages = f"{len(entry.targets)} canonical page(s), "
            archives = f"{len(entry.unmatched)} unmatched archive(s)"
            print(position + pages + archives + f"{suffix}")
    else:
        print(" No pending migration changes.")
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
def _apply_chunk(wiki_dir: Path, plan: MigrationPlan, chunk: MigrationChunk, dry_run: bool) -> None:
    """Apply one migration chunk, or go through the motions when ``dry_run``.

    Steps, in order:

    1. Ensure the legacy gitignore when the chunk archives anything, or
       when it is the finalizing chunk and the ignore is applicable.
    2. For each target: archive the matched pages that should be archived
       (removing those that live at a different path than the target),
       then write the target page merged with its legacy notes.
    3. Archive and remove each unmatched page of the chunk.
    4. Write ``index.md`` on the finalizing chunk; otherwise report that
       the index/manifest refresh is deferred.
    5. Rewrite active links using the chunk's link map.
    6. On the finalizing chunk, save the manifest (skipped on dry runs and
       when the plan has no manifest).

    ``dry_run`` is forwarded to every helper that touches files.
    """
    archive_root = wiki_dir / "legacy" / plan.archive_name

    wants_gitignore = _chunk_has_archive_work(plan, chunk)
    if not wants_gitignore and chunk.include_finalizers:
        wants_gitignore = _legacy_archive_ignore_applicable(wiki_dir, plan)
    if wants_gitignore:
        _ensure_legacy_gitignore(wiki_dir, dry_run)

    for target in chunk.targets:
        sources = plan.matches.get(target.rel, [])
        for source in sources:
            if _should_archive_matched_page(source, target):
                _archive_page(source, wiki_dir, archive_root, dry_run)
                # A source at the target's own path is overwritten by the
                # write below instead of being removed.
                if source.rel != target.rel:
                    _remove_old_page(source, dry_run)
        merged = _merge_legacy_notes(target, sources)
        _write_page(wiki_dir, target.rel, merged, dry_run)

    for orphan in chunk.unmatched:
        _archive_page(orphan, wiki_dir, archive_root, dry_run)
        _remove_old_page(orphan, dry_run)
        print(f" UNMATCHED archived: {orphan.rel}")

    if not chunk.include_finalizers:
        print(" DEFER index/manifest refresh until the final chunk")
    else:
        _write_page(wiki_dir, "index.md", plan.index_content, dry_run)

    link_map = _chunk_link_map(plan, chunk)
    rewritten = _rewrite_active_links(wiki_dir, link_map, dry_run)
    if link_map:
        print(f" Link mappings: {len(link_map)} path(s); pages rewritten: {rewritten}")

    if chunk.include_finalizers:
        if dry_run:
            print(" DRY-RUN: manifest refresh skipped")
        elif plan.manifest is not None:
            plan.manifest.save(wiki_dir)
            manifest_path = wiki_dir / '.llm-wiki-manifest.json'
            print(f" WRITE {manifest_path}")
|
|
816
|
+
|
|
817
|
+
|
|
818
|
+
def _apply_plan(wiki_dir: Path, plan: MigrationPlan, dry_run: bool) -> None:
    """Apply the whole plan in a single pass.

    Wraps every target and every unmatched page of the plan in one chunk
    (1 of 1) with the finalizers enabled, and hands it to ``_apply_chunk``.
    """
    _apply_chunk(
        wiki_dir,
        plan,
        MigrationChunk(
            number=1,
            total=1,
            targets=plan.targets,
            unmatched=plan.unmatched,
            include_finalizers=True,
        ),
        dry_run,
    )
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
def run(args) -> None:
    """Entry point for the migrate command.

    Reads ``src_dir``, ``wiki_dir``, ``dry_run``, ``chunk_size``, ``chunk``
    and ``plan_chunks`` from ``args`` (each optional, with a default),
    builds a migration plan, and then either applies it in one pass or,
    when ``chunk_size`` is given, prints the chunk plan and applies a
    single chunk (the first one unless ``chunk`` selects another).

    Exits with status 1 and a message on stderr when ``--chunk`` or
    ``--plan-chunks`` is used without ``--chunk-size``, when the wiki
    directory does not exist, or when the requested chunk number is outside
    the current plan. ``_build_chunks`` raises ``ValueError`` for a chunk
    size below 1; that exception is not handled here.
    """
    src_dir = getattr(args, "src_dir", ".")
    wiki_dir = Path(getattr(args, "wiki_dir", DEFAULT_WIKI_DIR))
    dry_run = getattr(args, "dry_run", False)
    chunk_size = getattr(args, "chunk_size", None)
    chunk_number = getattr(args, "chunk", None)
    plan_chunks = getattr(args, "plan_chunks", False)
    validate_path(src_dir, "--src-dir")
    validate_path(str(wiki_dir), "--wiki-dir")

    if chunk_size is None and (chunk_number is not None or plan_chunks):
        print("Error: --chunk and --plan-chunks require --chunk-size.", file=sys.stderr)
        sys.exit(1)

    if not wiki_dir.exists():
        print(f"Error: Directory {wiki_dir} does not exist.", file=sys.stderr)
        sys.exit(1)

    print(f"{'Planning' if dry_run else 'Migrating'} wiki at: {wiki_dir}")
    print(f"Source directory: {src_dir}")

    plan = _build_migration_plan(wiki_dir, src_dir)
    if dry_run:
        print("DRY-RUN: no files will be modified.")

    if chunk_size is not None:
        chunks = _build_chunks(wiki_dir, plan, chunk_size)
        _print_chunk_plan(chunks, chunk_size)

        # Planning only: either explicitly requested, or a dry run that did
        # not name a specific chunk to walk through.
        if plan_chunks or (dry_run and chunk_number is None):
            print("PLAN: no files modified.")
            return

        if not chunks:
            print("\nNo pending migration changes.")
            return

        # Default to the first chunk only when --chunk was not given. Using
        # `chunk_number or 1` would also turn an explicit 0 into 1 and
        # bypass the range check below.
        selected_number = 1 if chunk_number is None else chunk_number
        if selected_number < 1 or selected_number > len(chunks):
            print(
                f"Error: --chunk must be between 1 and {len(chunks)} for the current plan.",
                file=sys.stderr,
            )
            sys.exit(1)

        chunk = chunks[selected_number - 1]
        print(f"\nApplying migration chunk {chunk.number}/{chunk.total}:")
        _apply_chunk(wiki_dir, plan, chunk, dry_run)
        print(
            "\nMigration chunk complete: "
            f"{chunk.page_operations} page operation(s)."
        )
        if not chunk.include_finalizers:
            print("Run chunked migrate again after reviewing or committing this chunk.")
        return

    _apply_plan(wiki_dir, plan, dry_run)

    print(
        "\nMigration complete: "
        f"{len(plan.targets)} canonical page(s), "
        f"{_matched_archive_count(plan)} archived source page(s), "
        f"{len(plan.unmatched)} unmatched archived page(s)."
    )
|