@hutusi/amytis 1.14.0 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/.github/workflows/ci.yml +1 -1
  2. package/.github/workflows/publish.yml +2 -2
  3. package/CHANGELOG.md +42 -0
  4. package/CLAUDE.md +90 -219
  5. package/README.md +33 -1
  6. package/README.zh.md +33 -1
  7. package/TODO.md +10 -0
  8. package/bun.lock +205 -539
  9. package/content/books/sample-book/index.mdx +3 -0
  10. package/content/posts/code-block-features-showcase.mdx +223 -0
  11. package/content/series/rst-legacy/deeper-notes/images/test.svg +4 -0
  12. package/content/series/rst-legacy/deeper-notes/index.rst +15 -0
  13. package/content/series/rst-legacy/getting-started.rst +24 -0
  14. package/content/series/rst-legacy/index.rst +9 -0
  15. package/content/series/rst-readme/README.rst +9 -0
  16. package/content/series/rst-readme/readme-index-post.rst +10 -0
  17. package/content/series/rst-toctree/first-post.rst +6 -0
  18. package/content/series/rst-toctree/index.rst +10 -0
  19. package/content/series/rst-toctree/second-post.rst +6 -0
  20. package/content/series/rst-toctree-precedence/first-post.rst +6 -0
  21. package/content/series/rst-toctree-precedence/index.rst +12 -0
  22. package/content/series/rst-toctree-precedence/second-post.rst +6 -0
  23. package/docs/ALERTS.md +112 -0
  24. package/docs/ARCHITECTURE.md +239 -8
  25. package/docs/CODE-BLOCKS.md +238 -0
  26. package/docs/CONTRIBUTING.md +36 -0
  27. package/docs/guides/README.md +11 -0
  28. package/docs/guides/importing-vuepress-books.md +178 -0
  29. package/eslint.config.mjs +20 -6
  30. package/next.config.ts +2 -2
  31. package/package.json +52 -24
  32. package/packages/create-amytis/package.json +1 -1
  33. package/packages/create-amytis/src/index.test.ts +43 -1
  34. package/packages/create-amytis/src/index.ts +64 -8
  35. package/public/next-image-export-optimizer-hashes.json +14 -73
  36. package/scripts/build-pagefind.ts +172 -0
  37. package/scripts/copy-assets.ts +246 -56
  38. package/scripts/generate-code-group-icons.ts +79 -0
  39. package/scripts/generate-knowledge-graph.ts +2 -1
  40. package/scripts/render-rst.py +923 -0
  41. package/scripts/run-with-rst-python.ts +42 -0
  42. package/scripts/sync-vuepress-book.ts +499 -0
  43. package/src/app/[slug]/[postSlug]/page.tsx +20 -10
  44. package/src/app/[slug]/page/[page]/page.tsx +15 -0
  45. package/src/app/books/[slug]/{[chapter] → [...chapter]}/page.tsx +32 -10
  46. package/src/app/books/[slug]/page.tsx +67 -32
  47. package/src/app/globals.css +639 -94
  48. package/src/app/page.tsx +1 -1
  49. package/src/app/series/[slug]/page/[page]/page.tsx +74 -6
  50. package/src/app/series/[slug]/page.tsx +11 -13
  51. package/src/app/series/page.tsx +3 -3
  52. package/src/app/sitemap.ts +3 -3
  53. package/src/components/ArticleCopyCleaner.tsx +64 -0
  54. package/src/components/AuthorCard.tsx +25 -16
  55. package/src/components/BookMobileNav.tsx +44 -50
  56. package/src/components/BookSidebar.tsx +0 -0
  57. package/src/components/CodeBlock.test.tsx +93 -8
  58. package/src/components/CodeBlock.tsx +39 -101
  59. package/src/components/CodeBlockToolbar.tsx +88 -0
  60. package/src/components/CodeGroup.tsx +81 -0
  61. package/src/components/CoverImage.tsx +6 -2
  62. package/src/components/ExternalLinkIcon.tsx +15 -0
  63. package/src/components/FeaturedStoriesSection.tsx +3 -3
  64. package/src/components/GithubAlert.tsx +97 -0
  65. package/src/components/MarkdownRenderer.test.tsx +30 -4
  66. package/src/components/MarkdownRenderer.tsx +148 -24
  67. package/src/components/Mermaid.tsx +32 -1
  68. package/src/components/PostList.tsx +1 -1
  69. package/src/components/PostNavigation.tsx +13 -2
  70. package/src/components/PostSidebar.tsx +13 -2
  71. package/src/components/RstRenderer.test.tsx +93 -0
  72. package/src/components/RstRenderer.tsx +157 -0
  73. package/src/components/Search.tsx +18 -4
  74. package/src/components/SeriesCatalog.tsx +1 -1
  75. package/src/components/ShareBar.tsx +5 -0
  76. package/src/components/TocPanel.tsx +10 -2
  77. package/src/i18n/translations.ts +2 -0
  78. package/src/layouts/BookLayout.tsx +35 -4
  79. package/src/layouts/PostLayout.tsx +10 -2
  80. package/src/layouts/SimpleLayout.tsx +10 -3
  81. package/src/lib/code-group-icons.test.ts +78 -0
  82. package/src/lib/code-group-icons.ts +148 -0
  83. package/src/lib/image-utils.test.ts +19 -0
  84. package/src/lib/image-utils.ts +11 -0
  85. package/src/lib/markdown.test.ts +195 -14
  86. package/src/lib/markdown.ts +928 -254
  87. package/src/lib/normalize-vuepress-math.ts +118 -0
  88. package/src/lib/rehype-fence-meta.ts +22 -0
  89. package/src/lib/rehype-image-metadata.ts +2 -2
  90. package/src/lib/remark-book-chapter-links.ts +106 -0
  91. package/src/lib/remark-code-group.ts +54 -0
  92. package/src/lib/remark-github-alerts.test.ts +83 -0
  93. package/src/lib/remark-github-alerts.ts +65 -0
  94. package/src/lib/remark-vuepress-containers.ts +130 -0
  95. package/src/lib/rst-renderer.test.ts +355 -0
  96. package/src/lib/rst-renderer.ts +629 -0
  97. package/src/lib/rst.test.ts +350 -0
  98. package/src/lib/rst.ts +674 -0
  99. package/src/lib/series-redirects.ts +42 -0
  100. package/src/lib/shiki-rst.ts +185 -0
  101. package/src/lib/shiki.test.ts +153 -0
  102. package/src/lib/shiki.ts +292 -0
  103. package/src/lib/urls.ts +57 -0
  104. package/src/test-utils/render.ts +23 -0
  105. package/tests/fixtures/sync-vuepress-book/docs/.vuepress/config.js +43 -0
  106. package/tests/fixtures/sync-vuepress-book/docs/intro/welcome.md +7 -0
  107. package/tests/fixtures/sync-vuepress-book/docs/maths/linear/assets/diagram.png +1 -0
  108. package/tests/fixtures/sync-vuepress-book/docs/maths/linear/matrices.md +7 -0
  109. package/tests/fixtures/sync-vuepress-book/docs/maths/linear/vectors.md +9 -0
  110. package/tests/helpers/env.ts +19 -0
  111. package/tests/integration/book-chapter-links.test.ts +107 -0
  112. package/tests/integration/books-nested-toc.test.ts +176 -0
  113. package/tests/integration/books.test.ts +3 -2
  114. package/tests/integration/code-block-features.test.ts +188 -0
  115. package/tests/integration/code-group.test.ts +183 -0
  116. package/tests/integration/code-notation.test.ts +97 -0
  117. package/tests/integration/feed-utils.test.ts +13 -0
  118. package/tests/integration/github-alerts.test.ts +82 -0
  119. package/tests/integration/markdown-external-links.test.ts +103 -0
  120. package/tests/integration/normalize-vuepress-math.test.ts +149 -0
  121. package/tests/integration/reading-time-headings.test.ts +12 -14
  122. package/tests/integration/series-draft.test.ts +12 -5
  123. package/tests/integration/series.test.ts +93 -0
  124. package/tests/integration/sync-vuepress-book.test.ts +240 -0
  125. package/tests/integration/vuepress-containers.test.ts +107 -0
  126. package/tests/tooling/build-pagefind.test.ts +66 -0
  127. package/tests/tooling/new-post.test.ts +1 -1
  128. package/tests/unit/static-params.test.ts +166 -13
@@ -0,0 +1,923 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import copy
7
+ import html
8
+ import json
9
+ import posixpath
10
+ import re
11
+ import sys
12
+ from contextlib import contextmanager
13
+ from html.parser import HTMLParser
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+
18
# Metadata keys (lower-cased) whose value is split on commas into a list.
CSV_FIELDS = {"tags", "authors", "posts", "redirectfrom"}

# Metadata keys (lower-cased) whose value must be the literal "true" or "false".
BOOLEAN_FIELDS = {"featured", "pinned", "draft", "latex", "toc", "commentable"}

# Metadata keys (lower-cased) kept as plain stripped strings.
SCALAR_FIELDS = {
    "date", "subtitle", "excerpt", "category", "author",
    "layout", "series", "coverimage", "sort", "type",
}

# Sentinel inserted in front of legacy ``:doc:`` roles before parsing and
# stripped again from the rendered HTML and the extracted body text.
LEGACY_DOC_ROLE_BOUNDARY = "__AMYTIS_RST_DOC_ROLE_BOUNDARY__"
LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE = LEGACY_DOC_ROLE_BOUNDARY + " "
34
+
35
+
36
class RstRenderError(Exception):
    """Error raised by this script when an rST source cannot be rendered."""
38
+
39
+
40
class BodyFragmentParser(HTMLParser):
    """Collect the inner markup of a ``<main>`` or ``<body>`` element.

    Capture starts at the first ``<main>`` or ``<body>`` start tag seen while
    nothing is being captured and stops at its matching end tag. Everything in
    between (tags, text, comments, entity/char references, declarations) is
    re-serialised verbatim into the fragment; the target element's own tags
    are not included.
    """

    # HTML void elements never have an end tag. They must not count towards
    # nesting depth, otherwise a bare ``<br>`` or ``<img>`` would leave the
    # depth permanently off by one and the target's end tag would be missed.
    _VOID_ELEMENTS = frozenset({
        "area", "base", "br", "col", "embed", "hr", "img",
        "input", "link", "meta", "source", "track", "wbr",
    })

    def __init__(self) -> None:
        # Keep character references unconverted so they can be echoed verbatim.
        super().__init__(convert_charrefs=False)
        self._target: str | None = None
        self._depth = 0
        self._fragments: list[str] = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        if self._target is None:
            if tag in {"main", "body"}:
                self._target = tag
                self._depth = 1
            return

        if tag not in self._VOID_ELEMENTS:
            self._depth += 1
        starttag_text = self.get_starttag_text()
        if starttag_text is not None:
            self._fragments.append(starttag_text)

    def handle_endtag(self, tag: str) -> None:
        if self._target is None:
            return

        if tag in self._VOID_ELEMENTS:
            # A stray end tag for a void element (e.g. ``</br>``) has no
            # matching depth increment; drop it instead of unbalancing depth.
            return

        if self._depth == 1 and tag == self._target:
            self._target = None
            self._depth = 0
            return

        self._depth -= 1
        self._fragments.append(f"</{tag}>")

    def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        # Self-closing tags (``<hr />``) are echoed without touching depth.
        if self._target is not None:
            starttag_text = self.get_starttag_text()
            if starttag_text is not None:
                self._fragments.append(starttag_text)

    def handle_data(self, data: str) -> None:
        if self._target is not None:
            self._fragments.append(data)

    def handle_comment(self, data: str) -> None:
        if self._target is not None:
            self._fragments.append(f"<!--{data}-->")

    def handle_entityref(self, name: str) -> None:
        if self._target is not None:
            self._fragments.append(f"&{name};")

    def handle_charref(self, name: str) -> None:
        if self._target is not None:
            self._fragments.append(f"&#{name};")

    def handle_decl(self, decl: str) -> None:
        if self._target is not None:
            self._fragments.append(f"<!{decl}>")

    def get_fragment(self) -> str:
        """Return the captured markup, joined and stripped of outer whitespace."""
        return "".join(self._fragments).strip()
99
+
100
+
101
def detect_series_root(source_file: Path) -> Path | None:
    """Return the directory directly below the first ``series`` path component.

    The path is resolved first. ``None`` is returned when no component is
    named ``series`` or when ``series`` is the last component.
    """
    components = source_file.resolve().parts
    if "series" not in components:
        return None

    # Keep everything up to and including the component after "series".
    root_end = components.index("series") + 2
    if root_end > len(components):
        return None

    return Path(*components[:root_end])
112
+
113
+
114
def slug_from_doc_path(doc_path: Path) -> str:
    """Derive a slug from an rST path.

    ``index.rst`` and ``README.rst`` take the name of their containing folder;
    any other file uses its own stem.
    """
    is_folder_index = doc_path.name in ("index.rst", "README.rst")
    return doc_path.parent.name if is_folder_index else doc_path.stem
118
+
119
+
120
def resolve_doc_target_path(source_file: Path, target: str) -> Path | None:
    """Locate the rST file a ``:doc:`` target refers to.

    The target is resolved relative to the directory of ``source_file``.
    Candidates are tried in order: the target as an ``.rst`` file (the suffix
    is appended unless already present), then ``<target>/index.rst``, then
    ``<target>/README.rst``. Returns the first candidate that exists, or
    ``None`` when none does.
    """
    base = (source_file.parent / target).resolve()
    if base.suffix == ".rst":
        rst_file = base
    else:
        rst_file = base.parent / (base.name + ".rst")

    candidates = (rst_file, base / "index.rst", base / "README.rst")
    return next((path for path in candidates if path.exists()), None)
134
+
135
+
136
def resolve_doc_target_uri(source_file: Path, target: str) -> str | None:
    """Map a ``:doc:`` target to a site URI of the form ``/<series>/<slug>``.

    Returns ``None`` when the target file cannot be found or when it does not
    live under a ``series`` directory.
    """
    target_path = resolve_doc_target_path(source_file, target)
    series_root = None if target_path is None else detect_series_root(target_path)
    if target_path is None or series_root is None:
        return None

    return f"/{series_root.name}/{slug_from_doc_path(target_path)}"
148
+
149
+
150
def register_doc_role(source_file: Path, warnings: list[str]) -> None:
    """Register a ``:doc:`` role that links to other rST documents.

    The role accepts ``target`` or ``Label <target>``. Targets are resolved
    relative to ``source_file``; a resolved target becomes a reference node,
    an unresolved one is rendered as a literal and a message is appended to
    ``warnings``.
    """
    from docutils import nodes
    from docutils.parsers.rst import roles

    def doc_role(  # type: ignore[override]
        _name: str,
        rawtext: str,
        text: str,
        _lineno: int,
        _inliner: Any,
        _options: dict[str, Any] | None = None,
        _content: list[str] | None = None,
    ) -> tuple[list[Any], list[Any]]:
        stripped = text.strip()
        explicit_title = re.match(r"(.+?)\s*<(.+)>$", stripped)
        if explicit_title:
            label = explicit_title.group(1).strip()
            target = explicit_title.group(2).strip()
        else:
            label, target = None, stripped

        # Without an explicit label, show the last path segment of the target.
        display_text = label or target.split("/")[-1]

        refuri = resolve_doc_target_uri(source_file, target)
        if refuri is not None:
            return [nodes.reference(rawtext, display_text, refuri=refuri)], []

        warnings.append(f'Unresolved :doc: target "{target}" in {source_file}')
        return [nodes.literal(rawtext, display_text)], []

    roles.register_canonical_role("doc", doc_role)
180
+
181
+
182
def register_passthrough_roles(warnings: list[str]) -> None:
    """Register the ``dtag``, ``ref`` and ``numref`` interpreted-text roles.

    * ``dtag`` is rendered as an inline node carrying the role name as a
      class, and a warning is recorded.
    * ``ref`` becomes an in-page ``#id`` reference. The id comes from the
      document's known names when available, otherwise from a slugified form
      of the target.
    * ``numref`` becomes an in-page reference only when the target is a known
      name; otherwise it is rendered inline and a warning is recorded.

    Messages are appended to ``warnings``.
    """
    from docutils import nodes
    from docutils.parsers.rst import roles

    def parse_role_target(text: str) -> tuple[str | None, str]:
        """Split ``Label <target>`` into (label, target); label is None if absent."""
        target = text.strip()
        match = re.match(r"(.+?)\s*<(.+)>$", target)
        if match:
            return match.group(1).strip(), match.group(2).strip()
        return None, target

    def normalize_internal_ref(target: str) -> str:
        """Fallback id: strip backticks, turn underscores and whitespace into '-'."""
        cleaned = target.strip().strip("`")
        cleaned = cleaned.replace("_", "-")
        cleaned = re.sub(r"\s+", "-", cleaned)
        return cleaned

    def resolve_named_refid(inliner: Any, target: str) -> str | None:
        """Look the target up in ``document.nameids``; None when unknown."""
        document = getattr(inliner, "document", None)
        if document is None:
            return None

        nameids = getattr(document, "nameids", {})
        # docutils registers names in case- and whitespace-normalized form, so
        # a label written with capitals or repeated spaces only matches after
        # normalization. The raw spelling is still tried first.
        for candidate in (target, nodes.fully_normalize_name(target)):
            refid = nameids.get(candidate)
            if isinstance(refid, str) and refid:
                return refid
        return None

    def make_passthrough_role(role_name: str):
        def passthrough_role(  # type: ignore[override]
            _name: str,
            rawtext: str,
            text: str,
            _lineno: int,
            _inliner: Any,
            _options: dict[str, Any] | None = None,
            _content: list[str] | None = None,
        ) -> tuple[list[Any], list[Any]]:
            warnings.append(f'Unsupported interpreted text role ":{role_name}:" rendered as plain inline text.')
            return [nodes.inline(rawtext, text, classes=[role_name])], []

        return passthrough_role

    def ref_role(  # type: ignore[override]
        _name: str,
        rawtext: str,
        text: str,
        _lineno: int,
        _inliner: Any,
        _options: dict[str, Any] | None = None,
        _content: list[str] | None = None,
    ) -> tuple[list[Any], list[Any]]:
        label, target = parse_role_target(text)
        display_text = label or target
        refid = resolve_named_refid(_inliner, target) or normalize_internal_ref(target)
        return [nodes.reference(rawtext, display_text, refuri=f"#{refid}")], []

    def numref_role(  # type: ignore[override]
        _name: str,
        rawtext: str,
        text: str,
        _lineno: int,
        _inliner: Any,
        _options: dict[str, Any] | None = None,
        _content: list[str] | None = None,
    ) -> tuple[list[Any], list[Any]]:
        label, target = parse_role_target(text)
        # A label containing a "%s" placeholder cannot be filled in here, so
        # the bare target is shown instead.
        display_text = target if label and "%s" in label else (label or target)
        refid = resolve_named_refid(_inliner, target)
        if refid is not None:
            return [nodes.reference(rawtext, display_text, refuri=f"#{refid}", classes=["numref"])], []

        warnings.append('Unsupported interpreted text role ":numref:" rendered as plain inline text.')
        return [nodes.inline(rawtext, display_text, classes=["numref"])], []

    for role_name in ("dtag",):
        roles.register_canonical_role(role_name, make_passthrough_role(role_name))
    roles.register_canonical_role("ref", ref_role)
    roles.register_canonical_role("numref", numref_role)
261
+
262
+
263
@contextmanager
def temporary_role_overrides(source_file: Path, warnings: list[str]):
    """Install the custom ``doc``/``dtag``/``ref``/``numref`` roles for the
    duration of the ``with`` block and restore the previous state afterwards.

    Raises:
        RstRenderError: if the installed docutils does not expose the private
            ``_role_registry`` and ``_roles`` tables this relies on.
    """
    from docutils.parsers.rst import roles

    if not hasattr(roles, "_role_registry") or not hasattr(roles, "_roles"):
        raise RstRenderError(
            "Incompatible docutils roles registry layout. Expected _role_registry and _roles "
            "attributes for temporary role overrides."
        )

    tracked_names = ("doc", "dtag", "ref", "numref")
    # Snapshot both tables; None marks "was not registered".
    snapshots = [
        (table, {name: table.get(name) for name in tracked_names})
        for table in (roles._role_registry, roles._roles)
    ]

    try:
        # Register inside the try so that a failure part-way through still
        # rolls back whatever was already installed.
        register_doc_role(source_file, warnings)
        register_passthrough_roles(warnings)
        yield
    finally:
        for table, previous in snapshots:
            for name, role_fn in previous.items():
                if role_fn is None:
                    table.pop(name, None)
                else:
                    table[name] = role_fn
294
+
295
+
296
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse and validate the command line.

    Two mutually exclusive modes are supported: single-file mode (``--file``
    plus ``--image-base-slug``) and batch mode (``--batch-stdin`` or
    ``--batch-file``).

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The parsed namespace.

    Raises:
        SystemExit: via ``parser.error`` when the option combination is invalid.
    """
    parser = argparse.ArgumentParser(description="Render a single rST file to JSON via docutils.")
    parser.add_argument("--file", help="Absolute or relative path to the .rst file")
    parser.add_argument(
        "--image-base-slug",
        help="Public-relative base slug for local assets, for example posts/my-post",
    )
    parser.add_argument(
        "--batch-stdin",
        action="store_true",
        help="Read a JSON array of batch render entries from stdin",
    )
    parser.add_argument(
        "--batch-file",
        help="Read a JSON array of batch render entries from a file",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Fail on missing local assets instead of reporting them in the output",
    )
    args = parser.parse_args(argv)

    if args.batch_stdin or args.batch_file:
        if args.batch_stdin and args.batch_file:
            parser.error("--batch-stdin and --batch-file cannot be combined")
        if args.file or args.image_base_slug:
            parser.error("--batch-stdin/--batch-file cannot be combined with --file or --image-base-slug")
        return args

    if not args.file or not args.image_base_slug:
        # Both batch flags lift this requirement, so name both of them.
        parser.error("--file and --image-base-slug are required unless --batch-stdin or --batch-file is used")

    return args
330
+
331
+
332
def resolve_source_file(raw_file: str) -> Path:
    """Turn a user-supplied path into a resolved absolute path.

    ``~`` is expanded and a relative path is anchored at the current working
    directory before resolving.
    """
    expanded = Path(raw_file).expanduser()
    anchored = expanded if expanded.is_absolute() else Path.cwd() / expanded
    return anchored.resolve()
337
+
338
+
339
def normalize_metadata_value(key: str, value: str) -> Any:
    """Coerce a raw metadata string according to the field it belongs to.

    The key is matched case-insensitively. CSV fields become a list of
    non-empty, stripped items; boolean fields become ``True``/``False``;
    every other field is returned as the stripped string.

    Raises:
        RstRenderError: when a boolean field is neither "true" nor "false"
            (compared case-insensitively).
    """
    field = key.lower()
    text = value.strip()

    if field in CSV_FIELDS:
        pieces = (piece.strip() for piece in text.split(","))
        return [piece for piece in pieces if piece]

    if field in BOOLEAN_FIELDS:
        flag = {"true": True, "false": False}.get(text.lower())
        if flag is None:
            raise RstRenderError(f'Invalid boolean for "{key}": {value}')
        return flag

    # Scalar fields and unknown fields are treated the same way.
    return text
358
+
359
+
360
def normalize_legacy_doc_role_syntax(source: str) -> str:
    """Insert the boundary marker before ``:doc:`` roles that touch preceding text.

    A ``:doc:`...``` role is prefixed with the marker plus a space unless it is
    preceded by whitespace, a backslash, or one of ``( [ { <`` (or sits at the
    very start of the text). Presumably this lets docutils recognise roles
    written without a separating space; the marker is removed again after
    rendering.

    Raises:
        RstRenderError: if the source already contains the reserved marker.
    """
    # The "with space" variant contains the bare marker, so one check covers both.
    if LEGACY_DOC_ROLE_BOUNDARY in source:
        raise RstRenderError(
            f'Source already contains reserved legacy :doc: boundary marker "{LEGACY_DOC_ROLE_BOUNDARY}".'
        )

    glued_doc_role = r"(?<![\s\\(\[{<])(:doc:`[^`\n]+`)"
    replacement = LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE + r"\1"
    return re.sub(glued_doc_role, replacement, source)
371
+
372
+
373
def extract_metadata(document: Any) -> dict[str, Any]:
    """Collect front-matter style metadata from the top of a doctree.

    Only the leading ``docinfo`` and ``field_list`` children are read (a title
    is skipped); scanning stops at the first other node. Values go through
    ``normalize_metadata_value``. In the result every key is lower-cased,
    except that ``coverimage`` and ``redirectfrom`` are emitted as
    ``coverImage`` and ``redirectFrom``. When a single ``author`` is present
    without ``authors``, ``authors`` is filled with a one-item list.
    """
    from docutils import nodes

    metadata: dict[str, Any] = {}

    def store_field(field: Any) -> None:
        # A field is (name, body); skip anything malformed or empty.
        if len(field.children) < 2:
            return
        name = field.children[0].astext().strip()
        value = field.children[1].astext().strip()
        if name and value:
            metadata[name] = normalize_metadata_value(name, value)

    for child in document.children:
        if isinstance(child, nodes.docinfo):
            for entry in child.children:
                if isinstance(entry, nodes.authors):
                    metadata["authors"] = [
                        author.astext().strip() for author in entry.children if author.astext().strip()
                    ]
                elif isinstance(entry, nodes.author):
                    metadata["author"] = entry.astext().strip()
                elif isinstance(entry, nodes.field) and len(entry.children) >= 2:
                    store_field(entry)
                else:
                    # Other docinfo entries are keyed by their node tag name.
                    key = entry.tagname.lower()
                    value = entry.astext().strip()
                    if value:
                        metadata[key] = normalize_metadata_value(key, value)
            continue

        if isinstance(child, nodes.field_list):
            for field in child.children:
                if isinstance(field, nodes.field):
                    store_field(field)
            continue

        if isinstance(child, nodes.title):
            continue

        break

    normalized: dict[str, Any] = {}
    for key, value in metadata.items():
        lowered = key.lower()
        if lowered == "coverimage":
            normalized["coverImage"] = value
        elif lowered == "redirectfrom":
            normalized["redirectFrom"] = value
        else:
            normalized[lowered] = value

    # Apply the author -> authors fallback on the lower-cased keys so that a
    # capitalised "Author" field is honoured and an explicit "Authors" list
    # is never overwritten by the fallback.
    if "author" in normalized and "authors" not in normalized:
        normalized["authors"] = [normalized["author"]]

    return normalized
430
+
431
+
432
def resolve_asset_uri(uri: str, source_file: Path, image_base_slug: str) -> tuple[str, bool]:
    """Resolve an asset URI to its public form and report whether it exists.

    Returns ``(resolved_uri, exists)``:

    * an empty URI yields ``("", False)``;
    * URIs starting with ``http://``, ``https://``, ``data:``, ``mailto:``,
      ``#`` or ``/`` are returned unchanged and reported as existing;
    * anything else is treated as a path relative to ``source_file``: it is
      checked on disk and rewritten to ``/<image_base_slug>/<uri>`` with
      backslashes converted to slashes and the path normalised.
    """
    cleaned = uri.strip()
    if cleaned == "":
        return cleaned, False

    passthrough_prefixes = ("http://", "https://", "data:", "mailto:", "#", "/")
    if cleaned.startswith(passthrough_prefixes):
        return cleaned, True

    on_disk = (source_file.parent / cleaned).resolve().exists()

    joined = posixpath.join(image_base_slug.strip("/"), cleaned.replace("\\", "/"))
    public_uri = "/" + posixpath.normpath(joined).lstrip("/")
    return public_uri, on_disk
447
+
448
+
449
def extract_assets(document: Any, source_file: Path, image_base_slug: str) -> list[dict[str, Any]]:
    """List every image in the doctree that has a non-empty ``uri``.

    Each entry holds the ``original`` URI, its ``resolved`` public URI and an
    ``exists`` flag, as computed by ``resolve_asset_uri``.
    """
    from docutils import nodes

    found: list[dict[str, Any]] = []
    for image_node in document.findall(nodes.image):
        source_uri = image_node.get("uri", "").strip()
        if source_uri:
            public_uri, on_disk = resolve_asset_uri(source_uri, source_file, image_base_slug)
            found.append(dict(original=source_uri, resolved=public_uri, exists=on_disk))

    return found
465
+
466
+
467
def rewrite_html_assets(rendered_html: str, assets: list[dict[str, Any]]) -> str:
    """Replace asset URIs in ``src``/``href`` attributes with their resolved form.

    Args:
        rendered_html: HTML produced by the writer.
        assets: Entries with ``original`` and ``resolved`` keys.

    Returns:
        The HTML with every ``src="<original>"`` / ``href="<original>"``
        attribute value (single or double quoted) swapped for the
        HTML-escaped resolved URI.
    """
    rewritten = rendered_html

    for asset in assets:
        escaped_original = re.escape(html.escape(asset["original"], quote=True))
        escaped_resolved = html.escape(asset["resolved"], quote=True)
        pattern = rf'(\s(?:src|href)=["\']){escaped_original}(["\'])'

        # Use a callable replacement: the resolved URI is data, not a template.
        # Interpolating it into a replacement string would make re.sub read a
        # backslash in the URI as an escape sequence or a group reference.
        def substitute(match: re.Match[str], value: str = escaped_resolved) -> str:
            return f"{match.group(1)}{value}{match.group(2)}"

        rewritten = re.sub(pattern, substitute, rewritten)

    return rewritten
482
+
483
+
484
def extract_headings(document: Any) -> list[dict[str, Any]]:
    """Build a heading outline from the doctree's sections.

    Every section that has a title contributes ``id`` (its first id, or an
    empty string), ``text`` and ``level``. The level is 2 for a section with
    no section ancestors and grows by one per enclosing section.
    """
    from docutils import nodes

    def enclosing_sections(node: Any) -> int:
        count = 0
        ancestor = node.parent
        while ancestor is not None:
            if isinstance(ancestor, nodes.section):
                count += 1
            ancestor = ancestor.parent
        return count

    outline: list[dict[str, Any]] = []
    for section in document.findall(nodes.section):
        heading = next((node for node in section.children if isinstance(node, nodes.title)), None)
        if heading is None:
            continue

        section_ids = section.get("ids", [])
        outline.append({
            "id": section_ids[0] if section_ids else "",
            "text": heading.astext().strip(),
            "level": enclosing_sections(section) + 2,
        })

    return outline
508
+
509
+
510
def extract_body_text(document: Any) -> str:
    """Return the document's plain text without preamble, footnotes or messages.

    Works on a deep copy, so the given doctree is left untouched. System
    messages, footnotes and footnote references are removed everywhere;
    top-level docinfo, field lists, comments and titles are skipped. The
    remaining top-level blocks are joined with blank lines and the legacy
    ``:doc:`` boundary marker is stripped.
    """
    from docutils import nodes

    body_tree = copy.deepcopy(document)

    # Remove one node type at a time; each pass re-scans the already pruned tree.
    for unwanted_type in (nodes.system_message, nodes.footnote, nodes.footnote_reference):
        for unwanted in list(body_tree.findall(unwanted_type)):
            owner = unwanted.parent
            if owner is not None:
                owner.remove(unwanted)

    skipped_types = (
        nodes.docinfo,
        nodes.field_list,
        nodes.comment,
        nodes.title,
        nodes.system_message,
        nodes.footnote,
    )
    body_parts: list[str] = []
    for child in body_tree.children:
        if isinstance(child, skipped_types) or child.tagname == "footnote_list":
            continue
        block_text = child.astext().strip()
        if block_text:
            body_parts.append(block_text)

    joined = "\n\n".join(body_parts)
    joined = joined.replace(LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE, "")
    joined = joined.replace(LEGACY_DOC_ROLE_BOUNDARY, "")
    return joined.strip()
540
+
541
+
542
def remove_system_messages(document: Any) -> None:
    """Detach every ``system_message`` node from the doctree, in place."""
    from docutils import nodes

    # Snapshot the matches first: the tree is mutated while looping.
    messages = list(document.findall(nodes.system_message))
    for message in messages:
        owner = message.parent
        if owner is None:
            continue
        owner.remove(message)
549
+
550
+
551
def strip_preamble_nodes(document: Any) -> Any:
    """Return a deep copy of the doctree without its leading preamble.

    Leading docinfo, field-list and comment children are removed; a title is
    kept and skipped over. Removal stops at the first child of any other type.
    """
    from docutils import nodes

    preamble_types = (nodes.docinfo, nodes.field_list, nodes.comment)

    clone = copy.deepcopy(document)
    for child in list(clone.children):
        if isinstance(child, nodes.title):
            continue
        if not isinstance(child, preamble_types):
            break
        clone.remove(child)

    return clone
564
+
565
+
566
+ def _language_from_classes(classes: list[str] | None) -> str:
567
+ """Recover the source language from a literal_block's class list when the
568
+ explicit `language` attribute is absent. Docutils stores ``.. code-block:: foo``
569
+ as classes=['code', 'foo']; the first class that isn't a docutils-internal
570
+ marker is the language name.
571
+ """
572
+ if not classes:
573
+ return ""
574
+ for cls in classes:
575
+ if cls not in ("code", "literal-block", "linenos"):
576
+ return cls
577
+ return ""
578
+
579
+
580
+ def _build_amytis_code_marker(
581
+ text: str,
582
+ language: str,
583
+ highlight_lines: list[int] | None,
584
+ linenos: bool,
585
+ title: str | None,
586
+ ) -> str:
587
+ """Build the opaque <pre data-amytis-code> marker that the JS-side
588
+ Shiki post-processor in src/lib/shiki-rst.ts replaces with highlighted HTML.
589
+ """
590
+ attrs = ['data-amytis-code=""']
591
+ if language:
592
+ attrs.append(f'data-language="{html.escape(language, quote=True)}"')
593
+ if highlight_lines:
594
+ attrs.append(
595
+ f'data-highlight-lines="{",".join(str(n) for n in highlight_lines)}"'
596
+ )
597
+ if linenos:
598
+ attrs.append('data-line-numbers="true"')
599
+ if title:
600
+ attrs.append(f'data-title="{html.escape(title, quote=True)}"')
601
+
602
+ escaped = html.escape(text, quote=False)
603
+ return f'<pre {" ".join(attrs)}><code>{escaped}</code></pre>'
604
+
605
+
606
def _build_inner_block_marker(block: Any) -> tuple[str, str]:
    """Build the ``<pre data-amytis-code>`` marker for one literal block.

    Returns ``(marker, label)``. The label is the block's ``amytis_label``
    attribute when set, otherwise its language, otherwise an empty string.
    Shared by the standalone literal-block path and the code-group path.
    """
    class_names = list(block.get("classes") or [])
    language = block.get("language") or _language_from_classes(class_names)
    highlighted = list((block.get("highlight_args") or {}).get("hl_lines") or [])

    marker = _build_amytis_code_marker(
        text=block.astext(),
        language=language,
        highlight_lines=highlighted,
        linenos="linenos" in class_names,
        # Set on the block when it came with a caption.
        title=block.get("amytis_caption"),
    )
    tab_label = block.get("amytis_label") or language or ""
    return marker, tab_label
627
+
628
+
629
def transform_literal_blocks_to_markers(document: Any) -> None:
    """Rewrite every literal block in the doctree as a raw HTML marker, in place.

    Three passes run in order:

    1. Captioned ``literal-block-wrapper`` containers are replaced by their
       literal block, which receives the caption as ``amytis_caption``.
    2. ``amytis-code-group-source`` containers are replaced by one raw
       ``<div data-amytis-code-group>`` holding the markers of all nested
       literal blocks. This runs before pass 3 so grouped blocks are consumed
       together with their wrapper.
    3. Every remaining literal block is replaced by its own raw marker.

    Raises:
        RstRenderError: if a code-group container has no literal block inside.
    """
    from docutils import nodes
    import json

    def containers_with_class(class_name: str) -> list[Any]:
        # Snapshot the matches up front: the passes mutate the tree.
        return [
            candidate
            for candidate in list(document.findall(nodes.container))
            if class_name in (candidate.get("classes") or [])
        ]

    # Pass 1: fold caption wrappers into their literal block.
    for wrapper in containers_with_class("literal-block-wrapper"):
        caption = next((node for node in wrapper.children if isinstance(node, nodes.caption)), None)
        code = next((node for node in wrapper.children if isinstance(node, nodes.literal_block)), None)
        if caption is None or code is None:
            continue
        code["amytis_caption"] = caption.astext().strip()
        wrapper.parent.replace(wrapper, code)

    # Pass 2: code groups. Ids come from a running counter, so two groups with
    # the same labels still get distinct ids.
    for group_number, group in enumerate(containers_with_class("amytis-code-group-source"), start=1):
        grouped_blocks = list(group.findall(nodes.literal_block))
        if not grouped_blocks:
            raise RstRenderError(
                "Empty or malformed '.. code-group::' directive: expected at least one nested .. code-block:: child."
            )

        built = [_build_inner_block_marker(block) for block in grouped_blocks]
        markers = [marker for marker, _ in built]
        labels = [label for _, label in built]

        group_id = f"rst-{group_number}"
        labels_json = html.escape(json.dumps(labels, ensure_ascii=False), quote=True)
        opening_tag = (
            f'<div data-amytis-code-group="" data-labels="{labels_json}" '
            f'data-group-id="{group_id}">'
        )
        wrapper_html = opening_tag + "".join(markers) + "</div>"
        group.parent.replace(group, nodes.raw("", wrapper_html, format="html"))

    # Pass 3: standalone literal blocks.
    for block in list(document.findall(nodes.literal_block)):
        marker, _unused_label = _build_inner_block_marker(block)
        block.parent.replace(block, nodes.raw("", marker, format="html"))
698
+
699
+
700
def extract_html_body_from_doctree(document: Any) -> str:
    """Render the doctree with the docutils ``html5`` writer and return its body.

    The full page is reduced to the markup captured by ``BodyFragmentParser``
    and the legacy ``:doc:`` boundary marker is stripped from it.

    Raises:
        RstRenderError: if no ``<main>``/``<body>`` content could be extracted.
    """
    from docutils.core import publish_from_doctree

    writer_settings = {
        "embed_stylesheet": False,
        "stylesheet_path": None,
        "output_encoding": "unicode",
        "initial_header_level": 2,
        "report_level": 2,
        "halt_level": 5,
        "file_insertion_enabled": False,
        "raw_enabled": False,
    }
    page = publish_from_doctree(document, writer_name="html5", settings_overrides=writer_settings)

    extractor = BodyFragmentParser()
    extractor.feed(page)
    body = extractor.get_fragment()
    if not body:
        raise RstRenderError("Docutils HTML output did not contain a <main> or <body> fragment.")

    # Remove the marker together with its trailing space first, then any bare leftovers.
    for marker in (LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE, LEGACY_DOC_ROLE_BOUNDARY):
        body = body.replace(marker, "")
    return body
725
+
726
+
727
def build_output(document: Any, source_file: Path, image_base_slug: str, warnings: list[str]) -> dict[str, Any]:
    """Assemble the JSON-serialisable render result for one parsed document.

    Args:
        document: The parsed docutils doctree; it is mutated by the
            literal-block transformation performed here.
        source_file: Path of the rST source, forwarded to asset extraction.
        image_base_slug: Slug forwarded to asset extraction.
        warnings: Collected warning messages; duplicates are dropped in the
            result while first-seen order is kept.

    Returns:
        A dict with the keys ``title``, ``html``, ``text``, ``headings``,
        ``metadata``, ``assets`` and ``warnings``.

    Raises:
        RstRenderError: If the document contains no title node.
    """
    from docutils import nodes

    title_candidates = document.findall(nodes.title)
    first_title = next(title_candidates, None)
    if first_title is None:
        raise RstRenderError("Missing document title.")

    asset_records = extract_assets(document, source_file, image_base_slug)

    # Everything that only reads the tree must run before the literal-block
    # transformation below, because that step rewrites nodes in place.
    plain_text = extract_body_text(document)
    heading_list = extract_headings(document)
    meta = extract_metadata(document)

    transform_literal_blocks_to_markers(document)
    trimmed_document = strip_preamble_nodes(document)
    body_html = extract_html_body_from_doctree(trimmed_document)

    title_text = first_title.astext().strip()
    final_html = rewrite_html_assets(body_html, asset_records)
    # dict.fromkeys keeps the first occurrence of each message, in order.
    unique_warnings = list(dict.fromkeys(warnings))

    return {
        "title": title_text,
        "html": final_html,
        "text": plain_text,
        "headings": heading_list,
        "metadata": meta,
        "assets": asset_records,
        "warnings": unique_warnings,
    }
752
+
753
+
754
+ _amytis_directives_registered = False
755
+
756
+
757
def register_amytis_directives() -> None:
    """Install the custom rST directives into docutils, at most once per process.

    A module-level flag short-circuits every call after the first. On the
    first call two directive classes are defined and registered:

    * ``LabeledCodeBlock`` subclasses docutils' ``CodeBlock`` and extends the
      inherited ``option_spec`` with a ``label`` option. After delegating to
      the base ``run()``, a non-empty label is stored under the
      ``amytis_label`` key of every ``literal_block`` found in the returned
      nodes. It is registered as ``code-block``, ``code`` and ``sourcecode``.
    * ``CodeGroup`` is registered as ``code-group``. It accepts content but no
      arguments or options, parses its body as nested rST into a
      ``container`` node tagged with the ``amytis-code-group-source`` class,
      and returns that container.
    """
    global _amytis_directives_registered
    if _amytis_directives_registered:
        return

    from docutils import nodes
    from docutils.parsers.rst import Directive, directives
    from docutils.parsers.rst.directives.body import CodeBlock as BaseCodeBlock

    class LabeledCodeBlock(BaseCodeBlock):
        """The stock code directive with one extra ``:label:`` option."""

        option_spec = dict(BaseCodeBlock.option_spec, label=directives.unchanged)

        def run(self):
            produced = super().run()
            tab_label = self.options.get("label")
            if not tab_label:
                return produced
            # Stamp the label on each literal_block so a later pass can read it.
            for top_node in produced:
                for literal in top_node.findall(nodes.literal_block):
                    literal["amytis_label"] = tab_label
            return produced

    class CodeGroup(Directive):
        """Collect the directive body into a tagged container node.

        The body is parsed as rST, so nested code directives become children
        of the returned container.
        """

        has_content = True
        required_arguments = 0
        optional_arguments = 0
        option_spec = {}

        def run(self):
            holder = nodes.container()
            holder["classes"].append("amytis-code-group-source")
            self.state.nested_parse(self.content, self.content_offset, holder)
            return [holder]

    for directive_name in ("code-block", "code", "sourcecode"):
        directives.register_directive(directive_name, LabeledCodeBlock)
    directives.register_directive("code-group", CodeGroup)
    _amytis_directives_registered = True
815
+
816
+
817
def render_single_file(source_file: Path, image_base_slug: str, strict: bool) -> dict[str, Any]:
    """Parse one rST file and return its render result.

    Args:
        source_file: Path of the rST file; it is read as UTF-8.
        image_base_slug: Slug forwarded to ``build_output``.
        strict: When true, a referenced asset whose ``exists`` entry is falsy
            aborts the render.

    Returns:
        The dict produced by ``build_output``.

    Raises:
        RstRenderError: In strict mode, naming the first missing asset.
    """
    from docutils.core import publish_doctree

    register_amytis_directives()
    warnings: list[str] = []
    raw_text = source_file.read_text(encoding="utf-8")
    source = normalize_legacy_doc_role_syntax(raw_text)

    parse_settings = {
        "report_level": 2,
        "halt_level": 5,
        "file_insertion_enabled": False,
        "raw_enabled": False,
    }
    with temporary_role_overrides(source_file, warnings):
        document = publish_doctree(source=source, settings_overrides=parse_settings)
    # NOTE(review): the source view lost its indentation; the two calls below
    # are taken to follow the `with` block rather than sit inside it -- confirm.
    remove_system_messages(document)
    output = build_output(document, source_file, image_base_slug, warnings)

    if not strict:
        return output

    absent = [asset for asset in output["assets"] if not asset["exists"]]
    if absent:
        original_ref = absent[0]["original"]
        raise RstRenderError(
            f'Missing local asset "{original_ref}" in {source_file}'
        )
    return output
845
+
846
+
847
def render_batch(raw_input: str, strict: bool) -> list[dict[str, Any]]:
    """Render every file listed in a JSON batch description.

    Args:
        raw_input: JSON text that must decode to an array of objects, each
            with string ``file`` and ``imageBaseSlug`` entries.
        strict: Forwarded unchanged to ``render_single_file``.

    Returns:
        One dict per entry, in input order, with the keys ``file`` (the
        resolved path as a string), ``ok`` (always ``True``) and ``result``.

    Raises:
        RstRenderError: If the JSON is invalid, is not an array, contains a
            malformed entry, or names a file that does not exist. The first
            such problem aborts the whole batch.
    """
    try:
        payload = json.loads(raw_input)
    except json.JSONDecodeError as exc:
        raise RstRenderError(f"Invalid batch JSON: {exc.msg}") from exc

    if not isinstance(payload, list):
        raise RstRenderError("Invalid batch JSON: expected an array.")

    rendered: list[dict[str, Any]] = []
    for item in payload:
        if not isinstance(item, dict):
            raise RstRenderError("Invalid batch entry: expected an object.")

        file_value = item.get("file")
        slug_value = item.get("imageBaseSlug")
        if not (isinstance(file_value, str) and isinstance(slug_value, str)):
            raise RstRenderError("Invalid batch entry: missing file or imageBaseSlug.")

        resolved = resolve_source_file(file_value)
        if not resolved.exists():
            raise RstRenderError(f"rST file not found: {resolved}")

        single_result = render_single_file(resolved, slug_value, strict)
        rendered.append({"file": str(resolved), "ok": True, "result": single_result})

    return rendered
878
+
879
+
880
def main() -> int:
    """Run the command-line renderer and return the process exit status.

    The batch description is read from ``--batch-file`` when given, otherwise
    from stdin when batch mode is requested; without batch mode the single
    file named by the arguments is rendered. The JSON result goes to stdout.

    Returns:
        0 on success. 1 when docutils cannot be imported, when the single
        source file is missing, or when rendering fails with
        ``RstRenderError``, ``OSError``, ``ValueError``, ``KeyError`` or
        ``AttributeError`` (the message is written to stderr). Any other
        exception propagates to the caller.
    """
    args = parse_args()
    source_file: Path | None = None

    # Probe for docutils up front so a missing dependency yields an install
    # hint instead of a traceback.
    try:
        from docutils.core import publish_doctree  # noqa: F401
    except ImportError:
        print(
            "Missing Python dependency: docutils. Install it with `python3 -m pip install docutils`.",
            file=sys.stderr,
        )
        return 1

    try:
        if args.batch_stdin or args.batch_file:
            raw_batch_input = (
                Path(args.batch_file).read_text(encoding="utf-8")
                if args.batch_file
                else sys.stdin.read()
            )
            batch_results = render_batch(raw_batch_input, args.strict)
            print(json.dumps(batch_results, ensure_ascii=False))
            return 0

        source_file = resolve_source_file(args.file)
        if not source_file.exists():
            print(f"rST file not found: {source_file}", file=sys.stderr)
            return 1

        single_result = render_single_file(source_file, args.image_base_slug, args.strict)
        print(json.dumps(single_result, ensure_ascii=False))
        return 0
    except RstRenderError as exc:
        print(str(exc), file=sys.stderr)
        return 1
    except (OSError, ValueError, KeyError, AttributeError) as exc:
        # source_file is still None in batch mode or if resolving it failed.
        message = (
            f"Failed to render: {exc}"
            if source_file is None
            else f"Failed to render {source_file}: {exc}"
        )
        print(message, file=sys.stderr)
        return 1
920
+
921
+
922
+ if __name__ == "__main__":
923
+ sys.exit(main())