@hutusi/amytis 1.14.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/.github/workflows/ci.yml +1 -1
  2. package/.github/workflows/publish.yml +2 -2
  3. package/CHANGELOG.md +16 -0
  4. package/README.md +33 -1
  5. package/README.zh.md +33 -1
  6. package/TODO.md +10 -0
  7. package/bun.lock +69 -41
  8. package/content/series/rst-legacy/deeper-notes/images/test.svg +4 -0
  9. package/content/series/rst-legacy/deeper-notes/index.rst +15 -0
  10. package/content/series/rst-legacy/getting-started.rst +24 -0
  11. package/content/series/rst-legacy/index.rst +9 -0
  12. package/content/series/rst-readme/README.rst +9 -0
  13. package/content/series/rst-readme/readme-index-post.rst +10 -0
  14. package/content/series/rst-toctree/first-post.rst +6 -0
  15. package/content/series/rst-toctree/index.rst +10 -0
  16. package/content/series/rst-toctree/second-post.rst +6 -0
  17. package/content/series/rst-toctree-precedence/first-post.rst +6 -0
  18. package/content/series/rst-toctree-precedence/index.rst +12 -0
  19. package/content/series/rst-toctree-precedence/second-post.rst +6 -0
  20. package/docs/ARCHITECTURE.md +22 -3
  21. package/docs/CONTRIBUTING.md +11 -0
  22. package/eslint.config.mjs +2 -0
  23. package/next.config.ts +2 -2
  24. package/package.json +22 -16
  25. package/packages/create-amytis/package.json +1 -1
  26. package/packages/create-amytis/src/index.test.ts +43 -1
  27. package/packages/create-amytis/src/index.ts +64 -8
  28. package/public/next-image-export-optimizer-hashes.json +14 -73
  29. package/scripts/build-pagefind.ts +172 -0
  30. package/scripts/copy-assets.ts +246 -56
  31. package/scripts/generate-knowledge-graph.ts +2 -1
  32. package/scripts/render-rst.py +719 -0
  33. package/scripts/run-with-rst-python.ts +42 -0
  34. package/src/app/[slug]/[postSlug]/page.tsx +20 -10
  35. package/src/app/[slug]/page/[page]/page.tsx +15 -0
  36. package/src/app/globals.css +165 -0
  37. package/src/app/series/[slug]/page/[page]/page.tsx +74 -6
  38. package/src/app/series/[slug]/page.tsx +11 -13
  39. package/src/app/series/page.tsx +3 -3
  40. package/src/components/AuthorCard.tsx +25 -16
  41. package/src/components/CoverImage.tsx +5 -2
  42. package/src/components/MarkdownRenderer.test.tsx +16 -0
  43. package/src/components/MarkdownRenderer.tsx +4 -1
  44. package/src/components/RstRenderer.test.tsx +93 -0
  45. package/src/components/RstRenderer.tsx +122 -0
  46. package/src/layouts/PostLayout.tsx +5 -1
  47. package/src/layouts/SimpleLayout.tsx +10 -3
  48. package/src/lib/image-utils.test.ts +19 -0
  49. package/src/lib/image-utils.ts +11 -0
  50. package/src/lib/markdown.test.ts +140 -2
  51. package/src/lib/markdown.ts +731 -210
  52. package/src/lib/rehype-image-metadata.ts +2 -2
  53. package/src/lib/rst-renderer.test.ts +355 -0
  54. package/src/lib/rst-renderer.ts +617 -0
  55. package/src/lib/rst.test.ts +140 -0
  56. package/src/lib/rst.ts +470 -0
  57. package/src/lib/series-redirects.ts +42 -0
  58. package/tests/integration/feed-utils.test.ts +13 -0
  59. package/tests/integration/reading-time-headings.test.ts +5 -9
  60. package/tests/integration/series-draft.test.ts +16 -2
  61. package/tests/integration/series.test.ts +93 -0
  62. package/tests/tooling/build-pagefind.test.ts +66 -0
  63. package/tests/unit/static-params.test.ts +140 -0
@@ -0,0 +1,719 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import copy
7
+ import html
8
+ import json
9
+ import posixpath
10
+ import re
11
+ import sys
12
+ from contextlib import contextmanager
13
+ from html.parser import HTMLParser
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+
18
# Metadata keys whose values are split on commas into a list of strings.
CSV_FIELDS = {
    "tags",
    "authors",
    "posts",
    "redirectfrom",
}

# Metadata keys whose values must be the literal "true" or "false".
BOOLEAN_FIELDS = {
    "featured",
    "pinned",
    "draft",
    "latex",
    "toc",
    "commentable",
}

# Metadata keys that are kept as trimmed strings.
SCALAR_FIELDS = {
    "date", "subtitle", "excerpt", "category", "author",
    "layout", "series", "coverimage", "sort", "type",
}

# Reserved marker text used around legacy :doc: roles; the variant with a
# trailing space is the marker plus the single space that follows it.
LEGACY_DOC_ROLE_BOUNDARY = "__AMYTIS_RST_DOC_ROLE_BOUNDARY__"
LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE = LEGACY_DOC_ROLE_BOUNDARY + " "
34
+
35
+
36
class RstRenderError(Exception):
    """Error raised by this script when an rST file cannot be rendered."""
38
+
39
+
40
class BodyFragmentParser(HTMLParser):
    """Collect the markup nested inside a ``<main>`` or ``<body>`` element.

    Whichever of the two tags opens first becomes the capture target. Every
    tag, text run, comment, declaration, and character/entity reference seen
    until the matching end tag is stored verbatim; the target's own start and
    end tags are not stored. Character references are not decoded because the
    parser is created with ``convert_charrefs=False``.
    """

    # HTML void elements have no end tag, so they must not take part in the
    # nesting-depth bookkeeping; otherwise the target's closing tag would be
    # mistaken for a nested one and leak into the fragment.
    _VOID_ELEMENTS = frozenset({
        "area", "base", "br", "col", "embed", "hr", "img",
        "input", "link", "meta", "source", "track", "wbr",
    })

    def __init__(self) -> None:
        super().__init__(convert_charrefs=False)
        # Name of the element currently being captured, or None when idle.
        self._target: str | None = None
        # Number of open elements, counting the target itself as 1.
        self._depth = 0
        self._fragments: list[str] = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Start capturing on ``<main>``/``<body>``, or record a nested start tag."""
        if self._target is None:
            if tag in {"main", "body"}:
                self._target = tag
                self._depth = 1
            return

        if tag not in self._VOID_ELEMENTS:
            self._depth += 1
        starttag_text = self.get_starttag_text()
        if starttag_text is not None:
            self._fragments.append(starttag_text)

    def handle_endtag(self, tag: str) -> None:
        """Stop capturing on the target's end tag, or record a nested end tag."""
        if self._target is None:
            return

        if tag in self._VOID_ELEMENTS:
            # A stray end tag for a void element has no matching depth
            # increment; keep the text but leave the depth untouched.
            self._fragments.append(f"</{tag}>")
            return

        if self._depth == 1 and tag == self._target:
            self._target = None
            self._depth = 0
            return

        # Never drop below the target's own level, so an unmatched end tag
        # cannot prevent the target's end tag from being recognised.
        if self._depth > 1:
            self._depth -= 1
        self._fragments.append(f"</{tag}>")

    def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Record a self-closing tag (``<x />``) without changing the depth."""
        if self._target is not None:
            starttag_text = self.get_starttag_text()
            if starttag_text is not None:
                self._fragments.append(starttag_text)

    def handle_data(self, data: str) -> None:
        """Record a text run."""
        if self._target is not None:
            self._fragments.append(data)

    def handle_comment(self, data: str) -> None:
        """Record an HTML comment, restoring its delimiters."""
        if self._target is not None:
            self._fragments.append(f"<!--{data}-->")

    def handle_entityref(self, name: str) -> None:
        """Record a named entity reference such as ``&amp;`` unchanged."""
        if self._target is not None:
            self._fragments.append(f"&{name};")

    def handle_charref(self, name: str) -> None:
        """Record a numeric character reference such as ``&#160;`` unchanged."""
        if self._target is not None:
            self._fragments.append(f"&#{name};")

    def handle_decl(self, decl: str) -> None:
        """Record a ``<!...>`` declaration, restoring its delimiters."""
        if self._target is not None:
            self._fragments.append(f"<!{decl}>")

    def get_fragment(self) -> str:
        """Return everything captured so far, joined and stripped of outer whitespace."""
        return "".join(self._fragments).strip()
99
+
100
+
101
def detect_series_root(source_file: Path) -> Path | None:
    """Return the directory directly below the first ``series`` path component.

    The path is resolved first. ``None`` is returned when no component is
    named ``series`` or when ``series`` is the final component.
    """
    segments = source_file.resolve().parts
    if "series" not in segments:
        return None

    # One past the component that follows "series".
    root_end = segments.index("series") + 2
    if root_end > len(segments):
        return None

    return Path(*segments[:root_end])
112
+
113
+
114
def slug_from_doc_path(doc_path: Path) -> str:
    """Return the slug for a document path.

    ``index.rst`` and ``README.rst`` take the name of their parent directory;
    any other file takes its own name without the suffix.
    """
    is_directory_index = doc_path.name in {"index.rst", "README.rst"}
    return doc_path.parent.name if is_directory_index else doc_path.stem
118
+
119
+
120
def resolve_doc_target_path(source_file: Path, target: str) -> Path | None:
    """Find the ``.rst`` file that *target* refers to, relative to *source_file*.

    Candidates are tried in order: the target as a file (``.rst`` is appended
    unless the target already ends with it), then ``index.rst`` and
    ``README.rst`` inside the target treated as a directory. The first
    candidate that exists is returned, otherwise ``None``.
    """
    base = (source_file.parent / target).resolve()
    if base.suffix == ".rst":
        as_file = base
    else:
        as_file = base.parent / (base.name + ".rst")

    ordered_candidates = (as_file, base / "index.rst", base / "README.rst")
    return next((path for path in ordered_candidates if path.exists()), None)
134
+
135
+
136
def resolve_doc_target_uri(source_file: Path, target: str) -> str | None:
    """Translate a ``:doc:`` target into a ``/<series>/<slug>`` URI.

    Returns ``None`` when the target file cannot be found or when the file
    found is not located under a ``series`` directory.
    """
    resolved_path = resolve_doc_target_path(source_file, target)
    if resolved_path is None:
        return None

    root = detect_series_root(resolved_path)
    if root is None:
        return None

    return "/" + root.name + "/" + slug_from_doc_path(resolved_path)
148
+
149
+
150
def register_doc_role(source_file: Path, warnings: list[str]) -> None:
    """Register a ``:doc:`` role that links to other documents of a series.

    Targets are resolved relative to *source_file*. A target that cannot be
    resolved is rendered as a literal node and reported through *warnings*.
    """
    from docutils import nodes
    from docutils.parsers.rst import roles

    # Matches the "Label <target>" form of a role's content.
    labelled_target = re.compile(r"(.+?)\s*<(.+)>$")

    def doc_role(  # type: ignore[override]
        _name: str,
        rawtext: str,
        text: str,
        _lineno: int,
        _inliner: Any,
        _options: dict[str, Any] | None = None,
        _content: list[str] | None = None,
    ) -> tuple[list[Any], list[Any]]:
        stripped = text.strip()
        parsed = labelled_target.match(stripped)
        if parsed:
            label = parsed.group(1).strip()
            target = parsed.group(2).strip()
        else:
            label = None
            target = stripped

        # Without an explicit label, show the last path segment of the target.
        display_text = label or target.split("/")[-1]

        refuri = resolve_doc_target_uri(source_file, target)
        if refuri is not None:
            return [nodes.reference(rawtext, display_text, refuri=refuri)], []

        warnings.append(f'Unresolved :doc: target "{target}" in {source_file}')
        return [nodes.literal(rawtext, display_text)], []

    roles.register_canonical_role("doc", doc_role)
180
+
181
+
182
def register_passthrough_roles(warnings: list[str]) -> None:
    """Register the ``dtag``, ``ref`` and ``numref`` roles.

    ``dtag`` is rendered as a plain inline node carrying the role name as a
    class, and a warning is recorded. ``ref`` becomes an in-page ``#anchor``
    reference. ``numref`` becomes an in-page reference when the target name is
    known to the document, otherwise a plain inline node plus a warning.
    """
    from docutils import nodes
    from docutils.parsers.rst import roles

    labelled_target = re.compile(r"(.+?)\s*<(.+)>$")

    def parse_role_target(text: str) -> tuple[str | None, str]:
        # Split "Label <target>" into its parts; a bare target has no label.
        stripped = text.strip()
        parsed = labelled_target.match(stripped)
        if parsed is None:
            return None, stripped
        return parsed.group(1).strip(), parsed.group(2).strip()

    def normalize_internal_ref(target: str) -> str:
        # Fallback anchor: drop surrounding backticks, then turn underscores
        # and whitespace runs into hyphens.
        unquoted = target.strip().strip("`")
        return re.sub(r"\s+", "-", unquoted.replace("_", "-"))

    def resolve_named_refid(inliner: Any, target: str) -> str | None:
        # Look the target up in the name-to-id table of the inliner's document.
        document = getattr(inliner, "document", None)
        if document is None:
            return None

        candidate = getattr(document, "nameids", {}).get(target)
        return candidate if isinstance(candidate, str) and candidate else None

    def make_passthrough_role(role_name: str):
        def passthrough_role(  # type: ignore[override]
            _name: str,
            rawtext: str,
            text: str,
            _lineno: int,
            _inliner: Any,
            _options: dict[str, Any] | None = None,
            _content: list[str] | None = None,
        ) -> tuple[list[Any], list[Any]]:
            warnings.append(f'Unsupported interpreted text role ":{role_name}:" rendered as plain inline text.')
            inline_node = nodes.inline(rawtext, text, classes=[role_name])
            return [inline_node], []

        return passthrough_role

    def ref_role(  # type: ignore[override]
        _name: str,
        rawtext: str,
        text: str,
        _lineno: int,
        _inliner: Any,
        _options: dict[str, Any] | None = None,
        _content: list[str] | None = None,
    ) -> tuple[list[Any], list[Any]]:
        label, target = parse_role_target(text)
        display_text = label or target
        refid = resolve_named_refid(_inliner, target)
        if not refid:
            refid = normalize_internal_ref(target)
        return [nodes.reference(rawtext, display_text, refuri=f"#{refid}")], []

    def numref_role(  # type: ignore[override]
        _name: str,
        rawtext: str,
        text: str,
        _lineno: int,
        _inliner: Any,
        _options: dict[str, Any] | None = None,
        _content: list[str] | None = None,
    ) -> tuple[list[Any], list[Any]]:
        label, target = parse_role_target(text)
        # A label containing a "%s" placeholder is not shown; the target is.
        if label and "%s" in label:
            display_text = target
        else:
            display_text = label or target

        refid = resolve_named_refid(_inliner, target)
        if refid is None:
            warnings.append('Unsupported interpreted text role ":numref:" rendered as plain inline text.')
            return [nodes.inline(rawtext, display_text, classes=["numref"])], []

        return [nodes.reference(rawtext, display_text, refuri=f"#{refid}", classes=["numref"])], []

    for role_name in ("dtag",):
        roles.register_canonical_role(role_name, make_passthrough_role(role_name))
    roles.register_canonical_role("ref", ref_role)
    roles.register_canonical_role("numref", numref_role)
261
+
262
+
263
@contextmanager
def temporary_role_overrides(source_file: Path, warnings: list[str]):
    """Install the custom ``doc``/``dtag``/``ref``/``numref`` roles for one block.

    The previous entries of docutils' module-level ``_role_registry`` and
    ``_roles`` tables are saved on entry and put back on exit, so the
    overrides do not persist beyond the ``with`` block.

    Raises:
        RstRenderError: if the docutils ``roles`` module does not expose the
            ``_role_registry`` and ``_roles`` attributes.
    """
    from docutils.parsers.rst import roles

    if not hasattr(roles, "_role_registry") or not hasattr(roles, "_roles"):
        raise RstRenderError(
            "Incompatible docutils roles registry layout. Expected _role_registry and _roles "
            "attributes for temporary role overrides."
        )

    tracked_names = ("doc", "dtag", "ref", "numref")
    previous_registry = {name: roles._role_registry.get(name) for name in tracked_names}
    previous_local = {name: roles._roles.get(name) for name in tracked_names}

    try:
        # Registration happens inside the try so that a failure part-way
        # through still restores whatever was already overridden.
        register_doc_role(source_file, warnings)
        register_passthrough_roles(warnings)
        yield
    finally:
        saved_tables = (
            (roles._role_registry, previous_registry),
            (roles._roles, previous_local),
        )
        for table, saved in saved_tables:
            for name, role_fn in saved.items():
                if role_fn is None:
                    # The name was absent before; remove our entry.
                    table.pop(name, None)
                else:
                    table[name] = role_fn
294
+
295
+
296
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse and validate the command-line options.

    Two mutually exclusive modes are accepted: single-file mode, which needs
    both ``--file`` and ``--image-base-slug``, and batch mode, which needs
    exactly one of ``--batch-stdin`` or ``--batch-file``.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The parsed namespace.

    Raises:
        SystemExit: raised by argparse when the option combination is invalid.
    """
    parser = argparse.ArgumentParser(description="Render a single rST file to JSON via docutils.")
    parser.add_argument("--file", help="Absolute or relative path to the .rst file")
    parser.add_argument(
        "--image-base-slug",
        help="Public-relative base slug for local assets, for example posts/my-post",
    )
    parser.add_argument(
        "--batch-stdin",
        action="store_true",
        help="Read a JSON array of batch render entries from stdin",
    )
    parser.add_argument(
        "--batch-file",
        help="Read a JSON array of batch render entries from a file",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Fail on missing local assets instead of reporting them in the output",
    )
    args = parser.parse_args(argv)

    if args.batch_stdin or args.batch_file:
        if args.batch_stdin and args.batch_file:
            parser.error("--batch-stdin and --batch-file cannot be combined")
        if args.file or args.image_base_slug:
            parser.error("--batch-stdin/--batch-file cannot be combined with --file or --image-base-slug")
        return args

    if not args.file or not args.image_base_slug:
        # Both batch flags lift this requirement, so the message names both.
        parser.error("--file and --image-base-slug are required unless --batch-stdin or --batch-file is used")

    return args
330
+
331
+
332
def resolve_source_file(raw_file: str) -> Path:
    """Turn a user-supplied path into a resolved absolute path.

    ``~`` is expanded, and a relative path is anchored at the current working
    directory before resolving.
    """
    expanded = Path(raw_file).expanduser()
    anchored = expanded if expanded.is_absolute() else Path.cwd() / expanded
    return anchored.resolve()
337
+
338
+
339
def normalize_metadata_value(key: str, value: str) -> Any:
    """Convert a raw metadata string according to the kind of its key.

    Keys are matched case-insensitively. Keys in ``CSV_FIELDS`` yield a list
    of non-empty, trimmed items; keys in ``BOOLEAN_FIELDS`` yield ``True`` or
    ``False``; every other key yields the trimmed string.

    Raises:
        RstRenderError: if a boolean key holds anything but "true"/"false"
            (compared case-insensitively).
    """
    field = key.lower()
    text = value.strip()

    if field in CSV_FIELDS:
        trimmed_items = (piece.strip() for piece in text.split(","))
        return [item for item in trimmed_items if item]

    if field in BOOLEAN_FIELDS:
        flag = {"true": True, "false": False}.get(text.lower())
        if flag is None:
            raise RstRenderError(f'Invalid boolean for "{key}": {value}')
        return flag

    # Keys in SCALAR_FIELDS and unrecognised keys are handled the same way.
    return text
358
+
359
+
360
def normalize_legacy_doc_role_syntax(source: str) -> str:
    """Insert the boundary marker before ``:doc:`` roles glued to preceding text.

    Any ``:doc:`...``` role whose preceding character is not whitespace, a
    backslash, or one of ``( [ { <`` gets the reserved marker followed by a
    space placed in front of it.

    Raises:
        RstRenderError: if *source* already contains the reserved marker.
    """
    reserved_markers = (LEGACY_DOC_ROLE_BOUNDARY, LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE)
    if any(marker in source for marker in reserved_markers):
        raise RstRenderError(
            f'Source already contains reserved legacy :doc: boundary marker "{LEGACY_DOC_ROLE_BOUNDARY}".'
        )

    glued_doc_role = re.compile(r"(?<![\s\\(\[{<])(:doc:`[^`\n]+`)")
    return glued_doc_role.sub(LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE + r"\1", source)
371
+
372
+
373
def extract_metadata(document: Any) -> dict[str, Any]:
    """Collect front-matter metadata from the leading nodes of a doctree.

    Top-level children are scanned in order. ``docinfo`` and ``field_list``
    nodes contribute entries; ``title``, ``subtitle`` and ``comment`` nodes
    are skipped; the scan stops at the first node of any other kind.

    Keys are lowercased in the result, except that ``coverimage`` and
    ``redirectfrom`` are emitted as ``coverImage`` and ``redirectFrom``. When
    only a single ``author`` is present, ``authors`` is filled with a
    one-item list.

    Raises:
        RstRenderError: propagated from ``normalize_metadata_value`` for an
            invalid boolean value.
    """
    from docutils import nodes

    metadata: dict[str, Any] = {}

    for child in document.children:
        if isinstance(child, nodes.docinfo):
            for entry in child.children:
                if isinstance(entry, nodes.authors):
                    names = (author.astext().strip() for author in entry.children)
                    metadata["authors"] = [name for name in names if name]
                    continue
                if isinstance(entry, nodes.author):
                    metadata["author"] = entry.astext().strip()
                    continue
                if isinstance(entry, nodes.field) and len(entry.children) >= 2:
                    name = entry.children[0].astext().strip()
                    value = entry.children[1].astext().strip()
                    if name and value:
                        metadata[name] = normalize_metadata_value(name, value)
                    continue

                # Any other docinfo entry is keyed by its node tag name.
                key = entry.tagname.lower()
                value = entry.astext().strip()
                if value:
                    metadata[key] = normalize_metadata_value(key, value)
            continue

        if isinstance(child, nodes.field_list):
            for field in child.children:
                # Same shape guard as the docinfo branch: a field needs both
                # a name child and a body child.
                if not isinstance(field, nodes.field) or len(field.children) < 2:
                    continue
                name = field.children[0].astext().strip()
                value = field.children[1].astext().strip()
                if not name or not value:
                    continue
                metadata[name] = normalize_metadata_value(name, value)
            continue

        # docutils allows a subtitle and comments to sit between the title and
        # the docinfo, so they must not end the scan.
        if isinstance(child, (nodes.title, nodes.subtitle, nodes.comment)):
            continue

        break

    if "author" in metadata and "authors" not in metadata:
        metadata["authors"] = [metadata["author"]]

    # Output keys that are camelCase rather than plain lowercase.
    camel_case_keys = {"coverimage": "coverImage", "redirectfrom": "redirectFrom"}
    normalized: dict[str, Any] = {}
    for key, value in metadata.items():
        lowered = key.lower()
        normalized[camel_case_keys.get(lowered, lowered)] = value

    return normalized
430
+
431
+
432
def resolve_asset_uri(uri: str, source_file: Path, image_base_slug: str) -> tuple[str, bool]:
    """Map an asset URI to its public path and report whether it exists.

    Returns a ``(resolved, exists)`` pair:

    * a blank URI gives ``("", False)``;
    * a URI starting with ``http://``, ``https://``, ``data:``, ``mailto:``,
      ``#`` or ``/`` is returned unchanged with ``exists`` set to ``True``;
    * anything else is checked on disk relative to *source_file* and rewritten
      to a normalised, ``/``-rooted path under *image_base_slug*.
    """
    cleaned = uri.strip()
    if cleaned == "":
        return cleaned, False

    passthrough_prefixes = ("http://", "https://", "data:", "mailto:", "#", "/")
    if cleaned.startswith(passthrough_prefixes):
        return cleaned, True

    on_disk = (source_file.parent / cleaned).resolve().exists()

    # The public path always uses forward slashes.
    joined = posixpath.join(image_base_slug.strip("/"), cleaned.replace("\\", "/"))
    public_path = "/" + posixpath.normpath(joined).lstrip("/")
    return public_path, on_disk
447
+
448
+
449
def extract_assets(document: Any, source_file: Path, image_base_slug: str) -> list[dict[str, Any]]:
    """List every image in the doctree together with its resolved URI.

    Each entry holds the URI as written (``original``), the URI returned by
    ``resolve_asset_uri`` (``resolved``) and that function's existence flag
    (``exists``). Images with a blank URI are left out.
    """
    from docutils import nodes

    collected: list[dict[str, Any]] = []
    written_uris = (image.get("uri", "").strip() for image in document.findall(nodes.image))
    for original in written_uris:
        if original:
            resolved, exists = resolve_asset_uri(original, source_file, image_base_slug)
            collected.append({"original": original, "resolved": resolved, "exists": exists})

    return collected
465
+
466
+
467
def rewrite_html_assets(rendered_html: str, assets: list[dict[str, Any]]) -> str:
    """Replace asset URIs in ``src``/``href`` attributes with their resolved form.

    Args:
        rendered_html: HTML text to rewrite.
        assets: Entries with ``original`` and ``resolved`` keys. Both values
            are HTML-escaped before being matched or inserted.

    Returns:
        The HTML with every quoted ``src``/``href`` value equal to an
        escaped ``original`` replaced by the escaped ``resolved`` value.
    """
    rewritten = rendered_html

    for asset in assets:
        original = asset["original"]
        resolved = asset["resolved"]
        if original == resolved:
            # Substituting a value with itself cannot change the text.
            continue

        escaped_original = re.escape(html.escape(original, quote=True))
        replacement_value = html.escape(resolved, quote=True)
        pattern = re.compile(rf'(\s(?:src|href)=["\']){escaped_original}(["\'])')

        # A callable replacement inserts the value literally. A template
        # string would interpret backslashes in the URI as escapes or group
        # references and could raise re.error.
        rewritten = pattern.sub(
            lambda match, value=replacement_value: f"{match.group(1)}{value}{match.group(2)}",
            rewritten,
        )

    return rewritten
482
+
483
+
484
def extract_headings(document: Any) -> list[dict[str, Any]]:
    """Build a flat outline of the titled sections in the doctree.

    Each entry has the section's first id (or ``""`` when it has none), the
    title text, and a level equal to 2 plus the number of enclosing sections.
    Sections without a title child are skipped.
    """
    from docutils import nodes

    def count_enclosing_sections(section: Any) -> int:
        # Walk up the parent chain and count the section ancestors.
        count = 0
        ancestor = section.parent
        while ancestor is not None:
            if isinstance(ancestor, nodes.section):
                count += 1
            ancestor = ancestor.parent
        return count

    outline: list[dict[str, Any]] = []
    for section in document.findall(nodes.section):
        title_nodes = [child for child in section.children if isinstance(child, nodes.title)]
        if not title_nodes:
            continue

        ids = section.get("ids", [])
        level = count_enclosing_sections(section) + 2
        outline.append({
            "id": ids[0] if ids else "",
            "text": title_nodes[0].astext().strip(),
            "level": level,
        })

    return outline
508
+
509
+
510
def extract_body_text(document: Any) -> str:
    """Return the plain text of the document body.

    Works on a deep copy, so *document* is not modified. System messages,
    footnotes and footnote references are removed everywhere. Top-level
    docinfo, field list, comment and title nodes, plus any ``footnote_list``
    element, are left out. The remaining top-level blocks are joined with
    blank lines and the legacy ``:doc:`` boundary marker is removed.
    """
    from docutils import nodes

    working_copy = copy.deepcopy(document)

    for unwanted_type in (nodes.system_message, nodes.footnote, nodes.footnote_reference):
        for node in list(working_copy.findall(unwanted_type)):
            owner = node.parent
            if owner is not None:
                owner.remove(node)

    skipped_types = (
        nodes.docinfo,
        nodes.field_list,
        nodes.comment,
        nodes.title,
        nodes.system_message,
        nodes.footnote,
    )
    chunks: list[str] = []
    for child in working_copy.children:
        if isinstance(child, skipped_types) or child.tagname == "footnote_list":
            continue
        text = child.astext().strip()
        if text:
            chunks.append(text)

    body = "\n\n".join(chunks)
    # Remove the marker-plus-space form first so its space goes with it.
    for marker in (LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE, LEGACY_DOC_ROLE_BOUNDARY):
        body = body.replace(marker, "")
    return body.strip()
540
+
541
+
542
def remove_system_messages(document: Any) -> None:
    """Remove every ``system_message`` node from *document*, in place."""
    from docutils import nodes

    # Snapshot the matches first: the tree is modified while looping.
    for message in tuple(document.findall(nodes.system_message)):
        owner = message.parent
        if owner is None:
            continue
        owner.remove(message)
549
+
550
+
551
def strip_preamble_nodes(document: Any) -> Any:
    """Return a deep copy of *document* without its leading metadata nodes.

    Leading docinfo, field list and comment children are removed and title
    nodes are kept. Processing stops at the first child of any other kind,
    so everything from there on is kept.
    """
    from docutils import nodes

    clone = copy.deepcopy(document)
    removable_types = (nodes.docinfo, nodes.field_list, nodes.comment)

    # Iterate over a snapshot because children are removed along the way.
    for node in tuple(clone.children):
        if isinstance(node, removable_types):
            clone.remove(node)
        elif not isinstance(node, nodes.title):
            break

    return clone
564
+
565
+
566
def extract_html_body_from_doctree(document: Any) -> str:
    """Render a doctree with the docutils ``html5`` writer and return the body markup.

    The full page produced by docutils is passed through
    ``BodyFragmentParser`` to keep only the content of its ``<main>`` or
    ``<body>`` element, and the legacy ``:doc:`` boundary marker is removed.

    Raises:
        RstRenderError: if no such content is found in the rendered page.
    """
    from docutils.core import publish_from_doctree

    writer_settings = {
        "embed_stylesheet": False,
        "stylesheet_path": None,
        "output_encoding": "unicode",
        "initial_header_level": 2,
        "report_level": 2,
        "halt_level": 5,
        "file_insertion_enabled": False,
        "raw_enabled": False,
    }
    full_page = publish_from_doctree(
        document,
        writer_name="html5",
        settings_overrides=writer_settings,
    )

    extractor = BodyFragmentParser()
    extractor.feed(full_page)
    html_fragment = extractor.get_fragment()
    if not html_fragment:
        raise RstRenderError("Docutils HTML output did not contain a <main> or <body> fragment.")

    # Remove the marker-plus-space form first so its space goes with it.
    for marker in (LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE, LEGACY_DOC_ROLE_BOUNDARY):
        html_fragment = html_fragment.replace(marker, "")
    return html_fragment
591
+
592
+
593
def build_output(document: Any, source_file: Path, image_base_slug: str, warnings: list[str]) -> dict[str, Any]:
    """Assemble the JSON-serialisable render result for one document.

    The result has the keys ``title``, ``html``, ``text``, ``headings``,
    ``metadata``, ``assets`` and ``warnings``. Duplicate warnings are dropped
    while keeping first-seen order.

    Raises:
        RstRenderError: if the document contains no title node.
    """
    from docutils import nodes

    for title_node in document.findall(nodes.title):
        break
    else:
        raise RstRenderError("Missing document title.")

    assets = extract_assets(document, source_file, image_base_slug)
    html_body = extract_html_body_from_doctree(strip_preamble_nodes(document))

    title_text = title_node.astext().strip()
    final_html = rewrite_html_assets(html_body, assets)
    body_text = extract_body_text(document)
    headings = extract_headings(document)
    metadata = extract_metadata(document)
    unique_warnings = list(dict.fromkeys(warnings))

    return {
        "title": title_text,
        "html": final_html,
        "text": body_text,
        "headings": headings,
        "metadata": metadata,
        "assets": assets,
        "warnings": unique_warnings,
    }
612
+
613
+
614
def render_single_file(source_file: Path, image_base_slug: str, strict: bool) -> dict[str, Any]:
    """Parse one rST file and return its render result.

    The file is read as UTF-8, legacy ``:doc:`` syntax is normalised, and the
    text is parsed with the custom roles installed. System messages are
    removed from the doctree before the output is built.

    Raises:
        RstRenderError: in strict mode, if any asset is flagged as missing;
            also propagated from the helpers called here.
    """
    from docutils.core import publish_doctree

    warnings: list[str] = []
    source = normalize_legacy_doc_role_syntax(source_file.read_text(encoding="utf-8"))

    parser_settings = {
        "report_level": 2,
        "halt_level": 5,
        "file_insertion_enabled": False,
        "raw_enabled": False,
    }
    with temporary_role_overrides(source_file, warnings):
        document = publish_doctree(source=source, settings_overrides=parser_settings)

    remove_system_messages(document)
    output = build_output(document, source_file, image_base_slug, warnings)

    if strict:
        # Only the first missing asset is reported.
        first_missing = next((asset for asset in output["assets"] if not asset["exists"]), None)
        if first_missing is not None:
            raise RstRenderError(
                f'Missing local asset "{first_missing["original"]}" in {source_file}'
            )

    return output
641
+
642
+
643
def render_batch(raw_input: str, strict: bool) -> list[dict[str, Any]]:
    """Render every entry of a JSON batch description.

    *raw_input* must be a JSON array of objects, each with string ``file``
    and ``imageBaseSlug`` values. Entries are processed in order and the
    first problem aborts the whole batch.

    Returns:
        One ``{"file", "ok", "result"}`` dict per entry.

    Raises:
        RstRenderError: for malformed JSON, a non-array payload, a malformed
            entry, a missing file, or a render failure.
    """
    try:
        entries = json.loads(raw_input)
    except json.JSONDecodeError as exc:
        raise RstRenderError(f"Invalid batch JSON: {exc.msg}") from exc

    if not isinstance(entries, list):
        raise RstRenderError("Invalid batch JSON: expected an array.")

    rendered: list[dict[str, Any]] = []
    for entry in entries:
        if not isinstance(entry, dict):
            raise RstRenderError("Invalid batch entry: expected an object.")

        raw_file, image_base_slug = entry.get("file"), entry.get("imageBaseSlug")
        if not (isinstance(raw_file, str) and isinstance(image_base_slug, str)):
            raise RstRenderError("Invalid batch entry: missing file or imageBaseSlug.")

        source_file = resolve_source_file(raw_file)
        if not source_file.exists():
            raise RstRenderError(f"rST file not found: {source_file}")

        rendered.append({
            "file": str(source_file),
            "ok": True,
            "result": render_single_file(source_file, image_base_slug, strict),
        })

    return rendered
674
+
675
+
676
def main() -> int:
    """Run the command-line entry point and return the process exit code.

    On success the render result is printed to stdout as JSON and 0 is
    returned. A missing docutils install, a missing input file, an
    ``RstRenderError``, or an ``OSError``/``ValueError``/``KeyError``/
    ``AttributeError`` is reported on stderr and yields 1. Any other
    exception propagates to the caller.
    """
    args = parse_args()
    source_file: Path | None = None

    # Check for the dependency up front so its absence gets a clear message.
    try:
        from docutils.core import publish_doctree  # noqa: F401
    except ImportError:
        print(
            "Missing Python dependency: docutils. Install it with `python3 -m pip install docutils`.",
            file=sys.stderr,
        )
        return 1

    try:
        batch_mode = args.batch_stdin or args.batch_file
        if batch_mode:
            if args.batch_file:
                raw_batch_input = Path(args.batch_file).read_text(encoding="utf-8")
            else:
                raw_batch_input = sys.stdin.read()
            payload: Any = render_batch(raw_batch_input, args.strict)
        else:
            source_file = resolve_source_file(args.file)
            if not source_file.exists():
                print(f"rST file not found: {source_file}", file=sys.stderr)
                return 1
            payload = render_single_file(source_file, args.image_base_slug, args.strict)

        print(json.dumps(payload, ensure_ascii=False))
        return 0
    except RstRenderError as exc:
        print(str(exc), file=sys.stderr)
        return 1
    except (OSError, ValueError, KeyError, AttributeError) as exc:
        if source_file is None:
            print(f"Failed to render: {exc}", file=sys.stderr)
        else:
            print(f"Failed to render {source_file}: {exc}", file=sys.stderr)
        return 1
716
+
717
+
718
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())