@hutusi/amytis 1.14.0 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +1 -1
- package/.github/workflows/publish.yml +2 -2
- package/CHANGELOG.md +16 -0
- package/README.md +33 -1
- package/README.zh.md +33 -1
- package/TODO.md +10 -0
- package/bun.lock +69 -41
- package/content/series/rst-legacy/deeper-notes/images/test.svg +4 -0
- package/content/series/rst-legacy/deeper-notes/index.rst +15 -0
- package/content/series/rst-legacy/getting-started.rst +24 -0
- package/content/series/rst-legacy/index.rst +9 -0
- package/content/series/rst-readme/README.rst +9 -0
- package/content/series/rst-readme/readme-index-post.rst +10 -0
- package/content/series/rst-toctree/first-post.rst +6 -0
- package/content/series/rst-toctree/index.rst +10 -0
- package/content/series/rst-toctree/second-post.rst +6 -0
- package/content/series/rst-toctree-precedence/first-post.rst +6 -0
- package/content/series/rst-toctree-precedence/index.rst +12 -0
- package/content/series/rst-toctree-precedence/second-post.rst +6 -0
- package/docs/ARCHITECTURE.md +22 -3
- package/docs/CONTRIBUTING.md +11 -0
- package/eslint.config.mjs +2 -0
- package/next.config.ts +2 -2
- package/package.json +22 -16
- package/packages/create-amytis/package.json +1 -1
- package/packages/create-amytis/src/index.test.ts +43 -1
- package/packages/create-amytis/src/index.ts +64 -8
- package/public/next-image-export-optimizer-hashes.json +14 -73
- package/scripts/build-pagefind.ts +172 -0
- package/scripts/copy-assets.ts +246 -56
- package/scripts/generate-knowledge-graph.ts +2 -1
- package/scripts/render-rst.py +719 -0
- package/scripts/run-with-rst-python.ts +42 -0
- package/src/app/[slug]/[postSlug]/page.tsx +20 -10
- package/src/app/[slug]/page/[page]/page.tsx +15 -0
- package/src/app/globals.css +165 -0
- package/src/app/series/[slug]/page/[page]/page.tsx +74 -6
- package/src/app/series/[slug]/page.tsx +11 -13
- package/src/app/series/page.tsx +3 -3
- package/src/components/AuthorCard.tsx +25 -16
- package/src/components/CoverImage.tsx +5 -2
- package/src/components/MarkdownRenderer.test.tsx +16 -0
- package/src/components/MarkdownRenderer.tsx +4 -1
- package/src/components/RstRenderer.test.tsx +93 -0
- package/src/components/RstRenderer.tsx +122 -0
- package/src/layouts/PostLayout.tsx +5 -1
- package/src/layouts/SimpleLayout.tsx +10 -3
- package/src/lib/image-utils.test.ts +19 -0
- package/src/lib/image-utils.ts +11 -0
- package/src/lib/markdown.test.ts +140 -2
- package/src/lib/markdown.ts +731 -210
- package/src/lib/rehype-image-metadata.ts +2 -2
- package/src/lib/rst-renderer.test.ts +355 -0
- package/src/lib/rst-renderer.ts +617 -0
- package/src/lib/rst.test.ts +140 -0
- package/src/lib/rst.ts +470 -0
- package/src/lib/series-redirects.ts +42 -0
- package/tests/integration/feed-utils.test.ts +13 -0
- package/tests/integration/reading-time-headings.test.ts +5 -9
- package/tests/integration/series-draft.test.ts +16 -2
- package/tests/integration/series.test.ts +93 -0
- package/tests/tooling/build-pagefind.test.ts +66 -0
- package/tests/unit/static-params.test.ts +140 -0
|
@@ -0,0 +1,719 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import copy
|
|
7
|
+
import html
|
|
8
|
+
import json
|
|
9
|
+
import posixpath
|
|
10
|
+
import re
|
|
11
|
+
import sys
|
|
12
|
+
from contextlib import contextmanager
|
|
13
|
+
from html.parser import HTMLParser
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Lower-cased field names whose values normalize_metadata_value() splits on commas.
CSV_FIELDS = {
    "tags",
    "authors",
    "posts",
    "redirectfrom",
}

# Lower-cased field names that must be spelled "true" or "false" (any case).
BOOLEAN_FIELDS = {
    "featured",
    "pinned",
    "draft",
    "latex",
    "toc",
    "commentable",
}

# Lower-cased field names that are kept as plain stripped strings.
SCALAR_FIELDS = {
    "date", "subtitle", "excerpt", "category", "author",
    "layout", "series", "coverimage", "sort", "type",
}

# Placeholder inserted in front of :doc: roles that directly follow other text;
# it is stripped again from the rendered HTML and the extracted body text.
LEGACY_DOC_ROLE_BOUNDARY = "__AMYTIS_RST_DOC_ROLE_BOUNDARY__"
LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE = LEGACY_DOC_ROLE_BOUNDARY + " "
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class RstRenderError(Exception):
    """Render failure that main() reports on stderr with exit status 1."""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class BodyFragmentParser(HTMLParser):
    """Collect the markup found inside a ``<main>`` or ``<body>`` element.

    Capturing starts at whichever of the two start tags is seen first and
    stops at its matching end tag. The wrapper tags themselves are not part
    of the fragment. Character and entity references are passed through
    verbatim because the parser is created with ``convert_charrefs=False``.
    """

    # HTML void elements have no end tag. Counting a bare ``<br>`` or
    # ``<img>`` as an open element would leave the depth counter permanently
    # too high, so the target's own end tag would leak into the fragment.
    _VOID_ELEMENTS = frozenset({
        "area", "base", "br", "col", "embed", "hr", "img",
        "input", "link", "meta", "source", "track", "wbr",
    })

    def __init__(self) -> None:
        super().__init__(convert_charrefs=False)
        # Name of the wrapper element currently being captured, if any.
        self._target: str | None = None
        # Nesting depth relative to the wrapper; 1 means "directly inside it".
        self._depth = 0
        self._fragments: list[str] = []

    def _append_starttag_text(self) -> None:
        """Append the raw source text of the start tag being handled."""
        starttag_text = self.get_starttag_text()
        if starttag_text is not None:
            self._fragments.append(starttag_text)

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        if self._target is None:
            if tag in {"main", "body"}:
                self._target = tag
                self._depth = 1
            return

        # Void elements are emitted but never expect a closing tag.
        if tag not in self._VOID_ELEMENTS:
            self._depth += 1
        self._append_starttag_text()

    def handle_endtag(self, tag: str) -> None:
        if self._target is None:
            return

        # A stray end tag for a void element (e.g. ``</br>``) has no matching
        # depth increment, so it is dropped instead of unbalancing the count.
        if tag in self._VOID_ELEMENTS:
            return

        if self._depth == 1 and tag == self._target:
            self._target = None
            self._depth = 0
            return

        self._depth -= 1
        self._fragments.append(f"</{tag}>")

    def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        # Self-closing tags are copied as written and do not affect depth.
        if self._target is not None:
            self._append_starttag_text()

    def handle_data(self, data: str) -> None:
        if self._target is not None:
            self._fragments.append(data)

    def handle_comment(self, data: str) -> None:
        if self._target is not None:
            self._fragments.append(f"<!--{data}-->")

    def handle_entityref(self, name: str) -> None:
        if self._target is not None:
            self._fragments.append(f"&{name};")

    def handle_charref(self, name: str) -> None:
        if self._target is not None:
            self._fragments.append(f"&#{name};")

    def handle_decl(self, decl: str) -> None:
        if self._target is not None:
            self._fragments.append(f"<!{decl}>")

    def get_fragment(self) -> str:
        """Return everything captured so far, with outer whitespace stripped."""
        return "".join(self._fragments).strip()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def detect_series_root(source_file: Path) -> Path | None:
    """Return the directory entry directly below the first ``series`` path part.

    The path is resolved first. ``None`` is returned when no component is
    named ``series`` or when ``series`` is the final component.
    """
    segments = source_file.resolve().parts
    if "series" not in segments:
        return None

    # Keep everything up to and including the component that follows "series".
    root_end = segments.index("series") + 2
    if root_end > len(segments):
        return None

    return Path(*segments[:root_end])
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def slug_from_doc_path(doc_path: Path) -> str:
    """Return the slug for a document path.

    ``index.rst`` and ``README.rst`` take the name of their parent directory;
    every other file uses its own name without the final suffix.
    """
    is_folder_document = doc_path.name in ("index.rst", "README.rst")
    return doc_path.parent.name if is_folder_document else doc_path.stem
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def resolve_doc_target_path(source_file: Path, target: str) -> Path | None:
    """Find the .rst file a ``:doc:`` target refers to.

    The target is resolved relative to the directory of ``source_file``.
    Candidates are tried in order: the target as a ``.rst`` file (the suffix
    is appended unless already present), then ``index.rst`` and ``README.rst``
    inside a directory of that name. Returns the first candidate that exists,
    or ``None`` when none does.
    """
    base = (source_file.parent / target).resolve()

    if base.suffix == ".rst":
        as_file = base
    else:
        as_file = base.parent / (base.name + ".rst")

    lookup_order = (as_file, base / "index.rst", base / "README.rst")
    return next((path for path in lookup_order if path.exists()), None)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def resolve_doc_target_uri(source_file: Path, target: str) -> str | None:
    """Translate a ``:doc:`` target into a ``/<series>/<slug>`` link.

    Returns ``None`` when the target file cannot be found or when the file
    found has no series root according to detect_series_root().
    """
    doc_file = resolve_doc_target_path(source_file, target)
    series_dir = None if doc_file is None else detect_series_root(doc_file)
    if series_dir is None:
        return None

    return "/" + series_dir.name + "/" + slug_from_doc_path(doc_file)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def register_doc_role(source_file: Path, warnings: list[str]) -> None:
    """Register a ``:doc:`` role that links to other documents.

    Resolvable targets become reference nodes pointing at the URI from
    resolve_doc_target_uri(). Unresolvable targets are rendered as literal
    text and a message is appended to ``warnings``.
    """
    from docutils import nodes
    from docutils.parsers.rst import roles

    def doc_role(  # type: ignore[override]
        _name: str,
        rawtext: str,
        text: str,
        _lineno: int,
        _inliner: Any,
        _options: dict[str, Any] | None = None,
        _content: list[str] | None = None,
    ) -> tuple[list[Any], list[Any]]:
        target = text.strip()
        label = None

        # "Label <target>" form: split the visible label from the link target.
        explicit = re.match(r"(.+?)\s*<(.+)>$", target)
        if explicit is not None:
            label, target = (piece.strip() for piece in explicit.groups())

        # Without a label, show the last path segment of the target.
        display_text = label or target.split("/")[-1]

        refuri = resolve_doc_target_uri(source_file, target)
        if refuri is not None:
            return [nodes.reference(rawtext, display_text, refuri=refuri)], []

        warnings.append(f'Unresolved :doc: target "{target}" in {source_file}')
        return [nodes.literal(rawtext, display_text)], []

    roles.register_canonical_role("doc", doc_role)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def register_passthrough_roles(warnings: list[str]) -> None:
    """Register the ``dtag``, ``ref`` and ``numref`` roles.

    ``dtag`` is rendered as an inline node carrying the role name as a class
    and records a warning. ``ref`` always produces a ``#fragment`` link.
    ``numref`` produces a link only when the target name is already known to
    the document; otherwise it is rendered as inline text with a warning.
    """
    from docutils import nodes
    from docutils.parsers.rst import roles

    def parse_role_target(text: str) -> tuple[str | None, str]:
        """Split ``Label <target>`` into (label, target); label is None if absent."""
        target = text.strip()
        match = re.match(r"(.+?)\s*<(.+)>$", target)
        if match:
            return match.group(1).strip(), match.group(2).strip()
        return None, target

    def normalize_internal_ref(target: str) -> str:
        """Build a fallback fragment: drop backticks, turn ``_``/whitespace into ``-``."""
        cleaned = target.strip().strip("`")
        cleaned = cleaned.replace("_", "-")
        cleaned = re.sub(r"\s+", "-", cleaned)
        return cleaned

    def resolve_named_refid(inliner: Any, target: str) -> str | None:
        """Look up the element id registered for ``target`` in the document.

        Only names registered before the role is processed can be found.
        """
        document = getattr(inliner, "document", None)
        if document is None:
            return None

        nameids = getattr(document, "nameids", {})
        # docutils registers reference names in case- and whitespace-normalized
        # form, so try the normalized spelling when the literal one is unknown.
        for candidate in (target, nodes.fully_normalize_name(target)):
            refid = nameids.get(candidate)
            if isinstance(refid, str) and refid:
                return refid
        return None

    def make_passthrough_role(role_name: str):
        """Create a role function that keeps the text and warns once per use."""

        def passthrough_role(  # type: ignore[override]
            _name: str,
            rawtext: str,
            text: str,
            _lineno: int,
            _inliner: Any,
            _options: dict[str, Any] | None = None,
            _content: list[str] | None = None,
        ) -> tuple[list[Any], list[Any]]:
            warnings.append(f'Unsupported interpreted text role ":{role_name}:" rendered as plain inline text.')
            return [nodes.inline(rawtext, text, classes=[role_name])], []

        return passthrough_role

    def ref_role(  # type: ignore[override]
        _name: str,
        rawtext: str,
        text: str,
        _lineno: int,
        _inliner: Any,
        _options: dict[str, Any] | None = None,
        _content: list[str] | None = None,
    ) -> tuple[list[Any], list[Any]]:
        label, target = parse_role_target(text)
        display_text = label or target
        # Prefer the id docutils assigned; otherwise guess one from the target.
        refid = resolve_named_refid(_inliner, target) or normalize_internal_ref(target)
        return [nodes.reference(rawtext, display_text, refuri=f"#{refid}")], []

    def numref_role(  # type: ignore[override]
        _name: str,
        rawtext: str,
        text: str,
        _lineno: int,
        _inliner: Any,
        _options: dict[str, Any] | None = None,
        _content: list[str] | None = None,
    ) -> tuple[list[Any], list[Any]]:
        label, target = parse_role_target(text)
        # A label containing "%s" is a number template that cannot be filled
        # in here, so the bare target is shown instead.
        display_text = target if label and "%s" in label else (label or target)
        refid = resolve_named_refid(_inliner, target)
        if refid is not None:
            return [nodes.reference(rawtext, display_text, refuri=f"#{refid}", classes=["numref"])], []

        warnings.append('Unsupported interpreted text role ":numref:" rendered as plain inline text.')
        return [nodes.inline(rawtext, display_text, classes=["numref"])], []

    for role_name in ("dtag",):
        roles.register_canonical_role(role_name, make_passthrough_role(role_name))
    roles.register_canonical_role("ref", ref_role)
    roles.register_canonical_role("numref", numref_role)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@contextmanager
def temporary_role_overrides(source_file: Path, warnings: list[str]):
    """Install this script's custom roles for the duration of a ``with`` block.

    The previous ``doc``, ``dtag``, ``ref`` and ``numref`` entries of the
    docutils role tables are remembered and put back on exit; names that had
    no entry are removed again.

    Raises:
        RstRenderError: if the docutils ``roles`` module lacks the private
            ``_role_registry`` / ``_roles`` tables this relies on.
    """
    from docutils.parsers.rst import roles

    if not hasattr(roles, "_role_registry") or not hasattr(roles, "_roles"):
        raise RstRenderError(
            "Incompatible docutils roles registry layout. Expected _role_registry and _roles "
            "attributes for temporary role overrides."
        )

    tracked_names = ("doc", "dtag", "ref", "numref")
    previous_registry = {name: roles._role_registry.get(name) for name in tracked_names}
    previous_local = {name: roles._roles.get(name) for name in tracked_names}

    def restore(table: dict[str, Any], snapshot: dict[str, Any]) -> None:
        """Put ``table`` back to the state captured in ``snapshot``."""
        for name, role_fn in snapshot.items():
            if role_fn is None:
                table.pop(name, None)
            else:
                table[name] = role_fn

    try:
        # Registration happens inside the try so that a failure partway
        # through still restores whatever was already overridden.
        register_doc_role(source_file, warnings)
        register_passthrough_roles(warnings)
        yield
    finally:
        restore(roles._role_registry, previous_registry)
        restore(roles._roles, previous_local)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def parse_args() -> argparse.Namespace:
    """Parse and validate the command line.

    Two modes are accepted: batch mode (``--batch-stdin`` or ``--batch-file``,
    mutually exclusive, and not combinable with the single-file options) and
    single-file mode, which needs both ``--file`` and ``--image-base-slug``.
    Invalid combinations end the process through ``parser.error``.
    """
    parser = argparse.ArgumentParser(description="Render a single rST file to JSON via docutils.")
    parser.add_argument("--file", help="Absolute or relative path to the .rst file")
    parser.add_argument(
        "--image-base-slug",
        help="Public-relative base slug for local assets, for example posts/my-post",
    )
    parser.add_argument(
        "--batch-stdin",
        action="store_true",
        help="Read a JSON array of batch render entries from stdin",
    )
    parser.add_argument(
        "--batch-file",
        help="Read a JSON array of batch render entries from a file",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Fail on missing local assets instead of reporting them in the output",
    )
    args = parser.parse_args()

    batch_mode = args.batch_stdin or args.batch_file
    if batch_mode:
        if args.batch_stdin and args.batch_file:
            parser.error("--batch-stdin and --batch-file cannot be combined")
        if args.file or args.image_base_slug:
            parser.error("--batch-stdin/--batch-file cannot be combined with --file or --image-base-slug")
        return args

    if not args.file or not args.image_base_slug:
        # Either batch flag lifts this requirement, so the message names both.
        parser.error("--file and --image-base-slug are required unless --batch-stdin or --batch-file is used")

    return args
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def resolve_source_file(raw_file: str) -> Path:
    """Return ``raw_file`` as a resolved absolute path.

    ``~`` is expanded, and relative paths are anchored at the current
    working directory before resolving.
    """
    candidate = Path(raw_file).expanduser()
    anchored = candidate if candidate.is_absolute() else Path.cwd() / candidate
    return anchored.resolve()
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def normalize_metadata_value(key: str, value: str) -> Any:
    """Convert a raw field value according to the field's lower-cased name.

    Names in CSV_FIELDS yield a list of non-empty, stripped items. Names in
    BOOLEAN_FIELDS yield ``True``/``False``. Everything else yields the
    stripped string.

    Raises:
        RstRenderError: if a boolean field is neither "true" nor "false"
            (case-insensitive).
    """
    field = key.lower()
    text = value.strip()

    if field in CSV_FIELDS:
        pieces = (piece.strip() for piece in text.split(","))
        return [piece for piece in pieces if piece]

    if field not in BOOLEAN_FIELDS:
        # Known scalar fields and unknown fields are treated alike.
        return text

    flag = {"true": True, "false": False}.get(text.lower())
    if flag is None:
        raise RstRenderError(f'Invalid boolean for "{key}": {value}')
    return flag
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def normalize_legacy_doc_role_syntax(source: str) -> str:
    """Insert a boundary marker before ``:doc:`` roles that directly follow text.

    A role is prefixed with the marker plus a space unless the character
    before it is whitespace, a backslash or one of ``( [ { <``. The marker is
    stripped again after rendering.

    Raises:
        RstRenderError: if ``source`` already contains the reserved marker.
    """
    reserved = (LEGACY_DOC_ROLE_BOUNDARY, LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE)
    if any(marker in source for marker in reserved):
        raise RstRenderError(
            f'Source already contains reserved legacy :doc: boundary marker "{LEGACY_DOC_ROLE_BOUNDARY}".'
        )

    glued_doc_role = re.compile(r"(?<![\s\\(\[{<])(:doc:`[^`\n]+`)")
    return glued_doc_role.sub(
        lambda found: LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE + found.group(1),
        source,
    )
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def extract_metadata(document: Any) -> dict[str, Any]:
    """Collect front-matter style metadata from the start of a doctree.

    Only the leading ``docinfo`` and ``field_list`` children are read; title
    nodes are skipped and the scan stops at the first other child. Keys are
    lower-cased in the result, except ``coverImage`` and ``redirectFrom``.
    When only ``author`` is present, ``authors`` is set to a one-item list.

    Raises:
        RstRenderError: propagated from normalize_metadata_value() for
            invalid boolean fields.
    """
    from docutils import nodes

    metadata: dict[str, Any] = {}

    def record_field(field: Any) -> None:
        """Store one name/body field, ignoring malformed or empty ones."""
        # Guarded in both branches so a field with fewer than two children
        # is skipped rather than raising IndexError.
        if len(field.children) < 2:
            return
        name = field.children[0].astext().strip()
        value = field.children[1].astext().strip()
        if name and value:
            metadata[name] = normalize_metadata_value(name, value)

    for child in document.children:
        if isinstance(child, nodes.docinfo):
            for entry in child.children:
                if isinstance(entry, nodes.authors):
                    metadata["authors"] = [author.astext().strip() for author in entry.children if author.astext().strip()]
                elif isinstance(entry, nodes.author):
                    metadata["author"] = entry.astext().strip()
                elif isinstance(entry, nodes.field) and len(entry.children) >= 2:
                    record_field(entry)
                else:
                    # Any other docinfo element is keyed by its tag name.
                    key = entry.tagname.lower()
                    value = entry.astext().strip()
                    if value:
                        metadata[key] = normalize_metadata_value(key, value)
            continue

        if isinstance(child, nodes.field_list):
            for field in child.children:
                if isinstance(field, nodes.field):
                    record_field(field)
            continue

        if isinstance(child, nodes.title):
            continue

        # First body element reached: the metadata preamble is over.
        break

    if "author" in metadata and "authors" not in metadata:
        metadata["authors"] = [metadata["author"]]

    normalized: dict[str, Any] = {}
    for key, value in metadata.items():
        lowered = key.lower()
        if lowered == "coverimage":
            normalized["coverImage"] = value
        elif lowered == "redirectfrom":
            normalized["redirectFrom"] = value
        else:
            normalized[lowered] = value

    return normalized
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def resolve_asset_uri(uri: str, source_file: Path, image_base_slug: str) -> tuple[str, bool]:
    """Map an asset URI to its public path and report whether it exists.

    Returns ``(resolved, exists)``. An empty URI yields ``("", False)``.
    URIs starting with ``http://``, ``https://``, ``data:``, ``mailto:``,
    ``#`` or ``/`` are returned unchanged with ``exists`` set to ``True``.
    Any other URI is checked on disk relative to the directory of
    ``source_file`` and rewritten below ``image_base_slug`` as a normalized,
    slash-separated path with a leading ``/``.
    """
    cleaned = uri.strip()
    if cleaned == "":
        return cleaned, False

    untouched_prefixes = ("http://", "https://", "data:", "mailto:", "#", "/")
    if cleaned.startswith(untouched_prefixes):
        return cleaned, True

    on_disk = (source_file.parent / cleaned).resolve().exists()

    # Build the public path with POSIX semantics regardless of the host OS.
    joined = posixpath.join(image_base_slug.strip("/"), cleaned.replace("\\", "/"))
    public_path = "/" + posixpath.normpath(joined).lstrip("/")
    return public_path, on_disk
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def extract_assets(document: Any, source_file: Path, image_base_slug: str) -> list[dict[str, Any]]:
    """List every image in the doctree with its original and resolved URI.

    Each entry has the keys ``original``, ``resolved`` and ``exists``; the
    last two come from resolve_asset_uri(). Images without a URI are skipped.
    """
    from docutils import nodes

    collected: list[dict[str, Any]] = []
    for image in document.findall(nodes.image):
        uri = image.get("uri", "").strip()
        if uri:
            public_path, on_disk = resolve_asset_uri(uri, source_file, image_base_slug)
            collected.append(dict(original=uri, resolved=public_path, exists=on_disk))

    return collected
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
def rewrite_html_assets(rendered_html: str, assets: list[dict[str, Any]]) -> str:
    """Replace original asset URIs in ``src``/``href`` attributes.

    For each asset, every quoted ``src`` or ``href`` value equal to the
    HTML-escaped ``original`` is replaced by the HTML-escaped ``resolved``.
    Assets are applied in list order to the progressively rewritten markup.
    """
    rewritten = rendered_html

    for asset in assets:
        escaped_original = re.escape(html.escape(asset["original"], quote=True))
        replacement_value = html.escape(asset["resolved"], quote=True)
        attribute = re.compile(rf'(\s(?:src|href)=["\']){escaped_original}(["\'])')

        # A callable replacement inserts the value literally. A template
        # string would treat backslashes and a leading digit in the resolved
        # URI as escape sequences or group references.
        rewritten = attribute.sub(
            lambda found, value=replacement_value: f"{found.group(1)}{value}{found.group(2)}",
            rewritten,
        )

    return rewritten
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def extract_headings(document: Any) -> list[dict[str, Any]]:
    """Describe every titled section as ``{"id", "text", "level"}``.

    ``id`` is the section's first id (or an empty string), ``text`` is the
    stripped title text, and ``level`` is 2 plus the number of enclosing
    sections. Sections without a title child are skipped.
    """
    from docutils import nodes

    outline: list[dict[str, Any]] = []
    for section in document.findall(nodes.section):
        heading = None
        for child in section.children:
            if isinstance(child, nodes.title):
                heading = child
                break
        if heading is None:
            continue

        # Count enclosing sections by walking up to the document root.
        nesting = 0
        ancestor = section.parent
        while ancestor is not None:
            if isinstance(ancestor, nodes.section):
                nesting += 1
            ancestor = ancestor.parent

        section_ids = section.get("ids", [])
        outline.append({
            "id": section_ids[0] if section_ids else "",
            "text": heading.astext().strip(),
            "level": nesting + 2,
        })

    return outline
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def extract_body_text(document: Any) -> str:
    """Return the plain-text body of the document.

    Works on a deep copy. System messages, footnotes and footnote references
    are removed, top-level preamble nodes are skipped, and the remaining
    top-level blocks are joined with blank lines. Legacy ``:doc:`` boundary
    markers are stripped from the result.
    """
    from docutils import nodes

    body_tree = copy.deepcopy(document)

    # Remove, in this order, every node type that must not reach the text.
    for unwanted in (nodes.system_message, nodes.footnote, nodes.footnote_reference):
        for node in list(body_tree.findall(unwanted)):
            holder = node.parent
            if holder is not None:
                holder.remove(node)

    skipped_types = (
        nodes.docinfo,
        nodes.field_list,
        nodes.comment,
        nodes.title,
        nodes.system_message,
        nodes.footnote,
    )

    blocks: list[str] = []
    for child in body_tree.children:
        if isinstance(child, skipped_types) or child.tagname == "footnote_list":
            continue
        block_text = child.astext().strip()
        if block_text:
            blocks.append(block_text)

    combined = "\n\n".join(blocks)
    combined = combined.replace(LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE, "")
    combined = combined.replace(LEGACY_DOC_ROLE_BOUNDARY, "")
    return combined.strip()
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def remove_system_messages(document: Any) -> None:
    """Detach every ``system_message`` node from ``document`` in place."""
    from docutils import nodes

    # Snapshot the matches first because the tree is modified while looping.
    doomed = list(document.findall(nodes.system_message))
    for message in doomed:
        holder = message.parent
        if holder is not None:
            holder.remove(message)
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def strip_preamble_nodes(document: Any) -> Any:
    """Return a deep copy of ``document`` without its leading metadata nodes.

    Leading ``docinfo``, ``field_list`` and ``comment`` children are removed.
    Title nodes are kept and skipped over. The scan stops at the first child
    of any other type, so later nodes are left untouched.
    """
    from docutils import nodes

    trimmed = copy.deepcopy(document)
    removable = (nodes.docinfo, nodes.field_list, nodes.comment)

    for child in list(trimmed.children):
        if isinstance(child, removable):
            trimmed.remove(child)
        elif not isinstance(child, nodes.title):
            break

    return trimmed
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def extract_html_body_from_doctree(document: Any) -> str:
    """Render a doctree with the ``html5`` writer and return the body markup.

    The full HTML page is passed through BodyFragmentParser to keep only the
    body content, then legacy ``:doc:`` boundary markers are stripped.

    Raises:
        RstRenderError: if no body fragment could be extracted.
    """
    from docutils.core import publish_from_doctree

    writer_settings = {
        "embed_stylesheet": False,
        "stylesheet_path": None,
        "output_encoding": "unicode",
        "initial_header_level": 2,
        "report_level": 2,
        "halt_level": 5,
        "file_insertion_enabled": False,
        "raw_enabled": False,
    }
    page = publish_from_doctree(document, writer_name="html5", settings_overrides=writer_settings)

    extractor = BodyFragmentParser()
    extractor.feed(page)
    fragment = extractor.get_fragment()
    if not fragment:
        raise RstRenderError("Docutils HTML output did not contain a <main> or <body> fragment.")

    for marker in (LEGACY_DOC_ROLE_BOUNDARY_WITH_SPACE, LEGACY_DOC_ROLE_BOUNDARY):
        fragment = fragment.replace(marker, "")
    return fragment
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def build_output(document: Any, source_file: Path, image_base_slug: str, warnings: list[str]) -> dict[str, Any]:
    """Assemble the JSON-ready render result for one parsed document.

    The result has the keys ``title``, ``html``, ``text``, ``headings``,
    ``metadata``, ``assets`` and ``warnings``. Duplicate warnings are removed
    while keeping first-seen order.

    Raises:
        RstRenderError: if the doctree contains no title node.
    """
    from docutils import nodes

    first_title = next(document.findall(nodes.title), None)
    if first_title is None:
        raise RstRenderError("Missing document title.")

    assets = extract_assets(document, source_file, image_base_slug)
    # Metadata nodes are stripped before rendering so they stay out of the HTML.
    body_markup = extract_html_body_from_doctree(strip_preamble_nodes(document))

    result: dict[str, Any] = {}
    result["title"] = first_title.astext().strip()
    result["html"] = rewrite_html_assets(body_markup, assets)
    result["text"] = extract_body_text(document)
    result["headings"] = extract_headings(document)
    result["metadata"] = extract_metadata(document)
    result["assets"] = assets
    result["warnings"] = list(dict.fromkeys(warnings))
    return result
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def render_single_file(source_file: Path, image_base_slug: str, strict: bool) -> dict[str, Any]:
    """Render one rST file and return its output dictionary.

    The file is read as UTF-8 and parsed with the custom roles installed.
    System messages are removed before the output is built.

    Raises:
        RstRenderError: in strict mode when an asset is reported as missing,
            or for any render failure raised by the helpers.
    """
    from docutils.core import publish_doctree

    warnings: list[str] = []
    raw_text = source_file.read_text(encoding="utf-8")
    source = normalize_legacy_doc_role_syntax(raw_text)

    parser_settings = {
        "report_level": 2,
        "halt_level": 5,
        "file_insertion_enabled": False,
        "raw_enabled": False,
    }
    with temporary_role_overrides(source_file, warnings):
        document = publish_doctree(source=source, settings_overrides=parser_settings)

    remove_system_messages(document)
    output = build_output(document, source_file, image_base_slug, warnings)

    if not strict:
        return output

    # Strict mode fails on the first asset that was not found on disk.
    for asset in output["assets"]:
        if not asset["exists"]:
            raise RstRenderError(
                f'Missing local asset "{asset["original"]}" in {source_file}'
            )

    return output
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
def render_batch(raw_input: str, strict: bool) -> list[dict[str, Any]]:
    """Render every entry of a JSON batch description.

    ``raw_input`` must be a JSON array of objects with string ``file`` and
    ``imageBaseSlug`` members. Each result has the keys ``file`` (resolved
    path), ``ok`` (always ``True``) and ``result``. The first failing entry
    aborts the whole batch.

    Raises:
        RstRenderError: for malformed JSON, malformed entries, missing files
            or render failures.
    """
    try:
        entries = json.loads(raw_input)
    except json.JSONDecodeError as exc:
        raise RstRenderError(f"Invalid batch JSON: {exc.msg}") from exc

    if not isinstance(entries, list):
        raise RstRenderError("Invalid batch JSON: expected an array.")

    rendered: list[dict[str, Any]] = []
    for entry in entries:
        if not isinstance(entry, dict):
            raise RstRenderError("Invalid batch entry: expected an object.")

        raw_file = entry.get("file")
        image_base_slug = entry.get("imageBaseSlug")
        both_strings = isinstance(raw_file, str) and isinstance(image_base_slug, str)
        if not both_strings:
            raise RstRenderError("Invalid batch entry: missing file or imageBaseSlug.")

        source_file = resolve_source_file(raw_file)
        if not source_file.exists():
            raise RstRenderError(f"rST file not found: {source_file}")

        rendered.append(
            dict(
                file=str(source_file),
                ok=True,
                result=render_single_file(source_file, image_base_slug, strict),
            )
        )

    return rendered
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def main() -> int:
    """Run the command-line renderer and return the process exit status.

    The JSON result is printed to stdout and 0 is returned on success.
    A missing docutils install, a missing source file, an RstRenderError, or
    an OSError/ValueError/KeyError/AttributeError prints a message to stderr
    and returns 1. Other exceptions propagate to the caller.
    """
    args = parse_args()

    try:
        from docutils.core import publish_doctree  # noqa: F401
    except ImportError:
        print(
            "Missing Python dependency: docutils. Install it with `python3 -m pip install docutils`.",
            file=sys.stderr,
        )
        return 1

    # Stays None in batch mode, which selects the generic failure message.
    source_file: Path | None = None

    try:
        if args.batch_stdin or args.batch_file:
            if args.batch_file:
                raw_batch_input = Path(args.batch_file).read_text(encoding="utf-8")
            else:
                raw_batch_input = sys.stdin.read()
            payload: Any = render_batch(raw_batch_input, args.strict)
        else:
            source_file = resolve_source_file(args.file)
            if not source_file.exists():
                print(f"rST file not found: {source_file}", file=sys.stderr)
                return 1
            payload = render_single_file(source_file, args.image_base_slug, args.strict)

        print(json.dumps(payload, ensure_ascii=False))
        return 0
    except RstRenderError as exc:
        print(str(exc), file=sys.stderr)
        return 1
    except (OSError, ValueError, KeyError, AttributeError) as exc:
        prefix = "Failed to render" if source_file is None else f"Failed to render {source_file}"
        print(f"{prefix}: {exc}", file=sys.stderr)
        return 1
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|