pyDiffTools 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydifftools/__init__.py +11 -0
- pydifftools/check_numbers.py +70 -0
- pydifftools/command_line.py +747 -0
- pydifftools/command_registry.py +65 -0
- pydifftools/comment_functions.py +39 -0
- pydifftools/continuous.py +194 -0
- pydifftools/copy_files.py +75 -0
- pydifftools/diff-doc.js +193 -0
- pydifftools/doc_contents.py +147 -0
- pydifftools/flowchart/__init__.py +15 -0
- pydifftools/flowchart/dot_to_yaml.py +114 -0
- pydifftools/flowchart/graph.py +620 -0
- pydifftools/flowchart/watch_graph.py +168 -0
- pydifftools/html_comments.py +33 -0
- pydifftools/html_uncomments.py +524 -0
- pydifftools/match_spaces.py +235 -0
- pydifftools/notebook/__init__.py +0 -0
- pydifftools/notebook/fast_build.py +1502 -0
- pydifftools/notebook/tex_to_qmd.py +319 -0
- pydifftools/onewordify.py +149 -0
- pydifftools/onewordify_undo.py +54 -0
- pydifftools/outline.py +173 -0
- pydifftools/rearrange_tex.py +188 -0
- pydifftools/searchacro.py +80 -0
- pydifftools/separate_comments.py +73 -0
- pydifftools/split_conflict.py +213 -0
- pydifftools/unseparate_comments.py +69 -0
- pydifftools/update_check.py +31 -0
- pydifftools/wrap_sentences.py +501 -0
- pydifftools/xml2xlsx.vbs +33 -0
- pydifftools-0.1.8.dist-info/METADATA +146 -0
- pydifftools-0.1.8.dist-info/RECORD +36 -0
- pydifftools-0.1.8.dist-info/WHEEL +5 -0
- pydifftools-0.1.8.dist-info/entry_points.txt +2 -0
- pydifftools-0.1.8.dist-info/licenses/LICENSE.md +28 -0
- pydifftools-0.1.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1502 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Minimal build script using Pandoc instead of Quarto."""
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import subprocess
|
|
10
|
+
import time
|
|
11
|
+
import traceback
|
|
12
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from http.server import ThreadingHTTPServer, SimpleHTTPRequestHandler
|
|
15
|
+
import threading
|
|
16
|
+
import shutil
|
|
17
|
+
import yaml
|
|
18
|
+
from pydifftools.command_registry import register_command
|
|
19
|
+
from watchdog.events import FileSystemEventHandler
|
|
20
|
+
from watchdog.observers.polling import PollingObserver as Observer
|
|
21
|
+
from selenium import webdriver
|
|
22
|
+
from selenium.common.exceptions import (
|
|
23
|
+
WebDriverException,
|
|
24
|
+
NoSuchWindowException,
|
|
25
|
+
)
|
|
26
|
+
from jinja2 import Environment, FileSystemLoader
|
|
27
|
+
import nbformat
|
|
28
|
+
from nbconvert.preprocessors import ExecutePreprocessor
|
|
29
|
+
from nbconvert.preprocessors.execute import NotebookClient
|
|
30
|
+
from pygments import highlight
|
|
31
|
+
from pygments.lexers import PythonLexer
|
|
32
|
+
from pygments.formatters import HtmlFormatter
|
|
33
|
+
from ansi2html import Ansi2HTMLConverter
|
|
34
|
+
|
|
35
|
+
_ansi_conv = Ansi2HTMLConverter(inline=True)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _ansi_to_html(text: str, *, default_style: str | None = None) -> str:
    """Convert ``text`` (which may hold ANSI escapes) to a ``<pre>`` block.

    The conversion is delegated to the module-level ``_ansi_conv`` converter.
    When ``default_style`` is given and the converted markup contains no
    ``span class`` substring, the markup is wrapped in a ``<span>`` carrying
    ``default_style`` as its inline style.
    """
    body = _ansi_conv.convert(text, full=False)
    wrap_with_default = bool(default_style) and "span class" not in body
    if wrap_with_default:
        body = f'<span style="{default_style}">{body}</span>'
    return "<pre>" + body + "</pre>"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class LoggingExecutePreprocessor(ExecutePreprocessor):
    """``ExecutePreprocessor`` that announces each cell on stdout."""

    def preprocess(self, nb, resources=None, km=None):
        """Execute the cells of ``nb`` in order and return ``(nb, resources)``.

        Before each cell is run, a ``Executing cell i/N...`` line is printed
        (and flushed) so long builds show progress.
        """
        NotebookClient.__init__(self, nb, km)
        self.reset_execution_trackers()
        self._check_assign_resources(resources)
        total = len(self.nb.cells)

        with self.setup_kernel():
            assert self.kc
            reply = self.wait_for_reply(self.kc.kernel_info())
            assert reply
            language_info = reply["content"]["language_info"]
            self.nb.metadata["language_info"] = language_info
            for position, cell in enumerate(self.nb.cells, start=1):
                print(f"Executing cell {position}/{total}...", flush=True)
                # preprocess_cell receives the zero-based cell index.
                self.preprocess_cell(cell, resources, position - 1)
        self.set_widgets_metadata()

        return self.nb, self.resources
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ``{{< include path >}}`` / ``{{< embed path >}}`` shortcodes: group 1 is the
# shortcode kind, group 2 the referenced path.
include_pattern = re.compile(
    r"\{\{\s*<\s*"
    r"(include|embed)\s+"
    r"([^>\s]+)"
    r"\s*>\s*\}\}"
)
# Fenced ```{python ...} blocks; group 1 is the code between the fences.
code_pattern = re.compile(
    r"```\{python[^}]*\}\n(.*?)```",
    re.DOTALL,
)
# Markdown images ``![alt](target)``; group 1 is the text in parentheses.
image_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")

# Anchor definitions such as {#sec:id}, {#fig:id} and {#tab:id}.
anchor_pattern = re.compile(r"\{#(sec|fig|tab):([A-Za-z0-9_-]+)\}")
# A Markdown heading that carries an anchor: hashes, title, kind, identifier.
heading_pattern = re.compile(
    r"^(#+)\s+(.*?)\s*"
    r"\{#(sec|fig|tab):([A-Za-z0-9_-]+)\}"
)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class RenderNotebook:
    """Graph of render targets and their includes, plus per-file build flags.

    ``nodes`` maps each path of ``tree`` to a dict with the keys ``type``
    ("trunk", "branch" or "leaf"), ``children``, ``parents``,
    ``has_notebook`` and ``needs_build``.
    """

    def __init__(self, render_files, tree, include_map):
        self.render_files = render_files
        self.tree = tree
        self.include_map = include_map
        self.nodes = {}
        self._build_nodes()

    def _build_nodes(self):
        """Create a node per path in ``tree`` and classify it."""
        for path, included in self.tree.items():
            if path in self.nodes:
                continue
            children = list(included)
            if path in self.include_map:
                parents = list(self.include_map[path])
            else:
                parents = []
            kind = "trunk" if path in self.render_files else "branch"
            self.nodes[path] = {
                "type": kind,
                "children": children,
                "parents": parents,
                "has_notebook": False,
                "needs_build": False,
            }
        for path, node in list(self.nodes.items()):
            # A file that includes nothing and is not rendered itself is a leaf.
            if not node["children"] and path not in self.render_files:
                node["type"] = "leaf"
            src = PROJECT_ROOT / path
            if src.exists():
                has_code = code_pattern.search(src.read_text())
                node["has_notebook"] = bool(has_code)

    def all_paths(self):
        """Return every tracked path as a list."""
        return list(self.nodes)

    def mark_outdated(self, checksums):
        """Flag nodes whose file hash differs from the one in ``checksums``.

        Files that do not exist under ``PROJECT_ROOT`` are flagged as not
        needing a build.
        """
        for path, node in self.nodes.items():
            src = PROJECT_ROOT / path
            if not src.exists():
                node["needs_build"] = False
                continue
            current = self._hash_file(src)
            previous = checksums[path] if path in checksums else None
            node["needs_build"] = current != previous

    def _hash_file(self, path):
        """Return the hex MD5 digest of the bytes of ``path``."""
        return hashlib.md5(path.read_bytes()).hexdigest()

    def stage_targets(self, changed_paths):
        """Flag ``changed_paths`` and their direct parents; list what to build.

        Returns the sorted paths of all nodes whose ``needs_build`` is set.
        """
        for path in changed_paths or ():
            if path not in self.nodes:
                continue
            node = self.nodes[path]
            node["needs_build"] = True
            for parent in node["parents"]:
                if parent in self.nodes:
                    self.nodes[parent]["needs_build"] = True
        pending = [p for p, node in self.nodes.items() if node["needs_build"]]
        pending.sort()
        return pending

    def update_checksums(self, checksums):
        """Store fresh hashes in ``checksums`` for existing flagged files."""
        for path, node in self.nodes.items():
            if node["needs_build"]:
                src = PROJECT_ROOT / path
                if src.exists():
                    checksums[path] = self._hash_file(src)

    def render_order(self):
        """Return the build order computed by ``build_order``."""
        return build_order(self.render_files, self.tree)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def load_checksums():
    """Return the checksum mapping stored in ``BUILD_DIR/checksums.json``.

    The cache is best-effort: an empty dict is returned when the file is
    missing or unreadable, when it is not valid JSON, or when its top-level
    value is not a JSON object (callers index the result by path).
    """
    path = BUILD_DIR / "checksums.json"
    try:
        data = json.loads(path.read_text())
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # JSON syntax errors and undecodable bytes.
        return {}
    return data if isinstance(data, dict) else {}
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def save_checksums(checksums):
    """Write ``checksums`` as indented JSON to ``BUILD_DIR/checksums.json``.

    The containing directory is created first when it does not exist.
    """
    target = BUILD_DIR / "checksums.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(checksums, indent=2)
    target.write_text(serialized)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def load_rendered_files():
    """Return the ``project.render`` list from ``_quarto.yml`` in the cwd.

    An empty list is returned when the file is empty, when its top level or
    its ``project`` entry is not a mapping, or when ``render`` is absent or
    null, instead of failing on ``None``.

    Raises:
        OSError: if ``_quarto.yml`` cannot be read.
    """
    cfg = yaml.safe_load(Path("_quarto.yml").read_text())
    if not isinstance(cfg, dict):
        # safe_load returns None for an empty document.
        return []
    project = cfg.get("project")
    if not isinstance(project, dict):
        return []
    return list(project.get("render") or [])
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def load_bibliography_csl():
    """Return ``(bibliography, csl)`` as configured in ``_quarto.yml``.

    Top-level ``bibliography`` / ``csl`` keys take precedence. Any value
    still missing is taken from the first mapping under ``format`` that
    defines it. Either element is ``None`` when not configured, and
    ``(None, None)`` is returned when the file does not hold a mapping
    (for example when it is empty).

    Raises:
        OSError: if ``_quarto.yml`` cannot be read.
    """
    cfg = yaml.safe_load(Path("_quarto.yml").read_text())
    if not isinstance(cfg, dict):
        # safe_load returns None for an empty document.
        return None, None
    bib = cfg.get("bibliography")
    csl = cfg.get("csl")
    fmt = cfg.get("format", {})
    if isinstance(fmt, dict):
        for options in fmt.values():
            if not isinstance(options, dict):
                continue
            if bib is None:
                bib = options.get("bibliography")
            if csl is None:
                csl = options.get("csl")
    return bib, csl
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def outputs_to_html(outputs: list[dict]) -> str:
    """Render Jupyter cell outputs as HTML fragments joined by newlines.

    Stream text, plain-text results and errors go through ``_ansi_to_html``
    (errors with a red default style). Rich results use the first available
    of ``text/html``, ``image/png``, ``image/jpeg`` and ``text/plain``;
    images are embedded as base64 ``data:`` URIs. Outputs of any other type
    contribute nothing.
    """

    def render(output):
        # Returns a list holding zero or one HTML fragment for ``output``.
        kind = output.get("output_type")
        if kind == "stream":
            return [_ansi_to_html(output.get("text", ""))]
        if kind == "error":
            trace = "\n".join(output.get("traceback", []))
            if not trace:
                trace = f"{output.get('ename', '')}: {output.get('evalue', '')}"
            return [_ansi_to_html(trace, default_style="color:red;")]
        if kind not in {"display_data", "execute_result"}:
            return []
        bundle = output.get("data", {})
        if "text/html" in bundle:
            return [bundle["text/html"]]
        for mime in ("image/png", "image/jpeg"):
            if mime in bundle:
                uri = f"data:{mime};base64,{bundle[mime]}"
                return [f"<img src='{uri}'/>"]
        if "text/plain" in bundle:
            return [_ansi_to_html(bundle["text/plain"])]
        return []

    fragments = []
    for output in outputs:
        fragments.extend(render(output))
    return "\n".join(fragments)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# Directory (relative path) where executed notebook chunks are cached as
# ``<hash>.ipynb`` files by ``execute_code_blocks``.
NOTEBOOK_CACHE_DIR = Path("_nbcache")
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def execute_code_blocks(blocks):
    """Run code blocks as cached Jupyter notebooks, up to four at a time.

    ``blocks`` maps a source path to its ``(code, md5)`` cells. Each source
    is split into notebook chunks at cells that begin with ``%reset -f`` so
    that editing code after a reset only reruns the affected chunk.

    Returns ``(outputs, code_map)``; both are keyed by ``(src, index)`` with
    a 1-based cell index, holding the rendered output HTML and the cell's
    code respectively.
    """
    NOTEBOOK_CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # One job per chunk; each carries the chunk count of its source so
    # progress can be reported like (1/3).
    jobs = []
    for src, cells in blocks.items():
        if not cells:
            continue
        codes = [code for code, _ in cells]
        groups = _split_on_reset(cells)
        chunk_total = len(groups)
        for number, group in enumerate(groups, start=1):
            jobs.append((src, chunk_total, number, group, codes))

    outputs = {}
    code_map = {}
    # Chunks run concurrently so a long-running one does not block the rest.
    worker_count = max(1, min(len(jobs), 4))
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        pending = [pool.submit(_run_notebook_job, job) for job in jobs]
        for finished in as_completed(pending):
            src, indices, nb, codes = finished.result()
            for offset, cell in enumerate(nb.cells):
                rendered = outputs_to_html(cell.get("outputs", []))
                key = (src, indices[offset])
                outputs[key] = rendered
                code_map[key] = codes[indices[offset] - 1]

    return outputs, code_map


def _split_on_reset(cells):
    """Group ``(code, md5)`` cells, starting a new group at ``%reset -f``.

    Returns a list of ``(indices, codes, md5s)`` tuples where ``indices``
    are the 1-based positions of the cells within ``cells``.
    """
    groups = []
    indices, codes, md5s = [], [], []
    for position, (code, md5) in enumerate(cells, start=1):
        starts_fresh = code.lstrip().startswith("%reset -f")
        if codes and starts_fresh:
            groups.append((indices, codes, md5s))
            indices, codes, md5s = [], [], []
        codes.append(code)
        md5s.append(md5)
        indices.append(position)
    if codes:
        groups.append((indices, codes, md5s))
    return groups


def _run_notebook_job(job):
    """Execute one chunk, or load it from the cache; return its notebook.

    The cache file name is the MD5 of the source path and the chunk's cell
    hashes. Returns ``(src, indices, notebook, codes)``.
    """
    src, chunk_total, chunk_number, group, codes = job
    indices, group_codes, group_md5s = group
    digest_input = (src + ":" + "".join(group_md5s)).encode()
    nb_path = NOTEBOOK_CACHE_DIR / (
        hashlib.md5(digest_input).hexdigest() + ".ipynb"
    )
    if nb_path.exists():
        print(f"Reading cached output for {src} from {nb_path}!")
        return src, indices, nbformat.read(nb_path, as_version=4), codes

    # Report progress with the chunk count for this source.
    print(
        f"Generating notebook ({chunk_number}/{chunk_total}) "
        f"for {src} at {nb_path}:"
    )
    nb = nbformat.v4.new_notebook()
    nb.cells = [nbformat.v4.new_code_cell(code) for code in group_codes]
    executor = LoggingExecutePreprocessor(
        kernel_name="python3", timeout=10800, allow_errors=True
    )
    try:
        executor.preprocess(nb, {"metadata": {"path": str(Path(src).parent)}})
    except Exception as exc:
        # Execution itself failed: show the traceback on the first cell and
        # a short notice on every following cell.
        trace = traceback.format_exc()
        if nb.cells:
            nb.cells[0].outputs = [
                nbformat.v4.new_output(
                    output_type="error",
                    ename=type(exc).__name__,
                    evalue=str(exc),
                    traceback=trace.splitlines(),
                )
            ]
        for cell in nb.cells[1:]:
            cell.outputs = [
                nbformat.v4.new_output(
                    output_type="stream",
                    name="stderr",
                    text="previous cell failed to execute\n",
                )
            ]
    nbformat.write(nb, nb_path)
    return src, indices, nb, codes
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def analyze_includes(render_files):
    """Walk the include/embed shortcodes reachable from ``render_files``.

    Returns a tuple ``(tree, roots, included_by)`` where:

    * ``tree`` maps each visited file to the files it directly includes.
    * ``roots`` maps each file to the directory of the main document through
      which it was first reached, which is used as a fallback base when
      resolving its own includes.
    * ``included_by`` maps an included file to the files that include it.

    Keys are POSIX paths relative to ``PROJECT_ROOT`` when the file lies
    under it, and absolute POSIX paths otherwise.

    Raises:
        FileNotFoundError: if an included file cannot be located.
    """
    root = PROJECT_ROOT

    def as_key(path):
        # Project-relative POSIX path, or the absolute one outside the root.
        try:
            return path.relative_to(root).as_posix()
        except ValueError:
            return path.as_posix()

    tree: dict[str, list[str]] = {}
    included_by: dict[str, list[str]] = {}
    visited = set()
    root_dirs = {}
    stack = []
    for name in render_files:
        entry = Path(name)
        stack.append(entry.resolve())
        root_dirs[entry.resolve()] = entry.parent.resolve()

    while stack:
        current = stack.pop()
        if current in visited or not current.exists():
            continue
        visited.add(current)
        root_dir = root_dirs.get(current, current.parent)
        current_key = as_key(current)
        includes: list[str] = []
        for _kind, inc in include_pattern.findall(current.read_text()):
            # Try the including file's directory, then the main document's
            # directory, then that directory's parent.
            for base in (current.parent, root_dir, root_dir.parent):
                target = (base / inc).resolve()
                if target.exists():
                    break
            else:
                raise FileNotFoundError(
                    f"Include file '{inc}' not found for '{current}'"
                )
            rel = as_key(target)
            includes.append(rel)
            stack.append(target)
            root_dirs.setdefault(target, root_dir)
            included_by.setdefault(rel, []).append(current_key)
        tree[current_key] = includes

    roots_str: dict[str, Path] = {
        as_key(path): directory
        for path, directory in root_dirs.items()
        if path.exists()
    }

    return tree, roots_str, included_by
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def resolve_render_file(file, included_by, render_files):
    """Follow first-parent include links from ``file`` up to a render file.

    The walk stops, returning the file reached so far, when that file is in
    ``render_files``, has no entry in ``included_by``, or was already seen
    (which guards against include cycles).
    """
    seen = set()
    current = file
    while True:
        if current in render_files:
            return current
        if current in seen or current not in included_by:
            return current
        seen.add(current)
        current = included_by[current][0]
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def collect_anchors(render_files, included_by):
    """Map every ``kind:id`` anchor to ``(render_file, label)``.

    All ``*.qmd`` files below the current directory are scanned, skipping
    those that have ``BUILD_DIR`` among their parents. The label is the
    heading text when the anchor's line is a heading carrying an anchor, and
    the bare identifier otherwise. ``render_file`` is the result of
    ``resolve_render_file`` for the file defining the anchor. A later
    definition of the same key replaces an earlier one.
    """
    anchors = {}
    for path in Path(".").rglob("*.qmd"):
        if BUILD_DIR in path.parents:
            continue
        for line in path.read_text().splitlines():
            for found in anchor_pattern.finditer(line):
                kind, ident = found.group(1), found.group(2)
                heading = heading_pattern.match(line)
                label = heading.group(2).strip() if heading else ident
                owner = resolve_render_file(
                    path.as_posix(), included_by, render_files
                )
                anchors[f"{kind}:{ident}"] = (owner, label)
    return anchors
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
# Cross-references of the form ``@sec:id``, ``@fig:id`` or ``@tab:id``;
# group 1 is the kind and group 2 the identifier.
ref_pattern = re.compile(r"@(sec|fig|tab):([A-Za-z0-9_-]+)")
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def replace_refs_text(text, anchors, dest_dir: Path):
    """Turn ``@kind:id`` references in ``text`` into Markdown links.

    A reference whose key is in ``anchors`` becomes ``[label](path#key)``,
    where ``path`` is the built HTML page of the owning file expressed
    relative to ``dest_dir``. Unknown references are left untouched.
    """

    def to_link(match):
        key = f"{match.group(1)}:{match.group(2)}"
        if key not in anchors:
            return match.group(0)
        file, label = anchors[key]
        html_path = BUILD_DIR / file.replace(".qmd", ".html")
        rel = os.path.relpath(html_path, dest_dir)
        return f"[{label}]({rel}#{key})"

    return ref_pattern.sub(to_link, text)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def replace_refs(path, anchors):
    """Rewrite cross-references in the file at ``path`` in place.

    The file is only written when the replacement changed its content.
    Returns ``True`` if the file was rewritten and ``False`` otherwise.
    """
    original = path.read_text()
    updated = replace_refs_text(original, anchors, path.parent)
    if updated == original:
        return False
    path.write_text(updated)
    return True
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
# Output directories, kept relative to the working directory.
BUILD_DIR = Path("_build")
DISPLAY_DIR = Path("_display")
# Template assets, resolved to absolute paths when the module is imported.
BODY_TEMPLATE = (Path("_template") / "body-only.html").resolve()
PANDOC_TEMPLATE = (Path("_template") / "pandoc_template.html").resolve()
NAV_TEMPLATE = (Path("_template") / "nav_template.html").resolve()
MATHJAX_DIR = (Path("_template") / "mathjax").resolve()
# Absolute path of the working directory at import time.
PROJECT_ROOT = Path().resolve()
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def example_notebook_root():
    """Return the expected location of the example notebook directory.

    The path is ``example_notebook`` inside the directory three levels above
    this module file; it is not checked for existence here.
    """
    module_path = Path(__file__).resolve()
    return module_path.parents[2].joinpath("example_notebook")
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def download_mathjax(target_dir):
    """Download MathJax into ``target_dir`` if it is missing.

    Nothing happens when ``target_dir/es5/tex-mml-chtml.js`` already exists.
    When the ``PYDIFFTOOLS_FAKE_MATHJAX`` environment variable is set, a
    stub script is written instead of downloading. Otherwise
    ``mathjax-full`` is installed with npm into a ``_mjtmp`` scratch
    directory in the working directory and its ``es5`` folder is copied
    into ``target_dir``.

    Raises:
        OSError: if npm cannot be started (for example, not installed).
        subprocess.CalledProcessError: if an npm command fails.
    """
    target_dir = Path(target_dir)
    es5_dir = target_dir / "es5"
    script = es5_dir / "tex-mml-chtml.js"
    if script.exists():
        return
    if os.environ.get("PYDIFFTOOLS_FAKE_MATHJAX"):
        es5_dir.mkdir(parents=True, exist_ok=True)
        script.write_text("// fake mathjax for testing")
        return
    tmp = Path("_mjtmp")
    tmp.mkdir(parents=True, exist_ok=True)
    try:
        subprocess.run(["npm", "init", "-y"], cwd=tmp, check=True)
        subprocess.run(
            ["npm", "install", "mathjax-full"], cwd=tmp, check=True
        )
        src = tmp / "node_modules" / "mathjax-full" / "es5"
        es5_dir.mkdir(parents=True, exist_ok=True)
        shutil.copytree(src, es5_dir, dirs_exist_ok=True)
    finally:
        # Remove the scratch directory even when npm is missing or fails, so
        # a failed attempt does not leave ``_mjtmp`` behind in the project.
        shutil.rmtree(tmp, ignore_errors=True)
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def ensure_mathjax():
    """Make sure MathJax is available in the module-level ``MATHJAX_DIR``."""
    cache_dir = MATHJAX_DIR
    download_mathjax(cache_dir)
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def _copy_resource_tree(resource, dest, overwrite=False):
|
|
513
|
+
dest = Path(dest)
|
|
514
|
+
if resource.is_dir():
|
|
515
|
+
for child in resource.iterdir():
|
|
516
|
+
_copy_resource_tree(child, dest / child.name, overwrite)
|
|
517
|
+
return
|
|
518
|
+
if dest.exists() and not overwrite:
|
|
519
|
+
return
|
|
520
|
+
dest.parent.mkdir(parents=True, exist_ok=True)
|
|
521
|
+
dest.write_bytes(resource.read_bytes())
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def ensure_template_assets(project_root, overwrite=False):
    """Populate ``project_root/_template`` with the build templates.

    Templates from the example notebook's ``_template`` directory are copied
    when that directory exists. After that, ``nav_template.html``,
    ``body-only.html``, ``pandoc_template.html`` and ``obs.lua`` are written
    from simple built-in defaults when they are missing, or always when
    ``overwrite`` is true.
    """

    template_src = example_notebook_root() / "_template"
    target = Path(project_root) / "_template"
    target.mkdir(parents=True, exist_ok=True)
    if template_src.exists():
        _copy_resource_tree(template_src, target, overwrite)

    def write_default(name, make_text):
        # Write a built-in default unless an existing file should be kept.
        path = target / name
        if overwrite or not path.exists():
            path.write_text(make_text())
        return path

    nav_html = """
<style>
#on-this-page {font-family: sans-serif; border: 1px solid #ddd; padding: \
0.5rem; margin-bottom: 1rem;}
#on-this-page h2 {margin-top: 0; font-size: 1.1rem;}
#on-this-page ul {list-style: none; padding-left: 0; margin: 0;}
#on-this-page li {margin: 0.25rem 0;}
</style>
<nav id="on-this-page">
<h2>On this page</h2>
<ul>
{% for page in pages %}
<li><a href="{{ page.href }}">{{ page.title or page.file }}</a></li>
{% endfor %}
</ul>
</nav>
"""
    body_html = """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
$for(header-includes)$
$header-includes$
$endfor$
</head>
<body>
$body$
</body>
</html>
"""
    # Fall back to simple built-in templates when packaged assets are missing.
    write_default("nav_template.html", lambda: nav_html)
    body_target = write_default("body-only.html", lambda: body_html)
    # The full-page template defaults to a copy of the body-only template.
    write_default("pandoc_template.html", body_target.read_text)
    write_default("obs.lua", lambda: "-- placeholder filter\n")
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
def _write_placeholder_outputs():
    """Write a stand-in ``.html`` file under ``BUILD_DIR`` for each ``.qmd``.

    Every ``*.qmd`` found below ``PROJECT_ROOT`` gets a file at the matching
    relative location containing the source text, or a minimal HTML page
    naming the file when the source is empty or cannot be read.
    """

    BUILD_DIR.mkdir(parents=True, exist_ok=True)
    for qmd in PROJECT_ROOT.rglob("*.qmd"):
        rel = qmd.relative_to(PROJECT_ROOT)
        out_path = BUILD_DIR / rel.with_suffix(".html")
        out_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            content = qmd.read_text()
        except OSError:
            content = ""
        out_path.write_text(content or f"<html><body>{rel}</body></html>")
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
@register_command(
    "Initialize a sample Quarto project with bundled templates",
    help={
        "path": (
            "Directory to initialize (defaults to current working directory)"
        ),
        "force": "Overwrite existing files when copying the scaffold",
    },
)
def qmdinit(path, force=False):
    """Scaffold a demo project in ``path`` from the example notebook.

    ``path`` defaults to the current directory when ``None``. Existing files
    are kept unless ``force`` is true.

    Raises:
        RuntimeError: if the example notebook directory does not exist.
    """

    source_root = example_notebook_root()
    if not source_root.exists():
        raise RuntimeError("example_notebook directory is missing")
    target = Path("." if path is None else path).resolve()

    # Point every module-level path at the project being initialized so that
    # later build steps read and write there even if the module was imported
    # from another directory.
    global PROJECT_ROOT, BUILD_DIR, DISPLAY_DIR
    global BODY_TEMPLATE, PANDOC_TEMPLATE, NAV_TEMPLATE, MATHJAX_DIR
    PROJECT_ROOT = target
    BUILD_DIR = PROJECT_ROOT / "_build"
    DISPLAY_DIR = PROJECT_ROOT / "_display"
    template_dir = PROJECT_ROOT / "_template"
    BODY_TEMPLATE = template_dir / "body-only.html"
    PANDOC_TEMPLATE = template_dir / "pandoc_template.html"
    NAV_TEMPLATE = template_dir / "nav_template.html"
    MATHJAX_DIR = template_dir / "mathjax"

    for child in source_root.iterdir():
        _copy_resource_tree(child, target / child.name, force)

    # Some expected render targets are not part of the checked-in example,
    # so write lightweight stand-ins to keep the sample runnable on its own.
    placeholders = (
        ("projects.qmd", "{{< include project1/index.qmd >}}\n"),
        ("notebook250708.qmd", "# Example notebook placeholder\n"),
    )
    for name, text in placeholders:
        stub = target / name
        if force or not stub.exists():
            stub.write_text(text)

    ensure_template_assets(target, overwrite=force)
    download_mathjax(target / "_template" / "mathjax")
    print(f"Initialized Quarto scaffold in {target.resolve()}")
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
@register_command(
    "Build Quarto-style projects with Pandoc and the fast builder (optionally"
    " watch)",
    help={
        "no_browser": "Do not launch a browser when using --watch",
        "webtex": "Use Pandoc's --webtex option instead of MathJax",
    },
)
def qmdb(no_browser=False, webtex=False):
    """Build and watch the current directory with the fast notebook builder.

    Template assets are ensured first. If ``yaml``, ``nbformat`` or
    ``Environment`` is ``None``, only placeholder outputs are written;
    otherwise control passes to ``watch_and_serve``.
    """

    ensure_template_assets(Path("."))
    optional_deps = (yaml, nbformat, Environment)
    if any(dep is None for dep in optional_deps):
        # Minimal fallback when optional dependencies are unavailable.
        _write_placeholder_outputs()
        return
    watch_and_serve(no_browser=no_browser, webtex=webtex)
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
def ensure_pandoc_available():
    """Ensure a ``pandoc`` executable can be found on ``PATH``.

    When it is not found, the directory of the pandoc binary at the fixed
    Quarto location ``/opt/quarto/bin/tools/x86_64`` is appended to ``PATH``
    if that binary exists, and the lookup is retried.

    Raises:
        RuntimeError: if pandoc still cannot be found.
    """
    found = shutil.which("pandoc")
    if not found:
        fallback = Path("/opt/quarto/bin/tools/x86_64/pandoc")
        if fallback.exists():
            os.environ["PATH"] += os.pathsep + str(fallback.parent)
            found = shutil.which("pandoc")
    if not found:
        raise RuntimeError(
            "Pandoc not found. Install it from https://pandoc.org/installing.html"
        )
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def ensure_pandoc_crossref():
    """Check that a ``pandoc-crossref`` executable is on ``PATH``.

    Raises:
        RuntimeError: if the executable cannot be found.
    """
    if not shutil.which("pandoc-crossref"):
        raise RuntimeError(
            "pandoc-crossref not found. Install it from"
            " https://github.com/lierdakil/pandoc-crossref"
        )
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def all_files(render_files, tree):
    """Return the set of existing files among the render files and ``tree``.

    Both the keys of ``tree`` and the files listed as their includes are
    considered; paths that do not exist are left out.
    """

    def present(name):
        return Path(name).exists()

    found = set(filter(present, render_files))
    for src, incs in tree.items():
        if present(src):
            found.add(src)
        found.update(inc for inc in incs if present(inc))
    return found
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def build_order(render_files, tree):
    """Return files in dependency order: includes before their includers.

    Starting from each entry of ``render_files`` in turn, the include graph
    ``tree`` (file -> directly included files) is walked depth-first and
    each file is emitted after all of its includes. Every file appears once,
    and include cycles are tolerated because a file is marked as seen when
    it is first reached.

    The walk uses an explicit stack rather than recursion, so arbitrarily
    deep include chains cannot exceed the interpreter's recursion limit.
    """
    order = []
    seen = set()
    for start in render_files:
        if start in seen:
            continue
        seen.add(start)
        # Each frame pairs a file with an iterator over its remaining
        # includes, so the walk resumes where it left off.
        pending = [(start, iter(tree.get(start, [])))]
        while pending:
            node, children = pending[-1]
            for child in children:
                if child not in seen:
                    seen.add(child)
                    pending.append((child, iter(tree.get(child, []))))
                    break
            else:
                # All includes handled: the file itself can be emitted.
                pending.pop()
                order.append(node)
    return order
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def collect_render_targets(targets, included_by, render_files):
    """Return the render files reachable upward from ``targets``.

    ``included_by`` links are followed transitively from every target; the
    result is the set of all files reached (targets included) that are also
    listed in ``render_files``.
    """
    render_set = set(render_files)
    seen = set()
    pending = list(targets)
    while pending:
        node = pending.pop()
        if node in seen:
            continue
        seen.add(node)
        pending.extend(included_by.get(node, ()))
    return seen & render_set
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
def mirror_and_modify(files, anchors, roots):
    """Stage ``files`` into ``BUILD_DIR`` with their markup rewritten.

    For each file the cross-references are replaced with links, every
    include/embed shortcode becomes a ``<div>`` marker pointing at the HTML
    of the included file, and every Python code block becomes a ``<div>``
    marker carrying its source path, 1-based index and MD5. Local images
    that live under ``PROJECT_ROOT`` are copied next to the staged files.

    Returns a dict mapping each source path to its ``(code, md5)`` blocks in
    document order.
    """
    project_root = PROJECT_ROOT
    code_blocks: dict[str, list[tuple[str, str]]] = {}
    for file in files:
        src = Path(file)
        dest = BUILD_DIR / file
        dest_dir = dest.parent
        dest_dir.mkdir(parents=True, exist_ok=True)
        staged = replace_refs_text(src.read_text(), anchors, dest_dir)

        root_dir = roots.get(file, src.parent)

        def include_marker(match: re.Match) -> str:
            kind, inc = match.groups()
            # Include paths are tried against the main document root first,
            # then the including file's directory, then the root's parent.
            for base in (root_dir, src.parent, root_dir.parent):
                target_src = (base / inc).resolve()
                if target_src.exists():
                    break
            target_rel = target_src.relative_to(project_root)
            html_path = (BUILD_DIR / target_rel).with_suffix(".html")
            inc_path = os.path.relpath(html_path, dest_dir)
            # Record the staged include so the display pass can load it; a
            # div element is used as the marker because Pandoc preserves it.
            source_attr = target_rel.with_suffix(".html").as_posix()
            return (
                f'<div data-{kind.lower()}="{inc_path}" '
                f'data-source="{source_attr}"></div>'
            )

        staged = include_pattern.sub(include_marker, staged)

        block_number = 0

        def code_marker(match: re.Match) -> str:
            nonlocal block_number
            block_number += 1
            code = match.group(1)
            digest = hashlib.md5(code.encode()).hexdigest()
            src_rel = str(src)
            code_blocks.setdefault(src_rel, []).append((code, digest))
            return (
                f'<div data-script="{src_rel}" data-index="{block_number}"'
                f' data-md5="{digest}"></div>'
            )

        staged = code_pattern.sub(code_marker, staged)

        # Copy referenced local images into the build directory.
        for img in image_pattern.findall(staged):
            img_path = img.split()[0]
            is_remote = re.match(r"https?://", img_path)
            if is_remote or img_path.startswith("data:"):
                continue
            for base in (src.parent, root_dir, root_dir.parent):
                image_src = (base / img_path).resolve()
                if image_src.exists():
                    break
            if not image_src.exists():
                continue
            try:
                rel = image_src.relative_to(project_root)
            except ValueError:
                # Images outside the project are not mirrored.
                continue
            image_dest = BUILD_DIR / rel
            image_dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(image_src, image_dest)

        dest.write_text(staged)
    return code_blocks
|
|
803
|
+
|
|
804
|
+
|
|
805
|
+
def render_file(
    src: Path,
    dest: Path,
    fragment: bool,
    bibliography=None,
    csl=None,
    webtex: bool = False,
):
    """Render ``src`` to ``dest`` using Pandoc with embedded resources.

    Pandoc is run with ``dest.parent`` as its working directory and is given
    only ``src.name`` as input, so the file it reads is the one of that name
    inside ``dest.parent``.  Every other path on the command line is made
    relative to ``dest.parent`` for the same reason.

    Args:
        src: source document; only its file name is passed to Pandoc.
        dest: staged location; the output is ``dest`` with an ``.html``
            suffix, written next to it.
        fragment: use ``BODY_TEMPLATE`` when true, ``PANDOC_TEMPLATE``
            otherwise.
        bibliography: optional bibliography path; ``~`` is expanded and a
            relative path is taken relative to ``PROJECT_ROOT``.
        csl: optional CSL style path, resolved like ``bibliography``.
        webtex: pass ``--webtex`` instead of ``--mathjax=...``.

    Raises:
        FileNotFoundError: if ``bibliography`` or ``csl`` is given but the
            resolved file does not exist.
        RuntimeError: if Pandoc exits with a non-zero status; the message
            holds Pandoc's stderr and the command line that was run.
    """

    template = BODY_TEMPLATE if fragment else PANDOC_TEMPLATE
    temp = os.path.relpath(
        DISPLAY_DIR / "mathjax" / "es5" / "tex-mml-chtml.js", dest.parent
    )
    math_arg = (
        "--webtex" if webtex else (f"--mathjax={temp}?config=TeX-AMS_CHTML")
    )
    args = [
        "pandoc",
        src.name,
        "--from",
        "markdown+raw_html",
        "--standalone",
        "--embed-resources",
        "--lua-filter",
        os.path.relpath(BUILD_DIR / "obs.lua", dest.parent),
        "--filter",
        "pandoc-crossref",
        "--citeproc",
        math_arg,
        "--template",
        os.path.relpath(template, dest.parent),
        "-o",
        dest.with_suffix(".html").name,
    ]
    if bibliography:
        bib_path = Path(os.path.expanduser(bibliography))
        if not bib_path.is_absolute():
            bib_path = PROJECT_ROOT / bib_path
        if not bib_path.exists():
            raise FileNotFoundError(
                f"Bibliography file {bibliography} not found"
            )
        args += ["--bibliography", os.path.relpath(bib_path, dest.parent)]
    if csl:
        csl_path = Path(os.path.expanduser(csl))
        if not csl_path.is_absolute():
            csl_path = PROJECT_ROOT / csl_path
        if not csl_path.exists():
            raise FileNotFoundError(f"CSL file {csl} not found")
        args += ["--csl", os.path.relpath(csl_path, dest.parent)]
    print(f"Running pandoc on {src}...", flush=True)
    start = time.time()
    try:
        subprocess.run(args, check=True, cwd=dest.parent, capture_output=True)
    except subprocess.CalledProcessError as e:
        # stderr is captured as bytes (no text mode), so decode it; otherwise
        # the message would show a ``b'...'`` repr with escaped newlines.
        stderr = e.stderr or b""
        if isinstance(stderr, bytes):
            stderr = stderr.decode("utf-8", errors="replace")
        raise RuntimeError(
            f"{stderr}\nwhen trying to run:{' '.join(args)}"
        ) from e
    duration = time.time() - start
    print(
        f"Finished pandoc on {src} in {duration:.1f}s",
        flush=True,
    )
|
|
867
|
+
|
|
868
|
+
|
|
869
|
+
try:
|
|
870
|
+
from lxml import html as lxml_html
|
|
871
|
+
except ImportError:
|
|
872
|
+
lxml_html = None
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
def parse_headings(html_path: Path):
    """Build a nested outline of the ``h1``-``h6`` headings in ``html_path``.

    Each outline entry is a dict with the keys ``level``, ``text``, ``id``
    and ``children``; a heading is filed under the closest preceding heading
    of a strictly smaller level.  Headings whose ``class`` attribute contains
    ``title`` are skipped so the page title is not repeated in the section
    list.  An empty list is returned when lxml is unavailable.
    """
    if lxml_html is None:
        return []
    document = lxml_html.parse(
        str(html_path), lxml_html.HTMLParser(encoding="utf-8")
    )
    found = document.getroot().xpath("//h1|//h2|//h3|//h4|//h5|//h6")

    outline: list[dict] = []
    # chain of ancestors of the most recently added entry, outermost first
    open_entries = []
    for heading in found:
        css_classes = (heading.get("class") or "").split()
        if "title" in css_classes:
            continue
        depth = int(heading.tag[1])
        entry = {
            "level": depth,
            "text": "".join(heading.itertext()).strip(),
            "id": heading.get("id"),
            "children": [],
        }
        # close every open entry that is not shallower than this heading
        while open_entries and open_entries[-1]["level"] >= depth:
            open_entries.pop()
        siblings = open_entries[-1]["children"] if open_entries else outline
        siblings.append(entry)
        open_entries.append(entry)
    return outline
|
|
907
|
+
|
|
908
|
+
|
|
909
|
+
def read_title(qmd: Path) -> str:
    """Return a display title for the document ``qmd``.

    The title is taken from, in order of preference:

    1. the ``title`` key of a leading ``---`` delimited YAML front matter,
    2. the first ``# heading`` line of the document body,
    3. the file name without its suffix.

    The heading search only looks at the text after the front matter, so a
    ``# comment`` line inside the YAML block is not mistaken for a heading.
    """
    text = qmd.read_text()
    body = text
    if text.startswith("---"):
        end = text.find("\n---", 3)
        if end != -1:
            # everything after the closing delimiter is the document body
            body = text[end + len("\n---"):]
            try:
                meta = yaml.safe_load(text[3:end])
            except Exception:
                # Deliberately best-effort: a missing YAML library or
                # malformed front matter just means we fall back to the
                # heading or the file name.
                meta = None
            if isinstance(meta, dict) and "title" in meta:
                return str(meta["title"])
    m = re.search(r"^#\s+(.+)", body, re.MULTILINE)
    if m:
        return m.group(1).strip()
    return qmd.stem
|
|
924
|
+
|
|
925
|
+
|
|
926
|
+
def add_navigation(html_path: Path, pages: list[dict], current: str):
    """Insert navigation menu for ``html_path`` using ``pages`` data.

    Any element with id ``on-this-page`` and any ``<style>``/``<script>``
    mentioning it are removed first, so repeated calls do not stack menus.
    ``NAV_TEMPLATE`` is then rendered with ``pages`` (each given an ``href``
    relative to ``html_path``'s directory) and ``current``.  ``<style>``
    fragments of the result go into ``<head>`` when there is one; every
    other fragment is inserted at the start of ``<body>``.  The file is
    rewritten in place as UTF-8.

    The function does nothing when lxml is unavailable, when the file yields
    no document root, or when the document has no ``<body>``.

    Args:
        html_path: served HTML file to modify.
        pages: page dicts; the ``file`` key of each is read here.
        current: passed to the template as ``current``.
    """
    # parse_headings degrades gracefully without lxml; do the same here
    # instead of failing with AttributeError on ``None``.
    if lxml_html is None:
        return
    parser = lxml_html.HTMLParser(encoding="utf-8")
    tree = lxml_html.parse(str(html_path), parser)
    root = tree.getroot()
    if root is None:
        return
    body = root.xpath("//body")
    if not body:
        return
    # remove any existing navigation to keep incremental updates clean
    stale_queries = (
        '//*[@id="on-this-page"]',
        "//style[contains(., '#on-this-page')]",
        "//script[contains(., 'on-this-page')]",
    )
    for query in stale_queries:
        for old in root.xpath(query):
            parent = old.getparent()
            if parent is not None:
                parent.remove(old)

    env = Environment(loader=FileSystemLoader(str(NAV_TEMPLATE.parent)))
    tmpl = env.get_template(NAV_TEMPLATE.name)
    local_pages = []
    for page in pages:
        href_path = (DISPLAY_DIR / page["file"]).with_suffix(".html")
        href = os.path.relpath(href_path, html_path.parent)
        local_pages.append({**page, "href": href})
    rendered = tmpl.render(pages=local_pages, current=current)
    frags = lxml_html.fragments_fromstring(rendered)
    head = root.xpath("//head")
    head = head[0] if head else None
    for frag in frags:
        # fragments_fromstring may return leading text as a plain string,
        # which has no ``tag`` and cannot be inserted as an element
        if isinstance(frag, str):
            continue
        if frag.tag == "style" and head is not None:
            head.append(frag)
        else:
            # NOTE(review): inserting each fragment at index 0 leaves the
            # body fragments in reverse template order - confirm this is
            # intended.
            body[0].insert(0, frag)
    tree.write(str(html_path), encoding="utf-8", method="html")
|
|
965
|
+
|
|
966
|
+
|
|
967
|
+
def postprocess_html(html_path: Path, include_root: Path, resource_root: Path):
    """Replace placeholder nodes with referenced HTML bodies.

    Every element carrying ``data-include`` or ``data-embed`` is replaced by
    the children of the ``<body>`` of the referenced file (or by the parsed
    fragment itself when it has no ``<body>``), wrapped in ``BEGIN``/``END``
    HTML comments.  The reference is taken from ``data-source`` when
    present, else from ``data-include``/``data-embed``, and is resolved
    against ``include_root``.  A reference whose file does not exist yet is
    replaced by a red "Waiting for pandoc" notice.  The scan repeats until
    no placeholder is left, so nested includes are expanded too.

    When the document contains ``math inline``/``math display`` elements
    and a ``<head>``, scripts whose ``src`` contains ``MathJax`` are pointed
    at the MathJax copy under ``resource_root``; if none exists, such a
    script is appended to ``<head>``.

    Args:
        html_path: served HTML file, rewritten in place.
        include_root: directory that placeholder references are relative to.
        resource_root: directory containing ``mathjax/es5/tex-mml-chtml.js``.
    """
    # Sibling helpers parse and write these files as UTF-8, so do not depend
    # on the locale's default encoding here (it is not UTF-8 everywhere).
    root = lxml_html.fromstring(html_path.read_text(encoding="utf-8"))
    # keep processing until no include placeholders remain so nested includes
    # are fully expanded in the served HTML
    # NOTE(review): files that include each other would be expanded without
    # end here - presumably cycles are rejected earlier; confirm.
    while True:
        nodes = list(root.xpath("//*[@data-include] | //*[@data-embed]"))
        if not nodes:
            break
        progress = False
        for node in nodes:
            target_rel = node.get("data-source")
            if not target_rel:
                target_rel = node.get("data-include") or node.get("data-embed")
            if not target_rel:
                # placeholder with empty attributes: nothing to resolve, and
                # joining ``None`` onto a path would raise TypeError
                continue
            target = (include_root / target_rel).resolve()
            if target.exists():
                # announce include substitutions so the console logs which
                # staged fragments feed each served page
                try:
                    dest_rel = html_path.relative_to(DISPLAY_DIR).as_posix()
                except ValueError:
                    dest_rel = html_path.name
                print(f"including {target_rel} into {dest_rel}")
                frag_text = target.read_text(encoding="utf-8")
                frag = lxml_html.fromstring(frag_text)
                body = frag.xpath("body")
                if body:
                    elems = list(body[0])
                else:
                    elems = [frag]
                parent = node.getparent()
                if parent is None:
                    continue
                idx = parent.index(node)
                parent.remove(node)
                end_c = lxml_html.HtmlComment(f"END include {target_rel}")
                start_c = lxml_html.HtmlComment(f"BEGIN include {target_rel}")
                # insert back to front at the same index so the final order
                # is: BEGIN comment, included elements, END comment
                parent.insert(idx, end_c)
                for elem in reversed(elems):
                    parent.insert(idx, elem)
                parent.insert(idx, start_c)
                progress = True
            else:
                parent = node.getparent()
                if parent is not None:
                    placeholder = lxml_html.fragment_fromstring(
                        '<div style="color:red;font-weight:bold">'
                        f"Waiting for pandoc on {target_rel} to complete..."
                        "</div>",
                        create_parent=False,
                    )
                    idx = parent.index(node)
                    parent.remove(node)
                    parent.insert(idx, placeholder)
                    progress = True
        if not progress:
            # nothing could be replaced in this pass; stop instead of looping
            break
    # ensure MathJax references point at the provided resource root so the
    # served HTML loads scripts from the display tree instead of the staging
    # area.
    math_nodes = root.xpath(
        '//*[@class="math inline" or @class="math display"]'
    )
    if math_nodes:
        head = root.xpath("//head")
        if head:
            math_path = os.path.relpath(
                resource_root / "mathjax" / "es5" / "tex-mml-chtml.js",
                html_path.parent,
            )
            # NOTE(review): this match is case-sensitive, so a script whose
            # src only contains lowercase "mathjax" is not rewritten and a
            # second script is appended instead - confirm this is intended.
            existing = root.xpath('//script[contains(@src, "MathJax")]')
            if existing:
                for node in existing:
                    node.set("src", math_path)
                    node.set("id", node.get("id") or "MathJax-script")
                    node.set("async", "")
            else:
                script = lxml_html.fragment_fromstring(
                    '<script id="MathJax-script" async'
                    f' src="{math_path}"></script>',
                    create_parent=False,
                )
                head[0].append(script)
    html_path.write_text(
        lxml_html.tostring(root, encoding="unicode"), encoding="utf-8"
    )
|
|
1051
|
+
|
|
1052
|
+
|
|
1053
|
+
def substitute_code_placeholders(
    html_path: Path,
    outputs: dict[tuple[str, int], str],
    codes: dict[tuple[str, int], str],
) -> None:
    """Fill the script placeholders of ``html_path`` with code and output.

    Each ``<div data-script=... data-index=...>`` is replaced by the
    syntax-highlighted source registered in ``codes`` under
    ``(script, index)`` followed by the HTML registered in ``outputs`` under
    the same key.  When ``outputs`` has no entry for the key, a red
    "Running notebook" notice is shown instead of output.  The Pygments
    stylesheet is added to ``<head>`` once, and the file is only rewritten
    when at least one placeholder was replaced.
    """
    document = lxml_html.parse(
        str(html_path), lxml_html.HTMLParser(encoding="utf-8")
    )
    root = document.getroot()
    formatter = HtmlFormatter()
    heads = root.xpath("//head")
    if heads and not root.xpath('//style[@id="pygments-style"]'):
        style = formatter.get_style_defs(".highlight")
        heads[0].append(
            lxml_html.fragment_fromstring(
                f'<style id="pygments-style">{style}</style>',
                create_parent=False,
            )
        )
    replaced_any = False
    for placeholder in list(root.xpath("//div[@data-script][@data-index]")):
        src = placeholder.get("data-script")
        try:
            idx = int(placeholder.get("data-index", "0"))
        except ValueError:
            idx = 0
        key = (src, idx)
        has_output = key in outputs
        output_html = outputs[key] if has_output else ""
        source_code = codes[key] if key in codes else ""
        pieces = lxml_html.fragments_fromstring(
            highlight(source_code, PythonLexer(), formatter)
        )
        if not has_output:
            # Only show the notice when the output entry is absent, so that
            # executed cells which intentionally produce no output simply
            # render their source code.
            pieces.append(
                lxml_html.fragment_fromstring(
                    '<div style="color:red;font-weight:bold">'
                    f"Running notebook {src}..."
                    "</div>",
                    create_parent=False,
                )
            )
        elif output_html:
            pieces += lxml_html.fragments_fromstring(output_html)
        container = placeholder.getparent()
        if container is None:
            continue
        position = container.index(placeholder)
        container.remove(placeholder)
        # insert back to front so the pieces end up in their original order
        for piece in reversed(pieces):
            container.insert(position, piece)
        replaced_any = True
    if replaced_any:
        document.write(str(html_path), encoding="utf-8", method="html")
|
|
1113
|
+
|
|
1114
|
+
|
|
1115
|
+
def build_all(webtex: bool = False, changed_paths=None):
    """Build the staged (``BUILD_DIR``) and served (``DISPLAY_DIR``) trees.

    The build runs in phases:

    1. sources are mirrored into the staging tree with placeholders and
       rendered with Pandoc (up to four renders in parallel) while notebook
       code blocks execute on a separate worker thread;
    2. whatever notebook output is already available is substituted into
       the staged pages;
    3. served pages are assembled from the staged fragments;
    4. once notebook execution has finished, staged and served pages are
       refreshed with the final outputs.

    Finally a navigation menu is added to every served page.

    Args:
        webtex: forwarded to ``render_file``; when false MathJax is ensured
            and copied into the display tree.
        changed_paths: optional iterable of changed paths.  Only existing
            ``.qmd`` files inside ``PROJECT_ROOT`` are considered, and only
            the targets derived from them are rebuilt.  When empty or
            ``None`` everything the graph reports as a stage target is
            built and every rendered file is reassembled.

    Returns:
        A dict with the keys ``render_files``, ``tree`` and ``include_map``.
    """
    ensure_pandoc_available()
    ensure_pandoc_crossref()
    ensure_template_assets(PROJECT_ROOT)
    BUILD_DIR.mkdir(parents=True, exist_ok=True)
    DISPLAY_DIR.mkdir(parents=True, exist_ok=True)
    if not webtex:
        ensure_mathjax()
        # copy MathJax into the display tree so browsers load assets from the
        # served directory while the staging area remains limited to fragments.
        shutil.copytree(
            MATHJAX_DIR, DISPLAY_DIR / "mathjax", dirs_exist_ok=True
        )
    # copy project configuration without the render list so individual renders
    # don't attempt to build the entire project
    if yaml is not None:
        cfg = yaml.safe_load(Path("_quarto.yml").read_text())
        # An empty config file parses to None and ``project:`` may itself be
        # empty, so only clear the render list when the mapping is there.
        project = cfg.get("project") if isinstance(cfg, dict) else None
        if isinstance(project, dict) and "render" in project:
            project["render"] = []
        (BUILD_DIR / "_quarto.yml").write_text(yaml.safe_dump(cfg))
    else:
        # Without PyYAML, copy the config as-is so the builder can still
        # produce placeholder outputs.
        (BUILD_DIR / "_quarto.yml").write_text(Path("_quarto.yml").read_text())
    if Path("_template/obs.lua").exists():
        shutil.copy2("_template/obs.lua", BUILD_DIR / "obs.lua")

    checksums = load_checksums()

    render_files = load_rendered_files()
    bibliography, csl = load_bibliography_csl()
    tree, roots, include_map = analyze_includes(render_files)
    graph = RenderNotebook(render_files, tree, include_map)
    graph.mark_outdated(checksums)
    anchors = collect_anchors(render_files, include_map)

    if changed_paths:
        # reduce the reported paths to project-relative POSIX names of
        # existing .qmd files
        normalized = set()
        for path in changed_paths:
            candidate = Path(path)
            if not candidate.exists():
                continue
            try:
                rel = candidate.resolve().relative_to(PROJECT_ROOT)
            except ValueError:
                continue
            if rel.suffix != ".qmd":
                continue
            normalized.add(rel.as_posix())
        stage_set = set(graph.stage_targets(normalized))
        display_targets = collect_render_targets(
            stage_set, include_map, render_files
        )
        for rel in stage_set:
            if rel in render_files:
                display_targets.add(rel)
        if not stage_set and not display_targets:
            # nothing relevant changed
            return {
                "render_files": render_files,
                "tree": tree,
                "include_map": include_map,
            }
    else:
        stage_set = set(graph.stage_targets(None))
        display_targets = set(render_files)

    stage_files = sorted(stage_set)
    # phase 1: rebuild the modified sources into the staging tree
    code_blocks = mirror_and_modify(stage_files, anchors, roots)

    # Start notebook execution immediately so it can run while pandoc renders.
    notebook_executor = None
    notebook_future = None
    outputs = {}
    code_map = {}
    if code_blocks:
        notebook_executor = ThreadPoolExecutor(max_workers=1)
        notebook_future = notebook_executor.submit(
            execute_code_blocks, code_blocks
        )

    order = graph.render_order()
    render_targets = [f for f in order if f in stage_set]
    if render_targets:
        workers = max(1, min(len(render_targets), 4))
        with ThreadPoolExecutor(max_workers=workers) as pool:
            # map each future back to its file for direct lookup on completion
            pending = {}
            for f in render_targets:
                # files that are not rendered pages are only ever included
                fragment = f not in render_files
                future = pool.submit(
                    render_file,
                    Path(f),
                    BUILD_DIR / f,
                    fragment,
                    bibliography,
                    csl,
                    webtex,
                )
                pending[future] = f
            for future in as_completed(pending):
                name = pending[future]
                # Retrieve the outcome so a failed render is reported
                # instead of being logged as finished.  The build carries
                # on; later phases show a placeholder for missing output.
                error = future.exception()
                if error is not None:
                    print(f"Pandoc failed for {name}: {error}")
                else:
                    print(f"Pandoc finished for {name}")

    # NOTE(review): checksums are updated even if a render failed above -
    # confirm whether failed files should stay marked as outdated.
    graph.update_checksums(checksums)
    save_checksums(checksums)

    # phase 2: insert whatever notebook output is available into staged pages
    if notebook_future and notebook_future.done():
        outputs, code_map = notebook_future.result()
        notebook_executor.shutdown(wait=False)
        notebook_executor = None
        notebook_future = None
    for f in stage_files:
        html_file = (BUILD_DIR / f).with_suffix(".html")
        if html_file.exists():
            substitute_code_placeholders(html_file, outputs, code_map)

    # phase 3: assemble the served pages from staged fragments
    for target in sorted(display_targets):
        src_html = (BUILD_DIR / target).with_suffix(".html")
        dest_html = (DISPLAY_DIR / target).with_suffix(".html")
        if not src_html.exists():
            dest_html.parent.mkdir(parents=True, exist_ok=True)
            dest_html.write_text(
                "<html><body><div style='color:red;font-weight:bold'>"
                f"Waiting for pandoc on {target} to complete..."
                "</div>"
                "</body></html>"
            )
            continue
        dest_html.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src_html, dest_html)
        # build includes using staged fragments and rewrite math assets to the
        # display tree that the web server presents.
        postprocess_html(dest_html, BUILD_DIR, DISPLAY_DIR)

    # phase 4: wait for notebooks to finish, then refresh staged and served
    # pages with the completed outputs so the browser updates when work ends.
    if notebook_future:
        outputs, code_map = notebook_future.result()
        notebook_executor.shutdown(wait=False)
        for f in stage_files:
            html_file = (BUILD_DIR / f).with_suffix(".html")
            if html_file.exists():
                substitute_code_placeholders(html_file, outputs, code_map)
        for target in sorted(display_targets):
            src_html = (BUILD_DIR / target).with_suffix(".html")
            dest_html = (DISPLAY_DIR / target).with_suffix(".html")
            if not src_html.exists():
                continue
            dest_html.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(src_html, dest_html)
            postprocess_html(dest_html, BUILD_DIR, DISPLAY_DIR)

    pages = []
    for qmd in render_files:
        html_file = (DISPLAY_DIR / qmd).with_suffix(".html")
        source_path = PROJECT_ROOT / qmd
        if not source_path.exists():
            # Make it obvious which path is missing and keep the display tree
            # consistent by creating a placeholder page until pandoc produces
            # the real output.
            placeholder = (
                "<html><body><div style='color:red;font-weight:bold'>"
                f"Missing source file {source_path}"
                "</div></body></html>"
            )
            html_file.parent.mkdir(parents=True, exist_ok=True)
            html_file.write_text(placeholder)
            print(f"Cannot read title; missing source: {source_path}")
            continue
        if html_file.exists():
            sections = parse_headings(html_file)
            pages.append(
                {
                    "file": qmd,
                    "href": html_file.name,
                    "title": read_title(source_path),
                    "sections": sections,
                }
            )

    for page in pages:
        html_file = (DISPLAY_DIR / page["file"]).with_suffix(".html")
        if html_file.exists():
            add_navigation(html_file, pages, page["file"])

    return {
        "render_files": render_files,
        "tree": tree,
        "include_map": include_map,
    }
|
|
1309
|
+
|
|
1310
|
+
|
|
1311
|
+
class BrowserReloader:
    """Own a Selenium-driven browser window showing ``url``.

    The window is opened on construction; ``refresh`` reloads it and
    ``is_alive`` reports whether it is still open.
    """

    def __init__(self, url: str):
        self.url = url
        self.init_browser()

    def init_browser(self):
        """Open Chrome, or Firefox if Chrome cannot be started, on the URL.

        Raises:
            ImportError: if the optional ``selenium`` package is missing.
        """
        if webdriver is None:
            raise ImportError(
                "Browser refresh support requires the optional 'selenium'"
                " package."
            )
        try:
            driver = webdriver.Chrome()
        except Exception:
            # Chrome could not be started; try Firefox instead
            driver = webdriver.Firefox()
        self.browser = driver
        self.browser.get(self.url)

    def refresh(self):
        """Reload the page; drop the browser if the reload fails."""
        driver = self.browser
        if not driver:
            return
        try:
            driver.refresh()
        except WebDriverException:
            # the window is gone or unusable: shut the driver down quietly
            # and remember that there is nothing left to refresh
            try:
                driver.quit()
            except Exception:
                pass
            self.browser = None

    def is_alive(self) -> bool:
        """Tell whether the browser window is still open and responsive."""
        driver = self.browser
        if not driver:
            return False
        try:
            if not driver.window_handles:
                return False
            # a trivial script round-trip proves the session still answers
            driver.execute_script("return 1")
        except (NoSuchWindowException, WebDriverException):
            return False
        return True
|
|
1353
|
+
|
|
1354
|
+
|
|
1355
|
+
class ChangeHandler(FileSystemEventHandler):
    """Rebuild and refresh the browser when a source ``.qmd`` file changes.

    Files located below a ``_build`` or ``_display`` directory are ignored,
    so the builder's own output does not trigger another build.
    """

    # directory names whose contents are build products
    _IGNORED_DIRS = frozenset({"_build", "_display"})

    def __init__(self, build_func, refresher):
        """Store the callbacks.

        Args:
            build_func: called with the changed path.
            refresher: object whose ``refresh()`` is called after each build.
        """
        super().__init__()
        self.build = build_func
        self.refresher = refresher

    def handle(self, path, is_directory):
        """Run a build for ``path`` if it is a source ``.qmd`` file."""
        if is_directory:
            return
        # event paths may arrive as bytes; normalise to str
        path = os.fsdecode(path)
        if not path.endswith(".qmd"):
            return
        # Compare whole path components rather than searching for
        # "/_build/": that also works with backslash separators and with
        # relative paths that start with the directory name.
        if self._IGNORED_DIRS.intersection(Path(path).parts[:-1]):
            return
        print(f"Change detected: {path}")
        self.build(path)
        self.refresher.refresh()

    def on_modified(self, event):
        self.handle(event.src_path, event.is_directory)

    def on_created(self, event):
        self.handle(event.src_path, event.is_directory)

    def on_moved(self, event):
        # for a move the destination is the file that now exists
        self.handle(event.dest_path, event.is_directory)
|
|
1379
|
+
|
|
1380
|
+
|
|
1381
|
+
def _serve_forever(httpd: ThreadingHTTPServer):
    """Thread target: block in ``httpd.serve_forever()`` until it returns."""
    httpd.serve_forever()
|
|
1384
|
+
|
|
1385
|
+
|
|
1386
|
+
def watch_and_serve(no_browser: bool = False, webtex: bool = False):
    """Build the site, then serve it and rebuild whenever sources change.

    After an initial ``build_all`` the display tree is served on port 8000
    (with the staging tree as a fallback), a browser window is opened on the
    first rendered page, and ``PROJECT_ROOT`` is watched recursively.  The
    loop ends when the browser window is closed or on Ctrl-C; the observer,
    the server and the browser are then shut down.

    Args:
        no_browser: when true, only the initial build is performed and its
            result is returned; no server, browser or watcher is started.
        webtex: forwarded to ``build_all``.

    Returns:
        The ``build_all`` result when ``no_browser`` is true, otherwise
        ``None``.

    Raises:
        ImportError: if the optional ``selenium`` or ``watchdog`` package
            is missing.
    """
    state = build_all(webtex=webtex)
    if no_browser:
        # In headless scenarios we only need the build artifacts and can exit
        # immediately instead of launching a server loop that waits for a
        # browser connection.
        return state
    # From here on a browser is always wanted, so the original's dummy
    # refresher and ``not no_browser`` checks were unreachable and are gone.
    port = 8000
    render_files = state["render_files"]

    if render_files:
        start_page = Path(render_files[0]).with_suffix(".html").as_posix()
    else:
        start_page = ""
    url = f"http://localhost:{port}/{start_page}"

    print("Watching project root:")
    print(" ", PROJECT_ROOT)

    def _is_within(candidate: Path, root: Path) -> bool:
        """Return True when ``candidate`` is ``root`` or lies below it."""
        # A plain string prefix test would also accept sibling directories
        # such as ``<root>_other``; compare path components instead.
        try:
            candidate.relative_to(root)
        except ValueError:
            return False
        return True

    class Handler(SimpleHTTPRequestHandler):
        """Serve from the display tree, falling back to the staging tree."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, directory=str(DISPLAY_DIR), **kwargs)

        def translate_path(self, path):
            rel = path.lstrip("/")
            display_root = DISPLAY_DIR.resolve()
            build_root = BUILD_DIR.resolve()
            if rel == "_build":
                return str(build_root)
            if rel.startswith("_build/"):
                inner = rel.split("/", 1)[1]
                candidate = (BUILD_DIR / inner).resolve()
                if _is_within(candidate, build_root) and candidate.exists():
                    return str(candidate)
            display_candidate = (DISPLAY_DIR / rel).resolve()
            if display_candidate.exists() and _is_within(
                display_candidate, display_root
            ):
                return str(display_candidate)
            build_candidate = (BUILD_DIR / rel).resolve()
            if build_candidate.exists() and _is_within(
                build_candidate, build_root
            ):
                return str(build_candidate)
            # anything else (including quoted or query-carrying URLs) goes
            # through the standard handler rooted at the display tree
            return super().translate_path(path)

    try:
        # NOTE(review): binding to 0.0.0.0 exposes the preview server on
        # every network interface - confirm this is intended.
        httpd = ThreadingHTTPServer(("0.0.0.0", port), Handler)
    except OSError as exc:  # pragma: no cover - depends on local environment
        print(f"Could not start server on port {port}: {exc}")
        return
    print(
        f"Serving {DISPLAY_DIR} with fallback to {BUILD_DIR} at"
        f" http://localhost:{port}"
    )
    Path(DISPLAY_DIR).mkdir(parents=True, exist_ok=True)
    threading.Thread(target=_serve_forever, args=(httpd,), daemon=True).start()
    refresher = BrowserReloader(url)
    if Observer is None:
        raise ImportError(
            "File watching requires the optional 'watchdog' package."
        )

    observer = Observer()

    def rebuild(path):
        build_all(webtex=webtex, changed_paths=[path])

    handler = ChangeHandler(rebuild, refresher)
    observer.schedule(handler, str(PROJECT_ROOT), recursive=True)
    observer.start()
    try:
        # poll once per second; closing the browser window ends the session
        while True:
            if not refresher.is_alive():
                break
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        observer.stop()
        observer.join()
        httpd.shutdown()
        httpd.server_close()
        if getattr(refresher, "browser", None):
            try:
                refresher.browser.quit()
            except Exception:
                # best-effort cleanup; the browser may already be gone
                pass
|
|
1487
|
+
|
|
1488
|
+
|
|
1489
|
+
if __name__ == "__main__":
    # Script entry point: both options are plain on/off switches that are
    # passed straight through to watch_and_serve.
    cli = argparse.ArgumentParser(description="Build site using Pandoc")
    for flag, help_text in (
        ("--no-browser", "Do not open a browser when using --watch"),
        ("--webtex", "Use Pandoc's --webtex option instead of MathJax"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)
    options = cli.parse_args()
    watch_and_serve(no_browser=options.no_browser, webtex=options.webtex)
|