pyDiffTools 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1502 @@
1
+ #!/usr/bin/env python3
2
+ """Minimal build script using Pandoc instead of Quarto."""
3
+
4
+ import argparse
5
+ import hashlib
6
+ import json
7
+ import os
8
+ import re
9
+ import subprocess
10
+ import time
11
+ import traceback
12
+ from concurrent.futures import ThreadPoolExecutor, as_completed
13
+ from pathlib import Path
14
+ from http.server import ThreadingHTTPServer, SimpleHTTPRequestHandler
15
+ import threading
16
+ import shutil
17
+ import yaml
18
+ from pydifftools.command_registry import register_command
19
+ from watchdog.events import FileSystemEventHandler
20
+ from watchdog.observers.polling import PollingObserver as Observer
21
+ from selenium import webdriver
22
+ from selenium.common.exceptions import (
23
+ WebDriverException,
24
+ NoSuchWindowException,
25
+ )
26
+ from jinja2 import Environment, FileSystemLoader
27
+ import nbformat
28
+ from nbconvert.preprocessors import ExecutePreprocessor
29
+ from nbconvert.preprocessors.execute import NotebookClient
30
+ from pygments import highlight
31
+ from pygments.lexers import PythonLexer
32
+ from pygments.formatters import HtmlFormatter
33
+ from ansi2html import Ansi2HTMLConverter
34
+
35
# Shared converter; ``inline=True`` is passed so styling is emitted inline.
_ansi_conv = Ansi2HTMLConverter(inline=True)


def _ansi_to_html(text: str, *, default_style: str | None = None) -> str:
    """Convert ``text`` (which may hold ANSI escapes) to a ``<pre>`` block.

    When ``default_style`` is given and the converted markup contains no
    ``span class`` substring, the markup is wrapped in a ``<span>`` carrying
    that inline style.
    """
    body = _ansi_conv.convert(text, full=False)
    wrap_in_default = bool(default_style) and "span class" not in body
    if wrap_in_default:
        body = f'<span style="{default_style}">{body}</span>'
    return "<pre>" + body + "</pre>"
44
+
45
+
46
class LoggingExecutePreprocessor(ExecutePreprocessor):
    """ExecutePreprocessor that prints a progress line before each cell."""

    def preprocess(self, nb, resources=None, km=None):
        """Execute every cell of ``nb`` and return ``(nb, resources)``.

        A line of the form ``Executing cell i/N...`` is printed (and flushed)
        before each cell is handed to ``preprocess_cell``.
        """
        NotebookClient.__init__(self, nb, km)
        self.reset_execution_trackers()
        self._check_assign_resources(resources)
        total = len(self.nb.cells)

        with self.setup_kernel():
            assert self.kc
            kernel_reply = self.wait_for_reply(self.kc.kernel_info())
            assert kernel_reply
            language_info = kernel_reply["content"]["language_info"]
            self.nb.metadata["language_info"] = language_info
            for number, cell in enumerate(self.nb.cells, start=1):
                print(f"Executing cell {number}/{total}...", flush=True)
                # preprocess_cell expects the zero-based cell position.
                self.preprocess_cell(cell, resources, number - 1)
        self.set_widgets_metadata()

        return self.nb, self.resources
70
+
71
+
72
# Shortcodes of the form ``{{< include path >}}`` / ``{{< embed path >}}``;
# group 1 is the keyword, group 2 the path.
include_pattern = re.compile(
    r"\{\{\s*<\s*(include|embed)\s+([^>\s]+)\s*>\s*\}\}",
)

# Fenced ``{python ...}`` blocks; group 1 is the code between the fences.
code_pattern = re.compile(r"```\{python[^}]*\}\n(.*?)```", flags=re.DOTALL)

# Markdown images ``![alt](target)``; group 1 is everything inside ``(...)``.
image_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")

# Anchor definitions such as {#sec:id}, {#fig:id} and {#tab:id}.
anchor_pattern = re.compile(r"\{#(sec|fig|tab):([A-Za-z0-9_-]+)\}")

# A heading line that ends its title with one of the anchors above.
heading_pattern = re.compile(
    r"^(#+)\s+(.*?)\s*\{#(sec|fig|tab):([A-Za-z0-9_-]+)\}",
)
85
+
86
+
87
class RenderNotebook:
    """Graph of trunk, branch and leaf files plus their rebuild flags.

    ``nodes`` maps each key of ``tree`` to a dict with the entries ``type``,
    ``children``, ``parents``, ``has_notebook`` and ``needs_build``.
    """

    def __init__(self, render_files, tree, include_map):
        self.render_files = render_files
        self.tree = tree
        self.include_map = include_map
        self.nodes = {}
        self._build_nodes()

    def _build_nodes(self):
        """Create one node per tree entry, then classify and inspect it."""
        for path in self.tree:
            if path in self.nodes:
                continue
            is_trunk = path in self.render_files
            self.nodes[path] = {
                "type": "trunk" if is_trunk else "branch",
                "children": list(self.tree[path]),
                "parents": list(self.include_map.get(path, [])),
                "has_notebook": False,
                "needs_build": False,
            }
        for path in list(self.nodes):
            node = self.nodes[path]
            # A file that includes nothing and is not rendered itself is a
            # leaf.
            if not node["children"] and path not in self.render_files:
                node["type"] = "leaf"
            source = PROJECT_ROOT / path
            if source.exists():
                found = code_pattern.search(source.read_text())
                node["has_notebook"] = bool(found)

    def all_paths(self):
        """Return the node keys as a list."""
        return list(self.nodes)

    def mark_outdated(self, checksums):
        """Flag nodes whose current hash differs from ``checksums``.

        Nodes whose source file no longer exists are flagged as not needing
        a build.
        """
        for path, node in self.nodes.items():
            source = PROJECT_ROOT / path
            if source.exists():
                recorded = checksums[path] if path in checksums else None
                node["needs_build"] = self._hash_file(source) != recorded
            else:
                node["needs_build"] = False

    def _hash_file(self, path):
        """Return the MD5 hex digest of the bytes of ``path``."""
        return hashlib.md5(path.read_bytes()).hexdigest()

    def stage_targets(self, changed_paths):
        """Flag ``changed_paths`` and their direct parents; list flagged nodes.

        Returns the sorted keys of every node whose ``needs_build`` is set.
        """
        for path in changed_paths or ():
            if path not in self.nodes:
                continue
            self.nodes[path]["needs_build"] = True
            for parent in self.nodes[path]["parents"]:
                if parent in self.nodes:
                    self.nodes[parent]["needs_build"] = True
        flagged = [
            name for name, node in self.nodes.items() if node["needs_build"]
        ]
        flagged.sort()
        return flagged

    def update_checksums(self, checksums):
        """Store fresh hashes in ``checksums`` for flagged, existing files."""
        for path, node in self.nodes.items():
            if node["needs_build"]:
                source = PROJECT_ROOT / path
                if source.exists():
                    checksums[path] = self._hash_file(source)

    def render_order(self):
        """Return ``build_order`` for the render files and include tree."""
        return build_order(self.render_files, self.tree)
168
+
169
+
170
def load_checksums():
    """Load the checksum table stored in ``BUILD_DIR/checksums.json``.

    Returns the decoded mapping, or an empty dict when the file is missing,
    unreadable, not valid JSON, or does not hold a JSON object (callers
    index and assign into the result, so anything but a dict is unusable).
    """
    path = BUILD_DIR / "checksums.json"
    try:
        data = json.loads(path.read_text())
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # JSON syntax errors and undecodable bytes.
        return {}
    return data if isinstance(data, dict) else {}
178
+
179
+
180
def save_checksums(checksums):
    """Write ``checksums`` as indented JSON to ``BUILD_DIR/checksums.json``.

    The containing directory is created when it does not exist yet.
    """
    target = BUILD_DIR / "checksums.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(checksums, indent=2)
    target.write_text(serialized)
184
+
185
+
186
def load_rendered_files():
    """Return the ``project.render`` list from ``_quarto.yml``.

    The file is read relative to the current working directory. An empty
    list is returned when the document is empty, is not a mapping, or when
    ``project`` / ``render`` are absent or null.
    """
    text = Path("_quarto.yml").read_text()
    # safe_load yields None for an empty document.
    cfg = yaml.safe_load(text)
    if not isinstance(cfg, dict):
        return []
    project = cfg.get("project")
    if not isinstance(project, dict):
        return []
    return list(project.get("render") or [])
190
+
191
+
192
def load_bibliography_csl():
    """Return ``(bibliography, csl)`` as configured in ``_quarto.yml``.

    Top-level ``bibliography`` / ``csl`` keys win; otherwise the first
    mapping under ``format`` that defines the key supplies it. Either
    element is ``None`` when not configured, and ``(None, None)`` is
    returned when the document is empty or not a mapping.
    """
    text = Path("_quarto.yml").read_text()
    # safe_load yields None for an empty document.
    cfg = yaml.safe_load(text)
    if not isinstance(cfg, dict):
        return None, None
    bib = cfg.get("bibliography")
    csl = cfg.get("csl")
    fmt = cfg.get("format", {})
    if isinstance(fmt, dict):
        for options in fmt.values():
            if not isinstance(options, dict):
                continue
            if bib is None and "bibliography" in options:
                bib = options["bibliography"]
            if csl is None and "csl" in options:
                csl = options["csl"]
    return bib, csl
210
+
211
+
212
def outputs_to_html(outputs: list[dict]) -> str:
    """Render Jupyter cell outputs as HTML fragments joined by newlines.

    Streams, plain text and errors go through ``_ansi_to_html``. Rich
    outputs use the first available of ``text/html``, ``image/png``,
    ``image/jpeg`` and ``text/plain``; images become ``data:`` URIs.
    Outputs of any other type are ignored.
    """
    rendered = []
    for output in outputs:
        kind = output.get("output_type")
        if kind == "stream":
            rendered.append(_ansi_to_html(output.get("text", "")))
            continue
        if kind == "error":
            trace = "\n".join(output.get("traceback", []))
            if not trace:
                # No traceback recorded: fall back to "name: value".
                trace = f"{output.get('ename', '')}: {output.get('evalue', '')}"
            rendered.append(_ansi_to_html(trace, default_style="color:red;"))
            continue
        if kind not in {"display_data", "execute_result"}:
            continue
        bundle = output.get("data", {})
        if "text/html" in bundle:
            rendered.append(bundle["text/html"])
            continue
        for mime in ("image/png", "image/jpeg"):
            if mime in bundle:
                uri = f"data:{mime};base64,{bundle[mime]}"
                rendered.append(f"<img src='{uri}'/>")
                break
        else:
            if "text/plain" in bundle:
                rendered.append(_ansi_to_html(bundle["text/plain"]))
    return "\n".join(rendered)
238
+
239
+
240
NOTEBOOK_CACHE_DIR = Path("_nbcache")


def _split_at_resets(codes, md5s):
    """Split cells into runs, starting a new run at each ``%reset -f`` cell.

    Returns a list of ``(indices, codes, md5s)`` tuples where ``indices``
    are the 1-based positions of the cells in the original sequence.
    """
    groups = []
    indices, run_codes, run_md5s = [], [], []
    for position, (code, digest) in enumerate(zip(codes, md5s), start=1):
        # Splitting at a reset means editing code after it only reruns the
        # affected run instead of the whole file.
        if run_codes and code.lstrip().startswith("%reset -f"):
            groups.append((indices, run_codes, run_md5s))
            indices, run_codes, run_md5s = [], [], []
        run_codes.append(code)
        run_md5s.append(digest)
        indices.append(position)
    if run_codes:
        groups.append((indices, run_codes, run_md5s))
    return groups


def execute_code_blocks(blocks):
    """Execute code blocks as cached Jupyter notebooks.

    ``blocks`` maps a source name to a list of ``(code, md5)`` pairs.
    Returns ``(outputs, code_map)``, both keyed by ``(source, index)`` with
    1-based indices: the rendered output HTML and the cell's code.
    """
    NOTEBOOK_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    outputs = {}
    code_map = {}
    jobs = []

    # Gather every chunk up front so progress can be shown like (1/3).
    for src, cells in blocks.items():
        if not cells:
            continue
        codes = []
        md5s = []
        for code, digest in cells:
            codes.append(code)
            md5s.append(digest)
        groups = _split_at_resets(codes, md5s)
        for number, group in enumerate(groups, start=1):
            jobs.append((src, len(groups), number, group, codes))

    def run_job(job):
        src, total_groups, group_idx, group_data, codes = job
        group_indices, group_codes, group_md5s = group_data
        # The cache key covers the source name and every cell hash in the
        # chunk.
        hash_input = (src + ":" + "".join(group_md5s)).encode()
        nb_path = (
            NOTEBOOK_CACHE_DIR / f"{hashlib.md5(hash_input).hexdigest()}.ipynb"
        )
        if nb_path.exists():
            print(f"Reading cached output for {src} from {nb_path}!")
            return src, group_indices, nbformat.read(nb_path, as_version=4), codes

        # Report progress with the chunk count for this source.
        print(
            f"Generating notebook ({group_idx}/{total_groups}) "
            f"for {src} at {nb_path}:"
        )
        nb = nbformat.v4.new_notebook()
        nb.cells = [nbformat.v4.new_code_cell(code) for code in group_codes]
        executor = LoggingExecutePreprocessor(
            kernel_name="python3", timeout=10800, allow_errors=True
        )
        try:
            executor.preprocess(
                nb, {"metadata": {"path": str(Path(src).parent)}}
            )
        except Exception as exc:
            # Record the failure on the first cell and mark the rest as
            # skipped.
            trace = traceback.format_exc()
            if nb.cells:
                nb.cells[0].outputs = [
                    nbformat.v4.new_output(
                        output_type="error",
                        ename=type(exc).__name__,
                        evalue=str(exc),
                        traceback=trace.splitlines(),
                    )
                ]
                for cell in nb.cells[1:]:
                    cell.outputs = [
                        nbformat.v4.new_output(
                            output_type="stream",
                            name="stderr",
                            text="previous cell failed to execute\n",
                        )
                    ]
        nbformat.write(nb, nb_path)
        return src, group_indices, nb, codes

    # Run chunks concurrently so long-running groups do not block the rest.
    worker_count = max(1, min(len(jobs), 4))
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        pending = [pool.submit(run_job, job) for job in jobs]
        for finished in as_completed(pending):
            src, group_indices, nb, codes = finished.result()
            for offset, cell in enumerate(nb.cells):
                cell_no = group_indices[offset]
                outputs[(src, cell_no)] = outputs_to_html(
                    cell.get("outputs", [])
                )
                code_map[(src, cell_no)] = codes[cell_no - 1]

    return outputs, code_map
341
+
342
+
343
def analyze_includes(render_files):
    """Map out which files include which, starting from ``render_files``.

    Returns a tuple ``(tree, roots, included_by)`` where:

    * ``tree`` maps each visited file to the files it directly includes.
    * ``roots`` maps each file to the directory of the main document that
      first pulled it in, so include resolution stays consistent with
      Quarto's behavior.
    * ``included_by`` maps an included file to the files that include it.

    Keys are POSIX paths relative to ``PROJECT_ROOT`` when the file lies
    inside it and absolute POSIX paths otherwise. ``FileNotFoundError`` is
    raised for an include that cannot be located.
    """
    root = PROJECT_ROOT

    def as_key(path):
        # Project-relative when possible, absolute otherwise.
        try:
            return path.relative_to(root).as_posix()
        except ValueError:
            return path.as_posix()

    tree: dict[str, list[str]] = {}
    included_by: dict[str, list[str]] = {}
    visited = set()
    root_dirs = {}
    stack = []
    for name in render_files:
        entry = Path(name)
        stack.append(entry.resolve())
        root_dirs[entry.resolve()] = entry.parent.resolve()

    while stack:
        current = stack.pop()
        if current in visited or not current.exists():
            continue
        visited.add(current)
        root_dir = root_dirs.get(current, current.parent)
        current_key = as_key(current)
        includes: list[str] = []
        for _kind, inc in include_pattern.findall(current.read_text()):
            # Look next to the including file first, then in the main
            # document's directory, then in that directory's parent.
            for base in (current.parent, root_dir, root_dir.parent):
                target = (base / inc).resolve()
                if target.exists():
                    break
            else:
                raise FileNotFoundError(
                    f"Include file '{inc}' not found for '{current}'"
                )
            rel = as_key(target)
            includes.append(rel)
            stack.append(target)
            # The first document to reach a file decides its root directory.
            root_dirs.setdefault(target, root_dir)
            included_by.setdefault(rel, []).append(current_key)
        tree[current_key] = includes

    roots_str: dict[str, Path] = {
        as_key(path): directory
        for path, directory in root_dirs.items()
        if path.exists()
    }

    return tree, roots_str, included_by
412
+
413
+
414
def resolve_render_file(file, included_by, render_files):
    """Follow first-listed includers upward until a render file is found.

    Returns ``file`` itself when it is a render file. Otherwise the walk
    stops at the first name that is a render file, has no recorded
    includer, or was already visited (a cycle), and returns that name.
    """
    seen = set()
    current = file
    while True:
        if current in render_files:
            return current
        if current in seen or current not in included_by:
            return current
        seen.add(current)
        current = included_by[current][0]
422
+
423
+
424
def collect_anchors(render_files, included_by):
    """Index the ``{#sec:..}``, ``{#fig:..}`` and ``{#tab:..}`` anchors.

    Every ``*.qmd`` file below the current directory is scanned, except
    those that have ``BUILD_DIR`` among their parents. Returns a dict that
    maps ``"kind:id"`` to ``(render_file, label)``; the label is the heading
    text when the anchor sits on a heading line and the bare id otherwise.
    A later definition of the same key replaces an earlier one.
    """
    anchors = {}
    for path in Path(".").rglob("*.qmd"):
        if BUILD_DIR in path.parents:
            continue
        for line in path.read_text().splitlines():
            heading = heading_pattern.match(line)
            for found in anchor_pattern.finditer(line):
                kind = found.group(1)
                ident = found.group(2)
                label = heading.group(2).strip() if heading else ident
                owner = resolve_render_file(
                    path.as_posix(), included_by, render_files
                )
                anchors[f"{kind}:{ident}"] = (owner, label)
    return anchors
443
+
444
+
445
# Cross-references written as ``@sec:id``, ``@fig:id`` or ``@tab:id``.
ref_pattern = re.compile(r"@(sec|fig|tab):([A-Za-z0-9_-]+)")


def replace_refs_text(text, anchors, dest_dir: Path):
    """Rewrite known ``@kind:id`` references in ``text`` as Markdown links.

    Each reference found in ``anchors`` becomes ``[label](path#kind:id)``,
    where ``path`` is the anchor's HTML file under ``BUILD_DIR`` expressed
    relative to ``dest_dir``. Unknown references are left as they are.
    """

    def to_link(match):
        key = f"{match.group(1)}:{match.group(2)}"
        if key not in anchors:
            return match.group(0)
        source_file, label = anchors[key]
        html_path = BUILD_DIR / source_file.replace(".qmd", ".html")
        relative = os.path.relpath(html_path, dest_dir)
        return f"[{label}]({relative}#{key})"

    return ref_pattern.sub(to_link, text)
461
+
462
+
463
def replace_refs(path, anchors):
    """Rewrite the cross-references inside the file at ``path``.

    The file is only written back when the substitution changed its
    content. Returns ``True`` when it was rewritten and ``False`` otherwise.
    """
    original = path.read_text()
    updated = replace_refs_text(original, anchors, path.parent)
    changed = updated != original
    if changed:
        path.write_text(updated)
    return changed
470
+
471
+
472
# Working directories, relative to the current working directory.
BUILD_DIR = Path("_build")
DISPLAY_DIR = Path("_display")

# Template assets, resolved to absolute paths at import time.
_TEMPLATE_DIR = Path("_template")
BODY_TEMPLATE = (_TEMPLATE_DIR / "body-only.html").resolve()
PANDOC_TEMPLATE = (_TEMPLATE_DIR / "pandoc_template.html").resolve()
NAV_TEMPLATE = (_TEMPLATE_DIR / "nav_template.html").resolve()
MATHJAX_DIR = (_TEMPLATE_DIR / "mathjax").resolve()

# Absolute path of the directory the module was imported from.
PROJECT_ROOT = Path(".").resolve()
479
+
480
+
481
def example_notebook_root():
    """Return the ``example_notebook`` directory bundled with the package.

    It is looked up two levels above the directory holding this module.
    """
    module_path = Path(__file__).resolve()
    return module_path.parents[2] / "example_notebook"
485
+
486
+
487
def download_mathjax(target_dir):
    """Download MathJax into ``target_dir`` if it is missing.

    Nothing happens when ``target_dir/es5/tex-mml-chtml.js`` already
    exists. When the ``PYDIFFTOOLS_FAKE_MATHJAX`` environment variable is
    set, a stub script is written instead of downloading. Otherwise
    ``mathjax-full`` is installed with ``npm`` into a scratch ``_mjtmp``
    directory (relative to the current working directory) and its ``es5``
    tree is copied over.

    Raises ``subprocess.CalledProcessError`` when an ``npm`` command fails.
    The scratch directory is removed whether or not the download succeeds.
    """
    target_dir = Path(target_dir)
    script = target_dir / "es5" / "tex-mml-chtml.js"
    if script.exists():
        return
    if os.environ.get("PYDIFFTOOLS_FAKE_MATHJAX"):
        script.parent.mkdir(parents=True, exist_ok=True)
        script.write_text("// fake mathjax for testing")
        return
    tmp = Path("_mjtmp")
    tmp.mkdir(parents=True, exist_ok=True)
    try:
        subprocess.run(["npm", "init", "-y"], cwd=tmp, check=True)
        subprocess.run(
            ["npm", "install", "mathjax-full"], cwd=tmp, check=True
        )
        src = tmp / "node_modules" / "mathjax-full" / "es5"
        (target_dir / "es5").mkdir(parents=True, exist_ok=True)
        shutil.copytree(src, target_dir / "es5", dirs_exist_ok=True)
    finally:
        # Always clean up; ignore_errors keeps a cleanup problem from
        # masking the error that got us here.
        shutil.rmtree(tmp, ignore_errors=True)
505
+
506
+
507
def ensure_mathjax():
    """Populate ``MATHJAX_DIR`` with MathJax unless it is already there."""
    download_mathjax(target_dir=MATHJAX_DIR)
510
+
511
+
512
+ def _copy_resource_tree(resource, dest, overwrite=False):
513
+ dest = Path(dest)
514
+ if resource.is_dir():
515
+ for child in resource.iterdir():
516
+ _copy_resource_tree(child, dest / child.name, overwrite)
517
+ return
518
+ if dest.exists() and not overwrite:
519
+ return
520
+ dest.parent.mkdir(parents=True, exist_ok=True)
521
+ dest.write_bytes(resource.read_bytes())
522
+
523
+
524
def ensure_template_assets(project_root, overwrite=False):
    """Make sure ``project_root/_template`` holds the template assets.

    Assets under the bundled example notebook's ``_template`` directory are
    copied first when that directory exists. After that, each of
    ``nav_template.html``, ``body-only.html``, ``pandoc_template.html`` and
    ``obs.lua`` is written from the built-in defaults below when it is
    still missing, or unconditionally when ``overwrite`` is true.
    """

    template_src = example_notebook_root() / "_template"
    target = Path(project_root) / "_template"
    target.mkdir(parents=True, exist_ok=True)
    if template_src.exists():
        _copy_resource_tree(template_src, target, overwrite)
    # Fall back to simple built-in templates when packaged assets are missing.
    nav_target = target / "nav_template.html"
    if overwrite or not nav_target.exists():
        # Jinja2 template: iterates ``pages`` and links to ``page.href``.
        nav_target.write_text("""
<style>
#on-this-page {font-family: sans-serif; border: 1px solid #ddd; padding: \
0.5rem; margin-bottom: 1rem;}
#on-this-page h2 {margin-top: 0; font-size: 1.1rem;}
#on-this-page ul {list-style: none; padding-left: 0; margin: 0;}
#on-this-page li {margin: 0.25rem 0;}
</style>
<nav id="on-this-page">
<h2>On this page</h2>
<ul>
{% for page in pages %}
<li><a href="{{ page.href }}">{{ page.title or page.file }}</a></li>
{% endfor %}
</ul>
</nav>
""")
    body_target = target / "body-only.html"
    if overwrite or not body_target.exists():
        # Pandoc template: ``$...$`` placeholders are filled in by Pandoc.
        body_target.write_text("""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
$for(header-includes)$
$header-includes$
$endfor$
</head>
<body>
$body$
</body>
</html>
""")
    pandoc_target = target / "pandoc_template.html"
    if overwrite or not pandoc_target.exists():
        # The default full-page template is a copy of the body template.
        pandoc_target.write_text(body_target.read_text())
    obs_target = target / "obs.lua"
    if overwrite or not obs_target.exists():
        obs_target.write_text("-- placeholder filter\n")
575
+
576
+
577
+ def _write_placeholder_outputs():
578
+ """Create stub HTML outputs when optional build dependencies
579
+ are missing."""
580
+
581
+ BUILD_DIR.mkdir(parents=True, exist_ok=True)
582
+ for qmd in PROJECT_ROOT.rglob("*.qmd"):
583
+ rel = qmd.relative_to(PROJECT_ROOT)
584
+ target = BUILD_DIR / rel.with_suffix(".html")
585
+ target.parent.mkdir(parents=True, exist_ok=True)
586
+ try:
587
+ content = qmd.read_text()
588
+ except OSError:
589
+ content = ""
590
+ if not content:
591
+ content = f"<html><body>{rel}</body></html>"
592
+ target.write_text(content)
593
+
594
+
595
@register_command(
    "Initialize a sample Quarto project with bundled templates",
    help={
        "path": (
            "Directory to initialize (defaults to current working directory)"
        ),
        "force": "Overwrite existing files when copying the scaffold",
    },
)
def qmdinit(path, force=False):
    """Scaffold a ready-to-run demo project in ``path``.

    The bundled example notebook is copied into the target directory,
    placeholder render targets and template assets are added, and MathJax
    is fetched. Existing files are only replaced when ``force`` is true.
    Raises ``RuntimeError`` when the bundled example is missing.
    """
    global PROJECT_ROOT, BUILD_DIR, DISPLAY_DIR
    global BODY_TEMPLATE, PANDOC_TEMPLATE, NAV_TEMPLATE, MATHJAX_DIR

    source_root = example_notebook_root()
    if not source_root.exists():
        raise RuntimeError("example_notebook directory is missing")
    target = Path("." if path is None else path).resolve()

    # Point every module-level path at the freshly initialized project so
    # later build steps read and write there even if the module was
    # imported from somewhere else.
    PROJECT_ROOT = target
    BUILD_DIR = PROJECT_ROOT / "_build"
    DISPLAY_DIR = PROJECT_ROOT / "_display"
    template_dir = PROJECT_ROOT / "_template"
    BODY_TEMPLATE = template_dir / "body-only.html"
    PANDOC_TEMPLATE = template_dir / "pandoc_template.html"
    NAV_TEMPLATE = template_dir / "nav_template.html"
    MATHJAX_DIR = template_dir / "mathjax"

    for entry in source_root.iterdir():
        _copy_resource_tree(entry, target / entry.name, force)

    # Some expected render targets are absent from the checked-in example;
    # lightweight placeholders keep the sample project runnable on its own.
    placeholders = (
        ("projects.qmd", "{{< include project1/index.qmd >}}\n"),
        ("notebook250708.qmd", "# Example notebook placeholder\n"),
    )
    for name, body in placeholders:
        placeholder = target / name
        if force or not placeholder.exists():
            placeholder.write_text(body)

    ensure_template_assets(target, overwrite=force)
    download_mathjax(target / "_template" / "mathjax")
    print(f"Initialized Quarto scaffold in {target.resolve()}")
640
+
641
+
642
@register_command(
    "Build Quarto-style projects with Pandoc and the fast builder (optionally"
    " watch)",
    help={
        "no_browser": "Do not launch a browser when using --watch",
        "webtex": "Use Pandoc's --webtex option instead of MathJax",
    },
)
def qmdb(no_browser=False, webtex=False):
    """Build the current directory with the fast notebook builder and
    keep watching it.

    Template assets are ensured first. If any of ``yaml``, ``nbformat`` or
    ``Environment`` is ``None``, only placeholder outputs are written.
    """

    ensure_template_assets(Path("."))
    optional_dependencies = (yaml, nbformat, Environment)
    if any(dependency is None for dependency in optional_dependencies):
        # Minimal fallback when optional dependencies are unavailable.
        _write_placeholder_outputs()
        return
    watch_and_serve(no_browser=no_browser, webtex=webtex)
660
+
661
+
662
def ensure_pandoc_available():
    """Check that ``pandoc`` can be found on ``PATH``.

    When it cannot, the directory of the pandoc binary under
    ``/opt/quarto`` is appended to ``PATH`` if that binary exists. Raises
    ``RuntimeError`` when pandoc still cannot be found.
    """
    if shutil.which("pandoc") is not None:
        return
    bundled = Path("/opt/quarto/bin/tools/x86_64/pandoc")
    if bundled.exists():
        os.environ["PATH"] = (
            os.environ["PATH"] + os.pathsep + str(bundled.parent)
        )
        if shutil.which("pandoc"):
            return
    raise RuntimeError(
        "Pandoc not found. Install it from https://pandoc.org/installing.html"
    )
674
+
675
+
676
def ensure_pandoc_crossref():
    """Raise ``RuntimeError`` unless ``pandoc-crossref`` is on ``PATH``."""
    if not shutil.which("pandoc-crossref"):
        raise RuntimeError(
            "pandoc-crossref not found. Install it from"
            " https://github.com/lierdakil/pandoc-crossref"
        )
684
+
685
+
686
def all_files(render_files, tree):
    """Return the set of existing files among the render files and the
    keys and values of ``tree``.

    Existence is checked with ``Path(name).exists()``, so relative names are
    interpreted against the current working directory.
    """
    candidates = list(render_files)
    for src, incs in tree.items():
        candidates.append(src)
        candidates.extend(incs)
    return {name for name in candidates if Path(name).exists()}
695
+
696
+
697
def build_order(render_files, tree):
    """Return files in depth-first post-order, so includes come first.

    Starting from each render file in turn, every file is listed once,
    after all of the files it (transitively) includes. Include cycles are
    cut at the first repeated file.
    """
    emitted = []
    seen = set()

    def walk(name):
        if name not in seen:
            seen.add(name)
            for dependency in tree.get(name, []):
                walk(dependency)
            emitted.append(name)

    for entry in render_files:
        walk(entry)
    return emitted
712
+
713
+
714
def collect_render_targets(targets, included_by, render_files):
    """Return the set of render files affected by a change to ``targets``.

    Includer links in ``included_by`` are followed transitively; a target
    that is itself a render file is part of the result.
    """
    renderable = set(render_files)
    pending = list(targets)
    seen = set()
    impacted = set()
    while pending:
        node = pending.pop()
        if node in seen:
            continue
        seen.add(node)
        if node in renderable:
            impacted.add(node)
        pending.extend(included_by.get(node, ()))
    return impacted
731
+
732
+
733
def mirror_and_modify(files, anchors, roots):
    """Stage ``files`` under ``BUILD_DIR`` with placeholders substituted.

    For each file, cross-references are rewritten, include/embed shortcodes
    and Python code blocks are replaced by marker ``<div>`` elements, and
    locally referenced images are copied next to the staged sources.

    Returns a dict mapping each source path (as a string) to its list of
    ``(code, md5)`` pairs in document order.
    """
    project_root = PROJECT_ROOT
    code_blocks: dict[str, list[tuple[str, str]]] = {}

    def first_existing(rel_path, bases):
        # Resolve ``rel_path`` against each base in turn; when none of the
        # candidates exists the last one is returned.
        for base in bases:
            candidate = (base / rel_path).resolve()
            if candidate.exists():
                break
        return candidate

    for file in files:
        src = Path(file)
        dest = BUILD_DIR / file
        dest.parent.mkdir(parents=True, exist_ok=True)
        text = replace_refs_text(src.read_text(), anchors, dest.parent)

        root_dir = roots.get(file, src.parent)

        def include_marker(match: re.Match) -> str:
            kind, inc = match.groups()
            # Include paths are tried against the main document root first.
            target_src = first_existing(
                inc, (root_dir, src.parent, root_dir.parent)
            )
            target_rel = target_src.relative_to(project_root)
            staged_html = target_rel.with_suffix(".html")
            inc_path = os.path.relpath(BUILD_DIR / staged_html, dest.parent)
            # An element marker survives Pandoc; ``data-source`` records the
            # staged include so the display pass can load it.
            return (
                f'<div data-{kind.lower()}="{inc_path}" '
                f'data-source="{staged_html.as_posix()}"></div>'
            )

        text = include_pattern.sub(include_marker, text)

        block_no = 0

        def code_marker(match: re.Match) -> str:
            nonlocal block_no
            block_no += 1
            code = match.group(1)
            digest = hashlib.md5(code.encode()).hexdigest()
            origin = str(src)
            code_blocks.setdefault(origin, []).append((code, digest))
            return (
                f'<div data-script="{origin}" data-index="{block_no}"'
                f' data-md5="{digest}"></div>'
            )

        text = code_pattern.sub(code_marker, text)

        # Copy referenced local images into the build directory.
        for img in image_pattern.findall(text):
            img_path = img.split()[0]
            is_remote = re.match(r"https?://", img_path)
            if is_remote or img_path.startswith("data:"):
                continue
            image_src = first_existing(
                img_path, (src.parent, root_dir, root_dir.parent)
            )
            if not image_src.exists():
                continue
            try:
                rel = image_src.relative_to(project_root)
            except ValueError:
                # Images outside the project are not mirrored.
                continue
            image_dest = BUILD_DIR / rel
            image_dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(image_src, image_dest)
        dest.write_text(text)
    return code_blocks
803
+
804
+
805
def render_file(
    src: Path,
    dest: Path,
    fragment: bool,
    bibliography=None,
    csl=None,
    webtex: bool = False,
):
    """Render ``src`` to ``dest`` using Pandoc with embedded resources.

    Pandoc runs inside ``dest.parent`` and is given ``src.name``, so the
    staged source is expected to sit in that directory. ``fragment``
    selects ``BODY_TEMPLATE`` over ``PANDOC_TEMPLATE``; ``webtex`` swaps
    MathJax for Pandoc's ``--webtex``. Relative ``bibliography`` / ``csl``
    paths are resolved against ``PROJECT_ROOT`` after ``~`` expansion.

    Raises ``FileNotFoundError`` when a given bibliography or CSL file does
    not exist, and ``RuntimeError`` (chained to the underlying
    ``CalledProcessError``) carrying Pandoc's stderr when Pandoc fails.
    """

    template = BODY_TEMPLATE if fragment else PANDOC_TEMPLATE
    temp = os.path.relpath(
        DISPLAY_DIR / "mathjax" / "es5" / "tex-mml-chtml.js", dest.parent
    )
    math_arg = (
        "--webtex" if webtex else (f"--mathjax={temp}?config=TeX-AMS_CHTML")
    )
    args = [
        "pandoc",
        src.name,
        "--from",
        "markdown+raw_html",
        "--standalone",
        "--embed-resources",
        "--lua-filter",
        os.path.relpath(BUILD_DIR / "obs.lua", dest.parent),
        "--filter",
        "pandoc-crossref",
        "--citeproc",
        math_arg,
        "--template",
        os.path.relpath(template, dest.parent),
        "-o",
        dest.with_suffix(".html").name,
    ]
    if bibliography:
        bib_path = Path(os.path.expanduser(bibliography))
        if not bib_path.is_absolute():
            bib_path = PROJECT_ROOT / bib_path
        if not bib_path.exists():
            raise FileNotFoundError(
                f"Bibliography file {bibliography} not found"
            )
        args += ["--bibliography", os.path.relpath(bib_path, dest.parent)]
    if csl:
        csl_path = Path(os.path.expanduser(csl))
        if not csl_path.is_absolute():
            csl_path = PROJECT_ROOT / csl_path
        if not csl_path.exists():
            raise FileNotFoundError(f"CSL file {csl} not found")
        args += ["--csl", os.path.relpath(csl_path, dest.parent)]
    print(f"Running pandoc on {src}...", flush=True)
    start = time.time()
    try:
        subprocess.run(args, check=True, cwd=dest.parent, capture_output=True)
    except subprocess.CalledProcessError as e:
        # Output is captured as bytes; decode it so the message shows
        # Pandoc's text instead of a ``b'...'`` repr.
        stderr = e.stderr
        if isinstance(stderr, bytes):
            stderr = stderr.decode(errors="replace")
        raise RuntimeError(
            f"{stderr}\nwhen trying to run:{' '.join(args)}"
        ) from e
    duration = time.time() - start
    print(
        f"Finished pandoc on {src} in {duration:.1f}s",
        flush=True,
    )
867
+
868
+
869
+ try:
870
+ from lxml import html as lxml_html
871
+ except ImportError:
872
+ lxml_html = None
873
+
874
+
875
def parse_headings(html_path: Path):
    """Return the headings of ``html_path`` as a nested list of dicts.

    Each entry has the keys ``level``, ``text``, ``id`` and ``children``;
    a heading is nested under the closest preceding heading of a lower
    level. An empty list is returned when ``lxml`` is unavailable.
    """
    if lxml_html is None:
        return []
    parser = lxml_html.HTMLParser(encoding="utf-8")
    document = lxml_html.parse(str(html_path), parser).getroot()

    # Headings carrying the ``title`` class are the page title; listing
    # them would duplicate the page title entry in the section list.
    headings = [
        heading
        for heading in document.xpath("//h1|//h2|//h3|//h4|//h5|//h6")
        if "title" not in (heading.get("class") or "").split()
    ]

    items: list[dict] = []
    open_nodes = []
    for heading in headings:
        level = int(heading.tag[1])
        node = {
            "level": level,
            "text": "".join(heading.itertext()).strip(),
            "id": heading.get("id"),
            "children": [],
        }
        # Close every open heading that is not shallower than this one.
        while open_nodes and open_nodes[-1]["level"] >= level:
            open_nodes.pop()
        siblings = open_nodes[-1]["children"] if open_nodes else items
        siblings.append(node)
        open_nodes.append(node)
    return items
907
+
908
+
909
def read_title(qmd: Path) -> str:
    """Return a display title for the document at ``qmd``.

    Preference order: the ``title`` entry of a leading ``---`` YAML front
    matter block, the first ``# `` heading, then the file stem.
    """
    text = qmd.read_text()
    front_end = text.find("\n---", 3) if text.startswith("---") else -1
    if front_end != -1:
        try:
            meta = yaml.safe_load(text[3:front_end])
            if isinstance(meta, dict) and "title" in meta:
                return str(meta["title"])
        except Exception:
            # Unusable front matter: fall through to the heading lookup.
            pass
    heading = re.search(r"^#\s+(.+)", text, re.MULTILINE)
    return heading.group(1).strip() if heading else qmd.stem
924
+
925
+
926
def add_navigation(html_path: Path, pages: list[dict], current: str):
    """Insert navigation menu for ``html_path`` using ``pages`` data.

    Any navigation left by an earlier run is removed first. The template
    at ``NAV_TEMPLATE`` is then rendered with ``pages`` (each given an
    ``href`` relative to ``html_path``) and ``current``; ``<style>``
    fragments go into ``<head>`` when there is one, everything else is
    inserted at the top of ``<body>``. The file is rewritten in place.

    Nothing is done when ``lxml`` is unavailable or the document has no
    ``<body>``.
    """
    if lxml_html is None:
        # lxml is an optional import; degrade like parse_headings does
        # instead of failing with an AttributeError on None.
        return
    parser = lxml_html.HTMLParser(encoding="utf-8")
    tree = lxml_html.parse(str(html_path), parser)
    root = tree.getroot()
    body = root.xpath("//body")
    if not body:
        return
    # remove any existing navigation to keep incremental updates clean
    stale_queries = (
        '//*[@id="on-this-page"]',
        "//style[contains(., '#on-this-page')]",
        "//script[contains(., 'on-this-page')]",
    )
    for query in stale_queries:
        for old in root.xpath(query):
            parent = old.getparent()
            if parent is not None:
                parent.remove(old)

    env = Environment(loader=FileSystemLoader(str(NAV_TEMPLATE.parent)))
    tmpl = env.get_template(NAV_TEMPLATE.name)
    local_pages = []
    for page in pages:
        href_path = (DISPLAY_DIR / page["file"]).with_suffix(".html")
        href = os.path.relpath(href_path, html_path.parent)
        local_pages.append({**page, "href": href})
    rendered = tmpl.render(pages=local_pages, current=current)
    frags = lxml_html.fragments_fromstring(rendered)
    head = root.xpath("//head")
    head = head[0] if head else None
    for frag in frags:
        if frag.tag == "style" and head is not None:
            head.append(frag)
        else:
            body[0].insert(0, frag)
    tree.write(str(html_path), encoding="utf-8", method="html")
965
+
966
+
967
def postprocess_html(html_path: Path, include_root: Path, resource_root: Path):
    """Replace placeholder nodes with referenced HTML bodies.

    Every element carrying ``data-include`` or ``data-embed`` is replaced by
    the children of the ``<body>`` of the referenced HTML file (looked up
    under ``include_root``; ``data-source`` takes precedence as the target
    when present), bracketed by ``BEGIN include``/``END include`` comments.
    Targets that are not available yet are replaced by a red "Waiting for
    pandoc" notice.  Passes repeat until no placeholder remains, so nested
    includes are expanded too.

    When the page contains pandoc math spans, the MathJax ``<script>`` in
    ``<head>`` is pointed at ``resource_root/mathjax/es5/tex-mml-chtml.js``
    (relative to the page), or such a script is added if none exists.

    The file is rewritten in place as UTF-8.
    """
    root = lxml_html.fromstring(html_path.read_text(encoding="utf-8"))
    # keep processing until no include placeholders remain so nested includes
    # are fully expanded in the served HTML
    # NOTE(review): a fragment that (indirectly) includes itself would keep
    # this loop busy forever; presumably cycles are rejected upstream - verify.
    while True:
        nodes = list(root.xpath("//*[@data-include] | //*[@data-embed]"))
        if not nodes:
            break
        progress = False
        for node in nodes:
            target_rel = (
                node.get("data-source")
                or node.get("data-include")
                or node.get("data-embed")
            )
            # An empty attribute would otherwise resolve to ``include_root``
            # itself (a directory) and crash on read_text below.
            target = (
                (include_root / target_rel).resolve() if target_rel else None
            )
            if target is not None and target.is_file():
                # announce include substitutions so the console logs which
                # staged fragments feed each served page
                try:
                    dest_rel = html_path.relative_to(DISPLAY_DIR).as_posix()
                except ValueError:
                    dest_rel = html_path.name
                print(f"including {target_rel} into {dest_rel}")
                frag_text = target.read_text(encoding="utf-8")
                frag = lxml_html.fromstring(frag_text)
                body = frag.xpath("body")
                if body:
                    elems = list(body[0])
                else:
                    elems = [frag]
                parent = node.getparent()
                if parent is None:
                    continue
                idx = parent.index(node)
                parent.remove(node)
                end_c = lxml_html.HtmlComment(f"END include {target_rel}")
                start_c = lxml_html.HtmlComment(f"BEGIN include {target_rel}")
                # removing a node also removes its tail text; carry it over
                # so text following the placeholder is not lost
                end_c.tail = node.tail
                # inserting at the same index in reverse order leaves
                # start comment, elements, end comment in document order
                parent.insert(idx, end_c)
                for elem in reversed(elems):
                    parent.insert(idx, elem)
                parent.insert(idx, start_c)
                progress = True
            else:
                parent = node.getparent()
                if parent is not None:
                    placeholder = lxml_html.fragment_fromstring(
                        '<div style="color:red;font-weight:bold">'
                        f"Waiting for pandoc on {target_rel} to complete..."
                        "</div>",
                        create_parent=False,
                    )
                    placeholder.tail = node.tail
                    idx = parent.index(node)
                    parent.remove(node)
                    parent.insert(idx, placeholder)
                    progress = True
        if not progress:
            break
    # ensure MathJax references point at the provided resource root so the
    # served HTML loads scripts from the display tree instead of the staging
    # area.
    math_nodes = root.xpath(
        '//*[@class="math inline" or @class="math display"]'
    )
    if math_nodes:
        head = root.xpath("//head")
        if head:
            # the value is used as a URL, so normalise to forward slashes
            math_path = Path(
                os.path.relpath(
                    resource_root / "mathjax" / "es5" / "tex-mml-chtml.js",
                    html_path.parent,
                )
            ).as_posix()
            existing = root.xpath('//script[contains(@src, "MathJax")]')
            if existing:
                for node in existing:
                    node.set("src", math_path)
                    node.set("id", node.get("id") or "MathJax-script")
                    node.set("async", "")
            else:
                script = lxml_html.fragment_fromstring(
                    '<script id="MathJax-script" async'
                    f' src="{math_path}"></script>',
                    create_parent=False,
                )
                head[0].append(script)
    html_path.write_text(
        lxml_html.tostring(root, encoding="unicode"), encoding="utf-8"
    )
1051
+
1052
+
1053
def substitute_code_placeholders(
    html_path: Path,
    outputs: dict[tuple[str, int], str],
    codes: dict[tuple[str, int], str],
) -> None:
    """Swap each script placeholder in ``html_path`` for highlighted source
    code followed by its executed output.

    Placeholders are ``<div>`` elements carrying ``data-script`` and
    ``data-index``; the pair keys both ``outputs`` and ``codes``.  A key
    missing from ``outputs`` yields a red "Running notebook" notice instead
    of output.  The file is only rewritten when a placeholder was replaced.
    """
    tree = lxml_html.parse(
        str(html_path), lxml_html.HTMLParser(encoding="utf-8")
    )
    root = tree.getroot()
    formatter = HtmlFormatter()
    head_nodes = root.xpath("//head")
    # add the Pygments stylesheet once per document
    if head_nodes and not root.xpath('//style[@id="pygments-style"]'):
        css = formatter.get_style_defs(".highlight")
        head_nodes[0].append(
            lxml_html.fragment_fromstring(
                f'<style id="pygments-style">{css}</style>',
                create_parent=False,
            )
        )
    replaced_any = False
    for placeholder in list(root.xpath("//div[@data-script][@data-index]")):
        script = placeholder.get("data-script")
        try:
            index = int(placeholder.get("data-index", "0"))
        except ValueError:
            index = 0
        key = (script, index)
        has_output = key in outputs
        output_html = outputs[key] if has_output else ""
        source = codes.get(key, "")
        pieces = lxml_html.fragments_fromstring(
            highlight(source, PythonLexer(), formatter)
        )
        if not has_output:
            # Only show the notice when the notebook output entry is absent,
            # so executed cells that intentionally produce no output simply
            # render the source code.
            pieces.append(
                lxml_html.fragment_fromstring(
                    '<div style="color:red;font-weight:bold">'
                    f"Running notebook {script}..."
                    "</div>",
                    create_parent=False,
                )
            )
        elif output_html:
            pieces += lxml_html.fragments_fromstring(output_html)
        container = placeholder.getparent()
        if container is None:
            continue
        at = container.index(placeholder)
        container.remove(placeholder)
        # reverse insertion at a fixed index keeps the pieces in order
        for piece in reversed(pieces):
            container.insert(at, piece)
        replaced_any = True
    if replaced_any:
        tree.write(str(html_path), encoding="utf-8", method="html")
1113
+
1114
+
1115
def _substitute_staged_pages(stage_files, outputs, code_map):
    """Insert notebook results into every staged HTML file that exists."""
    for f in stage_files:
        html_file = (BUILD_DIR / f).with_suffix(".html")
        if html_file.exists():
            substitute_code_placeholders(html_file, outputs, code_map)


def _assemble_display_pages(
    display_targets, *, write_waiting_page, show_running_notice
):
    """Copy staged pages into the display tree and expand their includes.

    ``write_waiting_page`` writes a "Waiting for pandoc" page for targets
    whose staged HTML does not exist yet.  ``show_running_notice`` replaces
    the code placeholders of the *served copy only* with the "Running
    notebook" notice, leaving the staged files untouched.
    """
    for target in sorted(display_targets):
        src_html = (BUILD_DIR / target).with_suffix(".html")
        dest_html = (DISPLAY_DIR / target).with_suffix(".html")
        if not src_html.exists():
            if write_waiting_page:
                dest_html.parent.mkdir(parents=True, exist_ok=True)
                dest_html.write_text(
                    "<html><body><div style='color:red;font-weight:bold'>"
                    f"Waiting for pandoc on {target} to complete..."
                    "</div>"
                    "</body></html>"
                )
            continue
        dest_html.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src_html, dest_html)
        # build includes using staged fragments and rewrite math assets to
        # the display tree that the web server presents.
        postprocess_html(dest_html, BUILD_DIR, DISPLAY_DIR)
        if show_running_notice:
            substitute_code_placeholders(dest_html, {}, {})


def build_all(webtex: bool = False, changed_paths=None):
    """Render the project into the staging and display trees.

    Parameters
    ----------
    webtex:
        Forwarded to ``render_file``.  When false, MathJax is ensured and
        copied into ``DISPLAY_DIR / "mathjax"``.
    changed_paths:
        Optional iterable of paths.  Only existing ``.qmd`` files located
        under ``PROJECT_ROOT`` are considered, and the build is limited to
        the stage/display targets derived from them.  When empty or ``None``
        all targets reported by the render graph are built.

    Returns
    -------
    dict
        ``{"render_files": ..., "tree": ..., "include_map": ...}``.

    Exceptions raised by the notebook execution propagate to the caller;
    failures of individual pandoc renders are reported on stdout.
    """
    ensure_pandoc_available()
    ensure_pandoc_crossref()
    ensure_template_assets(PROJECT_ROOT)
    BUILD_DIR.mkdir(parents=True, exist_ok=True)
    DISPLAY_DIR.mkdir(parents=True, exist_ok=True)
    if not webtex:
        ensure_mathjax()
        # copy MathJax into the display tree so browsers load assets from the
        # served directory while the staging area remains limited to
        # fragments.
        shutil.copytree(
            MATHJAX_DIR, DISPLAY_DIR / "mathjax", dirs_exist_ok=True
        )
    # copy project configuration without the render list so individual renders
    # don't attempt to build the entire project
    if yaml is not None:
        # an empty file loads as None; treat it as an empty mapping
        cfg = yaml.safe_load(Path("_quarto.yml").read_text()) or {}
        project = cfg.get("project") if isinstance(cfg, dict) else None
        if isinstance(project, dict) and "render" in project:
            project["render"] = []
        (BUILD_DIR / "_quarto.yml").write_text(yaml.safe_dump(cfg))
    else:
        # Without PyYAML, copy the config as-is so the builder can still
        # produce placeholder outputs.
        (BUILD_DIR / "_quarto.yml").write_text(Path("_quarto.yml").read_text())
    if Path("_template/obs.lua").exists():
        shutil.copy2("_template/obs.lua", BUILD_DIR / "obs.lua")

    checksums = load_checksums()

    render_files = load_rendered_files()
    bibliography, csl = load_bibliography_csl()
    tree, roots, include_map = analyze_includes(render_files)
    graph = RenderNotebook(render_files, tree, include_map)
    graph.mark_outdated(checksums)
    anchors = collect_anchors(render_files, include_map)

    result = {
        "render_files": render_files,
        "tree": tree,
        "include_map": include_map,
    }

    if changed_paths:
        normalized = set()
        for path in changed_paths:
            candidate = Path(path)
            if not candidate.exists():
                continue
            try:
                rel = candidate.resolve().relative_to(PROJECT_ROOT)
            except ValueError:
                # outside the project: not ours to build
                continue
            if rel.suffix != ".qmd":
                continue
            normalized.add(rel.as_posix())
        stage_set = set(graph.stage_targets(normalized))
        display_targets = collect_render_targets(
            stage_set, include_map, render_files
        )
        for rel in stage_set:
            if rel in render_files:
                display_targets.add(rel)
        if not stage_set and not display_targets:
            return result
    else:
        stage_set = set(graph.stage_targets(None))
        display_targets = set(render_files)

    stage_files = sorted(stage_set)
    # phase 1: rebuild the modified sources into the staging tree
    code_blocks = mirror_and_modify(stage_files, anchors, roots)

    # Start notebook execution immediately so it can run while pandoc renders.
    notebook_executor = None
    notebook_future = None
    outputs = {}
    code_map = {}
    if code_blocks:
        notebook_executor = ThreadPoolExecutor(max_workers=1)
        notebook_future = notebook_executor.submit(
            execute_code_blocks, code_blocks
        )

    order = graph.render_order()
    render_targets = [f for f in order if f in stage_set]
    if render_targets:
        workers = max(1, min(len(render_targets), 4))
        with ThreadPoolExecutor(max_workers=workers) as pool:
            # map each future back to its source so completion can be
            # reported without rescanning a task list
            futures = {
                pool.submit(
                    render_file,
                    Path(f),
                    BUILD_DIR / f,
                    f not in render_files,  # fragment: not a top-level page
                    bibliography,
                    csl,
                    webtex,
                ): f
                for f in render_targets
            }
            for future in as_completed(futures):
                name = futures[future]
                error = future.exception()
                if error is None:
                    print(f"Pandoc finished for {name}")
                else:
                    # surface worker failures instead of silently dropping
                    # the exception stored on the future
                    print(f"Pandoc failed for {name}: {error!r}")

    graph.update_checksums(checksums)
    save_checksums(checksums)

    # phase 2: insert whatever notebook output is available into staged pages.
    # While the notebook is still running the staged files are left alone:
    # substituting now would consume their placeholders and leave nothing
    # for phase 4 to fill in with the real outputs.
    notebooks_pending = (
        notebook_future is not None and not notebook_future.done()
    )
    if notebook_future is not None and not notebooks_pending:
        outputs, code_map = notebook_future.result()
        notebook_executor.shutdown(wait=False)
        notebook_executor = None
        notebook_future = None
    if not notebooks_pending:
        _substitute_staged_pages(stage_files, outputs, code_map)

    # phase 3: assemble the served pages from staged fragments; pending
    # notebook cells show the "Running notebook" notice in the served copy.
    _assemble_display_pages(
        display_targets,
        write_waiting_page=True,
        show_running_notice=notebooks_pending,
    )

    # phase 4: wait for notebooks to finish, then refresh staged and served
    # pages with the completed outputs so the browser updates when work ends.
    if notebook_future is not None:
        try:
            outputs, code_map = notebook_future.result()
        finally:
            notebook_executor.shutdown(wait=False)
        _substitute_staged_pages(stage_files, outputs, code_map)
        _assemble_display_pages(
            display_targets,
            write_waiting_page=False,
            show_running_notice=False,
        )

    pages = []
    for qmd in render_files:
        html_file = (DISPLAY_DIR / qmd).with_suffix(".html")
        source_path = PROJECT_ROOT / qmd
        if not source_path.exists():
            # Make it obvious which path is missing and keep the display tree
            # consistent by creating a placeholder page until pandoc produces
            # the real output.
            placeholder = (
                "<html><body><div style='color:red;font-weight:bold'>"
                f"Missing source file {source_path}"
                "</div></body></html>"
            )
            html_file.parent.mkdir(parents=True, exist_ok=True)
            html_file.write_text(placeholder)
            print(f"Cannot read title; missing source: {source_path}")
            continue
        if html_file.exists():
            sections = parse_headings(html_file)
            pages.append(
                {
                    "file": qmd,
                    "href": html_file.name,
                    "title": read_title(source_path),
                    "sections": sections,
                }
            )

    for page in pages:
        html_file = (DISPLAY_DIR / page["file"]).with_suffix(".html")
        if html_file.exists():
            add_navigation(html_file, pages, page["file"])

    return result
1309
+
1310
+
1311
class BrowserReloader:
    """Keep a Selenium-driven browser window pointed at ``url``."""

    def __init__(self, url: str):
        self.url = url
        self.init_browser()

    def init_browser(self):
        """Launch Chrome, or Firefox if Chrome fails, and load the URL."""
        if webdriver is None:
            raise ImportError(
                "Browser refresh support requires the optional 'selenium'"
                " package."
            )
        try:
            driver = webdriver.Chrome()
        except Exception:
            driver = webdriver.Firefox()
        self.browser = driver
        self.browser.get(self.url)

    def refresh(self):
        """Reload the page; drop the browser handle if the reload fails."""
        if self.browser:
            try:
                self.browser.refresh()
            except WebDriverException:
                # the window is gone: release the driver best-effort and
                # remember that there is nothing left to refresh
                try:
                    self.browser.quit()
                except Exception:
                    pass
                self.browser = None

    def is_alive(self) -> bool:
        """Report whether the browser still has an open, responsive window."""
        if not self.browser:
            return False
        try:
            if not self.browser.window_handles:
                return False
            # a trivial script round-trip confirms the session still answers
            self.browser.execute_script("return 1")
        except (NoSuchWindowException, WebDriverException):
            return False
        return True
1353
+
1354
+
1355
class ChangeHandler(FileSystemEventHandler):
    """Rebuild and refresh the browser when a ``.qmd`` source changes.

    ``build_func`` is called with the changed path; ``refresher`` must offer
    a ``refresh()`` method, which is called after a successful build.
    """

    # directory names holding generated output; events below them are
    # ignored so the build does not retrigger itself
    _GENERATED_DIRS = frozenset({"_build", "_display"})

    def __init__(self, build_func, refresher):
        self.build = build_func
        self.refresher = refresher

    def handle(self, path, is_directory):
        """Run the build for ``path`` if it is a source ``.qmd`` file."""
        if is_directory or not path.endswith(".qmd"):
            return
        # Compare path components instead of searching for "/_build/" so the
        # filter also works with Windows separators and relative paths.
        if self._GENERATED_DIRS.intersection(Path(path).parts):
            return
        print(f"Change detected: {path}")
        try:
            self.build(path)
        except Exception:
            # A failing build must not propagate into the watcher; report it
            # and keep watching for the next change.
            traceback.print_exc()
            return
        self.refresher.refresh()

    def on_modified(self, event):
        self.handle(event.src_path, event.is_directory)

    def on_created(self, event):
        self.handle(event.src_path, event.is_directory)

    def on_moved(self, event):
        # for a move, the destination is the file that now exists
        self.handle(event.dest_path, event.is_directory)
1379
+
1380
+
1381
+ def _serve_forever(httpd: ThreadingHTTPServer):
1382
+ """Run the HTTP server until shutdown is called."""
1383
+ httpd.serve_forever()
1384
+
1385
+
1386
def watch_and_serve(no_browser: bool = False, webtex: bool = False):
    """Build the project, then serve it and rebuild on source changes.

    With ``no_browser`` set, the function only runs ``build_all`` and returns
    its result.  Otherwise it serves ``DISPLAY_DIR`` (falling back to
    ``BUILD_DIR``) on port 8000, opens a browser on the first rendered page,
    watches ``PROJECT_ROOT`` and rebuilds on changes until the browser window
    is closed or Ctrl-C is pressed.  In that mode the function returns
    ``None``; it also returns ``None`` if the port cannot be bound.

    ``webtex`` is forwarded to every ``build_all`` call.
    """
    state = build_all(webtex=webtex)
    if no_browser:
        # In headless scenarios we only need the build artifacts and can exit
        # immediately instead of launching a server loop that waits for a
        # browser connection.
        return state
    port = 8000
    render_files = state["render_files"]

    if render_files:
        start_page = Path(render_files[0]).with_suffix(".html").as_posix()
    else:
        start_page = ""
    url = f"http://localhost:{port}/{start_page}"

    print("Watching project root:")
    print(" ", PROJECT_ROOT)

    class Handler(SimpleHTTPRequestHandler):
        """Serve the display tree, falling back to the staging tree."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, directory=str(DISPLAY_DIR), **kwargs)

        def translate_path(self, path):
            rel = path.lstrip("/")
            display_root = DISPLAY_DIR.resolve()
            build_root = BUILD_DIR.resolve()
            if rel == "_build":
                return str(build_root)
            # Containment is checked on path components (is_relative_to): a
            # plain string-prefix test would also accept sibling directories
            # such as "_build_old".
            if rel.startswith("_build/"):
                inner = rel.split("/", 1)[1]
                candidate = (BUILD_DIR / inner).resolve()
                if candidate.is_relative_to(build_root) and candidate.exists():
                    return str(candidate)
            display_candidate = (DISPLAY_DIR / rel).resolve()
            if display_candidate.exists() and display_candidate.is_relative_to(
                display_root
            ):
                return str(display_candidate)
            build_candidate = (BUILD_DIR / rel).resolve()
            if build_candidate.exists() and build_candidate.is_relative_to(
                build_root
            ):
                return str(build_candidate)
            return super().translate_path(path)

    try:
        # NOTE(review): binds every interface although the URL handed to the
        # browser is localhost - confirm that LAN access is intended.
        httpd = ThreadingHTTPServer(("0.0.0.0", port), Handler)
    except OSError as exc:  # pragma: no cover - depends on local environment
        print(f"Could not start server on port {port}: {exc}")
        return
    print(
        f"Serving {DISPLAY_DIR} with fallback to {BUILD_DIR} at"
        f" http://localhost:{port}"
    )
    Path(DISPLAY_DIR).mkdir(parents=True, exist_ok=True)
    threading.Thread(target=_serve_forever, args=(httpd,), daemon=True).start()

    def rebuild(path):
        build_all(webtex=webtex, changed_paths=[path])

    refresher = None
    observer = None
    observer_started = False
    # Browser and observer are set up inside the try block so the HTTP server
    # is shut down even when one of them fails to start.
    try:
        refresher = BrowserReloader(url)
        if Observer is None:
            raise ImportError(
                "File watching requires the optional 'watchdog' package."
            )
        observer = Observer()
        handler = ChangeHandler(rebuild, refresher)
        observer.schedule(handler, str(PROJECT_ROOT), recursive=True)
        observer.start()
        observer_started = True
        # poll once a second; closing the browser window ends the session
        while refresher.is_alive():
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        if observer_started:
            observer.stop()
            observer.join()
        httpd.shutdown()
        httpd.server_close()
        if getattr(refresher, "browser", None):
            try:
                refresher.browser.quit()
            except Exception:
                # best effort: the window may already be gone
                pass
1487
+
1488
+
1489
if __name__ == "__main__":
    # Script entry point: parse the two flags and hand them to
    # watch_and_serve().
    parser = argparse.ArgumentParser(description="Build site using Pandoc")
    parser.add_argument(
        "--no-browser",
        action="store_true",
        # watch_and_serve() returns right after the build when this is set;
        # there is no --watch option for the help text to refer to.
        help=(
            "Build once and exit without starting the preview server or"
            " opening a browser"
        ),
    )
    parser.add_argument(
        "--webtex",
        action="store_true",
        help="Use Pandoc's --webtex option instead of MathJax",
    )
    args = parser.parse_args()
    watch_and_serve(no_browser=args.no_browser, webtex=args.webtex)