deepresearch-flow 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepresearch_flow/paper/db.py +34 -0
- deepresearch_flow/paper/db_ops.py +21 -7
- deepresearch_flow/paper/prompt_templates/deep_read_phi_system.j2 +6 -0
- deepresearch_flow/paper/prompt_templates/deep_read_phi_user.j2 +391 -0
- deepresearch_flow/paper/prompt_templates/eight_questions_phi_system.j2 +6 -0
- deepresearch_flow/paper/prompt_templates/eight_questions_phi_user.j2 +133 -0
- deepresearch_flow/paper/prompt_templates/simple_phi_system.j2 +6 -0
- deepresearch_flow/paper/prompt_templates/simple_phi_user.j2 +31 -0
- deepresearch_flow/paper/schemas/deep_read_phi_schema.json +30 -0
- deepresearch_flow/paper/template_registry.py +39 -0
- deepresearch_flow/paper/templates/deep_read_phi.md.j2 +40 -0
- deepresearch_flow/paper/web/app.py +106 -1
- deepresearch_flow/paper/web/constants.py +1 -0
- deepresearch_flow/paper/web/handlers/__init__.py +2 -1
- deepresearch_flow/paper/web/handlers/api.py +55 -0
- deepresearch_flow/paper/web/handlers/pages.py +105 -25
- deepresearch_flow/paper/web/markdown.py +230 -4
- deepresearch_flow/paper/web/pdfjs/web/viewer.html +57 -5
- deepresearch_flow/paper/web/pdfjs/web/viewer.js +5 -1
- deepresearch_flow/paper/web/static/css/main.css +8 -1
- deepresearch_flow/paper/web/static/js/detail.js +527 -124
- deepresearch_flow/paper/web/static/js/outline.js +48 -34
- deepresearch_flow/paper/web/static_assets.py +289 -0
- deepresearch_flow/paper/web/templates/detail.html +52 -66
- deepresearch_flow/paper/web/templates.py +7 -4
- deepresearch_flow/paper/web/text.py +8 -4
- deepresearch_flow/recognize/organize.py +9 -12
- deepresearch_flow/translator/fixers.py +15 -0
- {deepresearch_flow-0.4.1.dist-info → deepresearch_flow-0.5.1.dist-info}/METADATA +62 -2
- {deepresearch_flow-0.4.1.dist-info → deepresearch_flow-0.5.1.dist-info}/RECORD +34 -25
- {deepresearch_flow-0.4.1.dist-info → deepresearch_flow-0.5.1.dist-info}/WHEEL +0 -0
- {deepresearch_flow-0.4.1.dist-info → deepresearch_flow-0.5.1.dist-info}/entry_points.txt +0 -0
- {deepresearch_flow-0.4.1.dist-info → deepresearch_flow-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {deepresearch_flow-0.4.1.dist-info → deepresearch_flow-0.5.1.dist-info}/top_level.txt +0 -0
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import html
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
from urllib.parse import urlencode
|
|
7
8
|
|
|
8
9
|
from starlette.requests import Request
|
|
@@ -16,6 +17,7 @@ from deepresearch_flow.paper.web.markdown import (
|
|
|
16
17
|
render_paper_markdown,
|
|
17
18
|
select_template_tag,
|
|
18
19
|
)
|
|
20
|
+
from deepresearch_flow.paper.web.static_assets import resolve_asset_urls
|
|
19
21
|
from deepresearch_flow.paper.web.text import normalize_title
|
|
20
22
|
from deepresearch_flow.paper.web.templates import (
|
|
21
23
|
build_pdfjs_viewer_url,
|
|
@@ -23,6 +25,47 @@ from deepresearch_flow.paper.web.templates import (
|
|
|
23
25
|
)
|
|
24
26
|
|
|
25
27
|
|
|
28
|
+
def _safe_read_text(path: Path) -> str:
|
|
29
|
+
try:
|
|
30
|
+
return path.read_text(encoding="utf-8")
|
|
31
|
+
except UnicodeDecodeError:
|
|
32
|
+
return path.read_text(encoding="latin-1")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _load_markdown_for_view(
    index: PaperIndex,
    asset_config,
    export_dir: Path | None,
    source_hash: str,
    *,
    lang: str | None = None,
) -> str | None:
    """Load the markdown text to render for a source or translated view.

    Lookup order:

    1. When an export directory is given and ``asset_config`` is enabled with
       an empty ``base_url``, the URL registered for ``source_hash`` (in
       ``translated_md_urls`` for *lang*, otherwise in ``md_urls``) is mapped
       onto ``export_dir`` and that file is returned if it exists.
    2. Otherwise the path recorded in the index is read
       (``translated_md_by_hash`` for *lang*, otherwise ``md_path_by_hash``).

    Text read from the index for a translated view is passed through
    ``normalize_markdown_images``; text read from the export directory is
    returned as-is.

    Returns ``None`` when the index has no path for the request.
    """
    lang_key = lang.lower() if lang else None

    serve_from_export = (
        export_dir
        and asset_config
        and asset_config.enabled
        and (asset_config.base_url or "") == ""
    )
    if serve_from_export:
        if lang_key is not None:
            relative_url = asset_config.translated_md_urls.get(source_hash, {}).get(lang_key)
        else:
            relative_url = asset_config.md_urls.get(source_hash)
        if relative_url:
            # URLs are site-absolute ("/..."); strip the slash so they join
            # under export_dir instead of replacing it.
            candidate = export_dir / relative_url.lstrip("/")
            if candidate.exists():
                return _safe_read_text(candidate)

    # Fall back to the original file recorded in the index.
    if lang_key is not None:
        local_path = index.translated_md_by_hash.get(source_hash, {}).get(lang_key)
    else:
        local_path = index.md_path_by_hash.get(source_hash)
    if not local_path:
        return None

    content = _safe_read_text(local_path)
    if lang_key is not None:
        content = normalize_markdown_images(content)
    return content
|
|
67
|
+
|
|
68
|
+
|
|
26
69
|
async def robots_txt(_: Request) -> Response:
    """Return a plain-text robots.txt that disallows every path for all crawlers."""
    disallow_everything = "User-agent: *\nDisallow: /\n"
    return Response(disallow_everything, media_type="text/plain")
|
|
@@ -78,7 +121,13 @@ async def paper_detail(request: Request) -> HTMLResponse:
|
|
|
78
121
|
embed = request.query_params.get("embed") == "1"
|
|
79
122
|
|
|
80
123
|
pdf_path = index.pdf_path_by_hash.get(source_hash)
|
|
81
|
-
|
|
124
|
+
asset_urls = resolve_asset_urls(
|
|
125
|
+
index,
|
|
126
|
+
source_hash,
|
|
127
|
+
request.app.state.asset_config,
|
|
128
|
+
prefer_local=request.app.state.static_mode == "dev",
|
|
129
|
+
)
|
|
130
|
+
pdf_url = asset_urls["pdf_url"] or ""
|
|
82
131
|
source_available = source_hash in index.md_path_by_hash
|
|
83
132
|
translations = index.translated_md_by_hash.get(source_hash, {})
|
|
84
133
|
translation_langs = sorted(translations.keys(), key=str.lower)
|
|
@@ -156,14 +205,18 @@ async def paper_detail(request: Request) -> HTMLResponse:
|
|
|
156
205
|
|
|
157
206
|
# Initialize template variables
|
|
158
207
|
body_html = ""
|
|
159
|
-
raw_content = ""
|
|
160
208
|
summary_template_name = ""
|
|
161
209
|
template_warning = ""
|
|
162
210
|
template_controls = ""
|
|
163
211
|
source_path_str = ""
|
|
164
212
|
translated_path_str = ""
|
|
213
|
+
source_markdown_url = ""
|
|
214
|
+
translated_markdown_url = ""
|
|
215
|
+
images_base_url = asset_urls["images_base_url"] or ""
|
|
165
216
|
pdf_filename = ""
|
|
166
217
|
pdfjs_url = ""
|
|
218
|
+
pdfjs_script_url = ""
|
|
219
|
+
pdfjs_worker_url = ""
|
|
167
220
|
left_src = ""
|
|
168
221
|
right_src = ""
|
|
169
222
|
split_options: list[tuple[str, str]] = []
|
|
@@ -208,21 +261,27 @@ if (templateSelect) {{
|
|
|
208
261
|
</script>
|
|
209
262
|
"""
|
|
210
263
|
|
|
264
|
+
prefer_local = request.app.state.static_mode == "dev"
|
|
265
|
+
|
|
211
266
|
# Source view
|
|
212
267
|
if view == "source":
|
|
213
268
|
source_path = index.md_path_by_hash.get(source_hash)
|
|
214
|
-
if not source_path:
|
|
269
|
+
if not source_path or not asset_urls["md_url"]:
|
|
215
270
|
body_html = '<div class="warning">Source markdown not found. Provide --md-root to enable source viewing.</div>'
|
|
216
271
|
else:
|
|
217
|
-
|
|
218
|
-
raw = source_path.read_text(encoding="utf-8")
|
|
219
|
-
except UnicodeDecodeError:
|
|
220
|
-
raw = source_path.read_text(encoding="latin-1")
|
|
221
|
-
md_renderer = create_md_renderer()
|
|
222
|
-
body_html = render_markdown_with_math_placeholders(md_renderer, raw)
|
|
223
|
-
raw_content = raw
|
|
272
|
+
source_markdown_url = asset_urls["md_url"] or ""
|
|
224
273
|
source_path_str = str(source_path)
|
|
225
274
|
show_outline = True
|
|
275
|
+
if prefer_local:
|
|
276
|
+
raw = _load_markdown_for_view(
|
|
277
|
+
index,
|
|
278
|
+
request.app.state.asset_config,
|
|
279
|
+
request.app.state.static_export_dir,
|
|
280
|
+
source_hash,
|
|
281
|
+
)
|
|
282
|
+
if raw is not None:
|
|
283
|
+
md_renderer = create_md_renderer()
|
|
284
|
+
body_html = render_markdown_with_math_placeholders(md_renderer, raw)
|
|
226
285
|
|
|
227
286
|
# Translated view
|
|
228
287
|
if view == "translated":
|
|
@@ -230,38 +289,55 @@ if (templateSelect) {{
|
|
|
230
289
|
body_html = '<div class="warning">No translated markdown found. Provide <code>--md-translated-root</code> and place <code><base>.<lang>.md</code> under that root.</div>'
|
|
231
290
|
else:
|
|
232
291
|
translated_path = translations.get(selected_lang)
|
|
233
|
-
|
|
292
|
+
translated_markdown_url = asset_urls["md_translated_url"].get(selected_lang, "")
|
|
293
|
+
if not translated_path or not translated_markdown_url:
|
|
234
294
|
body_html = '<div class="warning">Translated markdown not found for the selected language.</div>'
|
|
235
295
|
else:
|
|
236
|
-
try:
|
|
237
|
-
raw = translated_path.read_text(encoding="utf-8")
|
|
238
|
-
except UnicodeDecodeError:
|
|
239
|
-
raw = translated_path.read_text(encoding="latin-1")
|
|
240
|
-
raw = normalize_markdown_images(raw)
|
|
241
|
-
md_renderer = create_md_renderer()
|
|
242
|
-
body_html = render_markdown_with_math_placeholders(md_renderer, raw)
|
|
243
|
-
raw_content = raw
|
|
244
296
|
translated_path_str = str(translated_path)
|
|
245
297
|
show_outline = True
|
|
298
|
+
if prefer_local:
|
|
299
|
+
raw = _load_markdown_for_view(
|
|
300
|
+
index,
|
|
301
|
+
request.app.state.asset_config,
|
|
302
|
+
request.app.state.static_export_dir,
|
|
303
|
+
source_hash,
|
|
304
|
+
lang=selected_lang,
|
|
305
|
+
)
|
|
306
|
+
if raw is not None:
|
|
307
|
+
md_renderer = create_md_renderer()
|
|
308
|
+
body_html = render_markdown_with_math_placeholders(md_renderer, raw)
|
|
246
309
|
|
|
247
310
|
# PDF view
|
|
248
311
|
if view == "pdf":
|
|
249
|
-
if not pdf_path:
|
|
312
|
+
if not pdf_path or not pdf_url:
|
|
250
313
|
body_html = '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
|
|
251
314
|
pdf_filename = str(pdf_path.name) if pdf_path else ""
|
|
315
|
+
pdfjs_cdn_base_url = request.app.state.pdfjs_cdn_base_url
|
|
316
|
+
if pdfjs_cdn_base_url:
|
|
317
|
+
pdfjs_script_url = f"{pdfjs_cdn_base_url}/legacy/build/pdf.min.js"
|
|
318
|
+
pdfjs_worker_url = f"{pdfjs_cdn_base_url}/legacy/build/pdf.worker.min.js"
|
|
319
|
+
else:
|
|
320
|
+
pdfjs_script_url = "/pdfjs/build/pdf.js"
|
|
321
|
+
pdfjs_worker_url = "/pdfjs/build/pdf.worker.js"
|
|
252
322
|
|
|
253
323
|
# PDF.js view
|
|
254
324
|
if view == "pdfjs":
|
|
255
|
-
if not pdf_path:
|
|
325
|
+
if not pdf_path or not pdf_url:
|
|
256
326
|
body_html = '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
|
|
257
|
-
pdfjs_url = build_pdfjs_viewer_url(
|
|
327
|
+
pdfjs_url = build_pdfjs_viewer_url(
|
|
328
|
+
pdf_url,
|
|
329
|
+
cdn_base_url=request.app.state.pdfjs_cdn_base_url,
|
|
330
|
+
)
|
|
258
331
|
pdf_filename = str(pdf_path.name) if pdf_path else ""
|
|
259
332
|
|
|
260
333
|
# Split view
|
|
261
334
|
if view == "split":
|
|
262
335
|
def pane_src(pane_view: str) -> str:
|
|
263
|
-
if pane_view == "pdfjs" and pdf_path:
|
|
264
|
-
return build_pdfjs_viewer_url(
|
|
336
|
+
if pane_view == "pdfjs" and pdf_path and pdf_url:
|
|
337
|
+
return build_pdfjs_viewer_url(
|
|
338
|
+
pdf_url,
|
|
339
|
+
cdn_base_url=request.app.state.pdfjs_cdn_base_url,
|
|
340
|
+
)
|
|
265
341
|
params: dict[str, str] = {"view": pane_view, "embed": "1"}
|
|
266
342
|
if pane_view == "summary" and template_param:
|
|
267
343
|
params["template"] = str(template_param)
|
|
@@ -307,12 +383,14 @@ if (templateSelect) {{
|
|
|
307
383
|
show_outline=show_outline,
|
|
308
384
|
# Content variables
|
|
309
385
|
body_html=body_html,
|
|
310
|
-
raw_content=raw_content,
|
|
311
386
|
summary_template_name=summary_template_name,
|
|
312
387
|
template_warning=template_warning,
|
|
313
388
|
template_controls=template_controls,
|
|
314
389
|
available_templates=available_templates,
|
|
315
390
|
selected_template_tag=selected_tag,
|
|
391
|
+
images_base_url=images_base_url,
|
|
392
|
+
source_markdown_url=source_markdown_url,
|
|
393
|
+
translated_markdown_url=translated_markdown_url,
|
|
316
394
|
# Source view
|
|
317
395
|
source_path=source_path_str,
|
|
318
396
|
# Translated view
|
|
@@ -322,6 +400,8 @@ if (templateSelect) {{
|
|
|
322
400
|
# PDF view
|
|
323
401
|
pdf_filename=pdf_filename,
|
|
324
402
|
pdf_url=pdf_url,
|
|
403
|
+
pdfjs_script_url=pdfjs_script_url,
|
|
404
|
+
pdfjs_worker_url=pdfjs_worker_url,
|
|
325
405
|
# PDF.js view
|
|
326
406
|
pdfjs_url=pdfjs_url,
|
|
327
407
|
# Split view
|
|
@@ -41,13 +41,88 @@ def strip_paragraph_wrapped_tables(text: str) -> str:
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
def normalize_footnote_definitions(text: str) -> str:
    """Normalize footnotes and numbered notes to markdown-it footnote format.

    Outside fenced code blocks the following rewrites are applied per line:

    * ``[^N] text`` at the start of a line becomes ``[^N]: text``.
    * Inside a notes/references section (opened by a matching ``#`` heading
      or a plain ``References:``-style line), numbered list items
      ``N. text`` / ``N) text`` become ``[^N]: text``; indented lines that
      directly follow are folded into that definition, and a blank line ends
      the folding.
    * On every remaining line, bare ``[N]`` references become ``[^N]``.

    A section opened by a ``#`` heading ends at the next heading of the same
    or a higher level; deeper sub-headings keep the section open. A section
    opened by a plain line is not ended by later headings.

    Returns the rewritten text joined with ``"\\n"``.
    """
    notes_heading_re = re.compile(
        r"^#{1,6}\s*(参考文献|参考资料|参考书目|文献|引用|注释|脚注|notes?|references?|bibliography|works\s+cited|citations?)\b",
        re.IGNORECASE,
    )
    notes_heading_plain_re = re.compile(
        r"^(参考文献|参考资料|参考书目|文献|引用|注释|脚注|notes?|references?|bibliography|works\s+cited|citations?)\s*:?$",
        re.IGNORECASE,
    )
    any_heading_re = re.compile(r"^#{1,6}\s+")
    footnote_def_re = re.compile(r"^\[\^([0-9]+)\]\s+")
    numbered_item_re = re.compile(r"^\s*(\d{1,4})[.)]\s+")
    bare_reference_re = re.compile(r"(?<!\^)\[(\d{1,4})\]")

    def heading_level(heading: str) -> int:
        # Number of leading '#' characters. The previous expression stripped
        # the hashes *before* measuring, so it was 0 for every "## Title".
        return len(heading) - len(heading.lstrip("#"))

    out: list[str] = []
    in_fence = False
    fence_char = ""
    fence_len = 0
    in_notes = False
    notes_level: int | None = None
    last_note_index: int | None = None

    for line in text.splitlines():
        stripped = line.lstrip()

        # Track fenced code blocks so their content is never rewritten.
        if stripped.startswith(("```", "~~~")):
            run_len = 0
            while run_len < len(stripped) and stripped[run_len] == stripped[0]:
                run_len += 1
            if not in_fence:
                in_fence = True
                fence_char = stripped[0]
                fence_len = run_len
            elif stripped[0] == fence_char and run_len >= fence_len:
                in_fence = False
                fence_char = ""
                fence_len = 0
            out.append(line)
            continue

        if in_fence:
            out.append(line)
            continue

        # Enter / leave the notes section based on headings.
        if notes_heading_re.match(stripped):
            in_notes = True
            notes_level = heading_level(stripped)
            last_note_index = None
        elif notes_heading_plain_re.match(stripped):
            in_notes = True
            notes_level = None
            last_note_index = None
        elif any_heading_re.match(stripped):
            if notes_level is not None and heading_level(stripped) <= notes_level:
                in_notes = False
                notes_level = None
                last_note_index = None

        # "[^1] text" -> "[^1]: text"
        if footnote_def_re.match(line):
            out.append(footnote_def_re.sub(r"[^\1]: ", line))
            continue

        if in_notes:
            item = numbered_item_re.match(line)
            if item:
                number = item.group(1)
                rest = line[item.end():].strip()
                out.append(f"[^{number}]: {rest}")
                last_note_index = len(out) - 1
                continue
            if last_note_index is not None:
                if line.strip() == "":
                    # A blank line terminates the current definition.
                    out.append(line)
                    last_note_index = None
                    continue
                if line.startswith((" ", "\t")):
                    # Indented continuation: fold into the definition line.
                    out[last_note_index] = f"{out[last_note_index]} {line.strip()}"
                    continue

        # NOTE(review): this also rewrites "[12](url)" link labels and
        # "a[0]"-style text; behaviour kept as-is.
        out.append(bare_reference_re.sub(r"[^\1]", line))

    return "\n".join(out)
|
|
51
126
|
|
|
52
127
|
|
|
53
128
|
def normalize_markdown_images(text: str) -> str:
|
|
@@ -96,6 +171,154 @@ def normalize_markdown_images(text: str) -> str:
|
|
|
96
171
|
return "\n".join(out)
|
|
97
172
|
|
|
98
173
|
|
|
174
|
+
def normalize_fenced_code_blocks(text: str) -> str:
    """Ensure fenced code block markers appear on their own lines.

    For each line, the first run of three or more backticks or tildes is
    located. If non-blank text precedes it, the line is split so the marker
    (and whatever follows it) starts a new line. Lines where the marker is
    already first, or that contain no marker, are kept unchanged.

    A backtick run that is followed by another backtick on the same line is
    left alone: a backtick fence's info string may not contain backticks, so
    such a line is an inline code span (e.g. "run ```pip install x``` first"),
    not a fence, and splitting it would only break the sentence.

    Returns the rewritten text joined with ``"\\n"``.
    """
    fence_re = re.compile(r"(`{3,}|~{3,})")
    out: list[str] = []
    for line in text.splitlines():
        found = fence_re.search(line)
        if found is None:
            out.append(line)
            continue

        prefix = line[: found.start()]
        if not prefix.strip():
            # Marker already starts the line (possibly after indentation).
            out.append(line)
            continue

        if found.group(1)[0] == "`" and "`" in line[found.end():]:
            # Inline code span delimited by a backtick run, not a fence.
            out.append(line)
            continue

        out.append(prefix.rstrip())
        out.append(line[found.start():].lstrip())
    return "\n".join(out)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def normalize_mermaid_blocks(text: str) -> str:
    """Keep mermaid fences clean by moving legend text outside the block.

    Lines inside a ```` ```mermaid ```` / ``~~~mermaid`` block are buffered.
    Lines recognised as legend text (starting with "图例" or "legend", or
    containing "节点定位") are emitted right after the closing fence; all
    other lines are emitted in order inside the block. If the block is never
    closed, the buffered diagram lines are emitted followed by the legend
    lines.

    Returns the rewritten text joined with ``"\\n"``.
    """
    out: list[str] = []
    in_mermaid = False
    fence_char = ""
    fence_len = 0
    mermaid_lines: list[str] = []
    legend_lines: list[str] = []

    def is_legend(line: str) -> bool:
        stripped = line.strip()
        if not stripped:
            return False
        if stripped.startswith("图例") or stripped.lower().startswith("legend"):
            return True
        return "节点定位" in stripped

    for line in text.splitlines():
        stripped = line.lstrip()
        if stripped.startswith(("```", "~~~")):
            run_len = 0
            while run_len < len(stripped) and stripped[run_len] == stripped[0]:
                run_len += 1
            rest = stripped[run_len:].strip()

            if not in_mermaid and rest.lower().startswith("mermaid"):
                in_mermaid = True
                fence_char = stripped[0]
                fence_len = run_len
                mermaid_lines = []
                legend_lines = []
                out.append(line)
                continue

            if in_mermaid and stripped[0] == fence_char and run_len >= fence_len and rest == "":
                # Closing fence: flush the diagram, close it, then the legend.
                out.extend(mermaid_lines)
                out.append(line)
                out.extend(legend_lines)
                in_mermaid = False
                fence_char = ""
                fence_len = 0
                mermaid_lines = []
                legend_lines = []
                continue

            if in_mermaid:
                # A fence-looking line that does not close the block is part
                # of the diagram; buffer it so it keeps its position instead
                # of jumping ahead of the buffered lines.
                mermaid_lines.append(line)
            else:
                out.append(line)
            continue

        if in_mermaid:
            if is_legend(line):
                legend_lines.append(line)
            else:
                mermaid_lines.append(line)
            continue

        out.append(line)

    if in_mermaid:
        # Unterminated block: emit what was buffered so nothing is lost.
        out.extend(mermaid_lines)
        out.extend(legend_lines)

    return "\n".join(out)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def normalize_unbalanced_fences(text: str) -> str:
    """Drop unmatched opening fences so later content still renders.

    The text is scanned line by line for fence markers (a run of three or
    more backticks, or of three or more tildes, indented by at most three
    spaces):

    * With no fence open, a marker opens one.
    * With a fence open, a marker of the same character and at least the
      same length either
        - closes it (no info string, and the block has non-blank content), or
        - replaces it: when it carries an info string, or when the block is
          still empty, the earlier opening line is removed and this marker
          becomes the new opening fence.
    * Any other marker inside an open fence is treated as ordinary content.
    * A fence still open at the end of the text has its opening line removed.

    A line that starts with a backtick run and contains further backticks is
    an inline code span, not a fence, and is ignored here so it can never be
    deleted as an "unmatched opener".

    Returns the rewritten text joined with ``"\\n"``.
    """
    # One fence character per run: a mixed run such as "``~" is not a fence.
    fence_re = re.compile(r"(`{3,}|~{3,})(.*)$")
    out: list[str] = []
    open_index: int | None = None  # position in `out` of the open fence line
    fence_char = ""
    fence_len = 0
    fence_has_content = False

    for line in text.splitlines():
        stripped = line.lstrip(" ")
        indent = len(line) - len(stripped)
        marker = fence_re.match(stripped) if indent <= 3 else None
        if marker and marker.group(1)[0] == "`" and "`" in marker.group(2):
            # Backtick info strings cannot contain backticks: inline code.
            marker = None

        is_fence = False
        if marker:
            run = marker.group(1)
            has_info = bool(marker.group(2).strip())
            opens_block = False

            if open_index is None:
                opens_block = True
            elif run[0] == fence_char and len(run) >= fence_len:
                if has_info or not fence_has_content:
                    # The earlier opener never got a real block; discard it
                    # and let this marker start a fresh one.
                    out.pop(open_index)
                    opens_block = True
                else:
                    # Proper closing fence.
                    open_index = None
                    fence_char = ""
                    fence_len = 0
                    fence_has_content = False
                    is_fence = True

            if opens_block:
                open_index = len(out)
                fence_char = run[0]
                fence_len = len(run)
                fence_has_content = False
                is_fence = True

        out.append(line)
        if open_index is not None and not is_fence and line.strip():
            fence_has_content = True

    if open_index is not None:
        out.pop(open_index)
    return "\n".join(out)
|
|
320
|
+
|
|
321
|
+
|
|
99
322
|
def extract_math_placeholders(text: str) -> tuple[str, dict[str, str]]:
|
|
100
323
|
"""Extract math expressions and replace with placeholders."""
|
|
101
324
|
placeholders: dict[str, str] = {}
|
|
@@ -476,6 +699,9 @@ def extract_html_table_placeholders(text: str) -> tuple[str, dict[str, str]]:
|
|
|
476
699
|
|
|
477
700
|
def render_markdown_with_math_placeholders(md: MarkdownIt, text: str) -> str:
|
|
478
701
|
"""Render markdown with math, images, and tables properly escaped."""
|
|
702
|
+
text = normalize_mermaid_blocks(text)
|
|
703
|
+
text = normalize_fenced_code_blocks(text)
|
|
704
|
+
text = normalize_unbalanced_fences(text)
|
|
479
705
|
text = strip_paragraph_wrapped_tables(text)
|
|
480
706
|
text = normalize_footnote_definitions(text)
|
|
481
707
|
rendered, table_placeholders = extract_html_table_placeholders(text)
|
|
@@ -29,11 +29,63 @@ See https://github.com/adobe-type-tools/cmap-resources
|
|
|
29
29
|
|
|
30
30
|
<!-- This snippet is used in production (included from viewer.html) -->
|
|
31
31
|
<link rel="resource" type="application/l10n" href="locale/locale.properties">
|
|
32
|
-
<script
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
32
|
+
<script>
  // Bootstrap loader for the PDF.js viewer assets.
  // If the page URL carries ?cdn=<base>, viewer.css and pdf.js are requested
  // from that base first and fall back to the bundled copies on error;
  // viewer.js is always tried locally first, with the CDN copy as fallback.
  (function() {
    var LOCAL_PDFJS = '../build/pdf.js';
    var LOCAL_WORKER = '../build/pdf.worker.js';

    var params = new URLSearchParams(window.location.search);
    // NOTE(review): `cdn` is taken verbatim from the query string and decides
    // where scripts are loaded from - consider validating it against an
    // allowlist of trusted origins.
    var cdnBase = params.get('cdn');
    if (cdnBase) {
      // Drop trailing slashes so path concatenation below stays clean.
      cdnBase = cdnBase.replace(/\/+$/, '');
    }

    // Append a stylesheet; on load error switch once to fallbackUrl.
    function loadStyle(url, fallbackUrl) {
      var link = document.createElement('link');
      link.rel = 'stylesheet';
      link.href = url;
      if (fallbackUrl) {
        link.onerror = function() {
          if (link.dataset.fallbackLoaded) return;
          link.dataset.fallbackLoaded = '1';
          link.href = fallbackUrl;
        };
      }
      document.head.appendChild(link);
    }

    // Append a deferred script; on load error try fallbackUrl once.
    // onload (optional) is called with the URL that actually loaded.
    function loadScript(url, fallbackUrl, onload) {
      function inject(src, onfail) {
        var script = document.createElement('script');
        script.src = src;
        script.defer = true;
        script.onload = function() {
          if (onload) onload(src);
        };
        if (onfail) {
          script.onerror = onfail;
        }
        document.head.appendChild(script);
      }

      inject(url, function() {
        if (!fallbackUrl) return;
        inject(fallbackUrl, null);
      });
    }

    var viewerCss = cdnBase ? cdnBase + '/web/viewer.css' : 'viewer.css';
    loadStyle(viewerCss, 'viewer.css');

    var pdfjsUrl = cdnBase ? cdnBase + '/build/pdf.js' : LOCAL_PDFJS;
    loadScript(pdfjsUrl, LOCAL_PDFJS, function(loadedUrl) {
      // Pick the worker from the same place pdf.js actually came from, so a
      // CDN failure that fell back to the bundled pdf.js does not pair it
      // with the CDN worker.
      var loadedFromCdn = Boolean(cdnBase) && loadedUrl === pdfjsUrl;
      var workerUrl = loadedFromCdn ? cdnBase + '/build/pdf.worker.js' : LOCAL_WORKER;
      if (window.pdfjsLib) {
        window.pdfjsLib.GlobalWorkerOptions.workerSrc = workerUrl;
      }
      var viewerJs = 'viewer.js';
      var viewerFallback = cdnBase ? cdnBase + '/web/viewer.js' : null;
      loadScript(viewerJs, viewerFallback);
    });
  })();
</script>
|
|
37
89
|
</head>
|
|
38
90
|
|
|
39
91
|
<body tabindex="1">
|
|
@@ -1637,6 +1637,10 @@ exports.PDFViewerApplication = PDFViewerApplication;
|
|
|
1637
1637
|
return;
|
|
1638
1638
|
}
|
|
1639
1639
|
try {
|
|
1640
|
+
const params = new URLSearchParams(window.location.search);
|
|
1641
|
+
if (params.get("allow_origin") === "1" || params.get("disable_origin_check") === "1") {
|
|
1642
|
+
return;
|
|
1643
|
+
}
|
|
1640
1644
|
const viewerOrigin = new URL(window.location.href).origin || "null";
|
|
1641
1645
|
if (HOSTED_VIEWER_ORIGINS.includes(viewerOrigin)) {
|
|
1642
1646
|
return;
|
|
@@ -14096,4 +14100,4 @@ if (document.readyState === "interactive" || document.readyState === "complete")
|
|
|
14096
14100
|
|
|
14097
14101
|
/******/ })()
|
|
14098
14102
|
;
|
|
14099
|
-
//# sourceMappingURL=viewer.js.map
|
|
14103
|
+
//# sourceMappingURL=viewer.js.map
|
|
@@ -223,11 +223,18 @@ header a {
|
|
|
223
223
|
|
|
224
224
|
.markmap {
|
|
225
225
|
width: 100%;
|
|
226
|
-
height: 420px;
|
|
227
226
|
border: 1px solid #e2e8f0;
|
|
228
227
|
border-radius: 12px;
|
|
229
228
|
background: #ffffff;
|
|
230
229
|
margin: 12px 0;
|
|
230
|
+
padding: 8px;
|
|
231
|
+
overflow-x: auto;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
.markmap > svg {
|
|
235
|
+
width: 100%;
|
|
236
|
+
min-height: 240px;
|
|
237
|
+
display: block;
|
|
231
238
|
}
|
|
232
239
|
|
|
233
240
|
/* Utilities */
|