deepresearch-flow 0.5.0-py3-none-any.whl → 0.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. deepresearch_flow/paper/cli.py +63 -0
  2. deepresearch_flow/paper/config.py +87 -12
  3. deepresearch_flow/paper/db.py +1041 -34
  4. deepresearch_flow/paper/db_ops.py +145 -26
  5. deepresearch_flow/paper/extract.py +1546 -152
  6. deepresearch_flow/paper/prompt_templates/deep_read_phi_system.j2 +8 -0
  7. deepresearch_flow/paper/prompt_templates/deep_read_phi_user.j2 +396 -0
  8. deepresearch_flow/paper/prompt_templates/deep_read_system.j2 +2 -0
  9. deepresearch_flow/paper/prompt_templates/deep_read_user.j2 +272 -40
  10. deepresearch_flow/paper/prompt_templates/eight_questions_phi_system.j2 +7 -0
  11. deepresearch_flow/paper/prompt_templates/eight_questions_phi_user.j2 +135 -0
  12. deepresearch_flow/paper/prompt_templates/eight_questions_system.j2 +2 -0
  13. deepresearch_flow/paper/prompt_templates/eight_questions_user.j2 +4 -0
  14. deepresearch_flow/paper/prompt_templates/simple_phi_system.j2 +8 -0
  15. deepresearch_flow/paper/prompt_templates/simple_phi_user.j2 +31 -0
  16. deepresearch_flow/paper/prompt_templates/simple_system.j2 +2 -0
  17. deepresearch_flow/paper/prompt_templates/simple_user.j2 +2 -0
  18. deepresearch_flow/paper/providers/azure_openai.py +45 -3
  19. deepresearch_flow/paper/providers/openai_compatible.py +45 -3
  20. deepresearch_flow/paper/schemas/deep_read_phi_schema.json +31 -0
  21. deepresearch_flow/paper/schemas/deep_read_schema.json +1 -0
  22. deepresearch_flow/paper/schemas/default_paper_schema.json +6 -0
  23. deepresearch_flow/paper/schemas/eight_questions_schema.json +1 -0
  24. deepresearch_flow/paper/snapshot/__init__.py +4 -0
  25. deepresearch_flow/paper/snapshot/api.py +941 -0
  26. deepresearch_flow/paper/snapshot/builder.py +965 -0
  27. deepresearch_flow/paper/snapshot/identity.py +239 -0
  28. deepresearch_flow/paper/snapshot/schema.py +245 -0
  29. deepresearch_flow/paper/snapshot/tests/__init__.py +2 -0
  30. deepresearch_flow/paper/snapshot/tests/test_identity.py +123 -0
  31. deepresearch_flow/paper/snapshot/text.py +154 -0
  32. deepresearch_flow/paper/template_registry.py +40 -0
  33. deepresearch_flow/paper/templates/deep_read.md.j2 +4 -0
  34. deepresearch_flow/paper/templates/deep_read_phi.md.j2 +44 -0
  35. deepresearch_flow/paper/templates/default_paper.md.j2 +4 -0
  36. deepresearch_flow/paper/templates/eight_questions.md.j2 +4 -0
  37. deepresearch_flow/paper/web/app.py +10 -3
  38. deepresearch_flow/paper/web/markdown.py +174 -8
  39. deepresearch_flow/paper/web/static/css/main.css +8 -1
  40. deepresearch_flow/paper/web/static/js/detail.js +46 -12
  41. deepresearch_flow/paper/web/templates/detail.html +9 -0
  42. deepresearch_flow/paper/web/text.py +8 -4
  43. deepresearch_flow/recognize/cli.py +380 -103
  44. deepresearch_flow/recognize/markdown.py +31 -7
  45. deepresearch_flow/recognize/math.py +47 -12
  46. deepresearch_flow/recognize/mermaid.py +320 -10
  47. deepresearch_flow/recognize/organize.py +35 -16
  48. deepresearch_flow/translator/cli.py +71 -20
  49. deepresearch_flow/translator/engine.py +220 -81
  50. deepresearch_flow/translator/fixers.py +15 -0
  51. deepresearch_flow/translator/prompts.py +19 -2
  52. deepresearch_flow/translator/protector.py +15 -3
  53. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/METADATA +407 -33
  54. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/RECORD +58 -42
  55. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/WHEEL +1 -1
  56. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/entry_points.txt +0 -0
  57. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/licenses/LICENSE +0 -0
  58. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,154 @@
+ from __future__ import annotations
+
+ import re
+ from typing import Iterable
+
+ from markdown_it import MarkdownIt
+
+
+ _HTML_TABLE_RE = re.compile(r"<table\b.*?</table>", re.IGNORECASE | re.DOTALL)
+ _TAG_RE = re.compile(r"<[^>]+>")
+ _WS_RE = re.compile(r"\s+")
+
+
+ def _is_cjk_char(ch: str) -> bool:
+     code = ord(ch)
+     return (
+         0x3400 <= code <= 0x4DBF  # CJK Unified Ideographs Extension A
+         or 0x4E00 <= code <= 0x9FFF  # CJK Unified Ideographs
+         or 0xF900 <= code <= 0xFAFF  # CJK Compatibility Ideographs
+         or 0x3040 <= code <= 0x309F  # Hiragana
+         or 0x30A0 <= code <= 0x30FF  # Katakana
+         or 0xAC00 <= code <= 0xD7AF  # Hangul syllables
+     )
+
+
+ def insert_cjk_spaces(text: str) -> str:
+     out: list[str] = []
+     prev_cjk = False
+     for ch in text:
+         cur_cjk = _is_cjk_char(ch)
+         if prev_cjk and cur_cjk:
+             out.append(" ")
+         out.append(ch)
+         prev_cjk = cur_cjk
+     return "".join(out)
+
+
+ def remove_cjk_spaces(text: str) -> str:
+     if " " not in text:
+         return text
+     chars = list(text)
+     out: list[str] = []
+     for idx, ch in enumerate(chars):
+         if ch == " " and 0 < idx < len(chars) - 1:
+             if _is_cjk_char(chars[idx - 1]) and _is_cjk_char(chars[idx + 1]):
+                 continue
+         out.append(ch)
+     return "".join(out)
+
+
+ def merge_adjacent_markers(text: str, *, start_marker: str = "[[[", end_marker: str = "]]]") -> str:
+     needle = f"{end_marker}{start_marker}"
+     while needle in text:
+         text = text.replace(needle, "")
+     return text
+
+
+ def _md_renderer() -> MarkdownIt:
+     md = MarkdownIt("commonmark", {"html": False, "linkify": False})
+     md.enable("table")
+     return md
+
+
+ def markdown_to_plain_text(markdown: str) -> str:
+     if not markdown:
+         return ""
+     text = _HTML_TABLE_RE.sub(" ", markdown)
+     md = _md_renderer()
+     tokens = md.parse(text)
+
+     out: list[str] = []
+     in_table = 0
+     for token in tokens:
+         if token.type == "table_open":
+             in_table += 1
+             continue
+         if token.type == "table_close":
+             in_table = max(0, in_table - 1)
+             continue
+         if in_table:
+             continue
+         if token.type != "inline":
+             continue
+         for child in token.children or []:
+             if child.type in {"text", "code_inline"}:
+                 out.append(child.content)
+             elif child.type == "softbreak":
+                 out.append("\n")
+             elif child.type == "hardbreak":
+                 out.append("\n")
+             elif child.type == "image":
+                 if child.content:
+                     out.append(child.content)
+
+     collapsed = _WS_RE.sub(" ", " ".join(out)).strip()
+     collapsed = _TAG_RE.sub(" ", collapsed)
+     return _WS_RE.sub(" ", collapsed).strip()
+
+
+ def normalize_query_punctuation(text: str) -> str:
+     if not text:
+         return ""
+     return re.sub(r"[,。、《》、;:!?()【】「」『』·…—]+", " ", text)
+
+
+ def split_mixed_cjk_latin(token: str) -> list[str]:
+     if not token:
+         return []
+     parts: list[str] = []
+     buf: list[str] = []
+     buf_is_cjk: bool | None = None
+     for ch in token:
+         cur_is_cjk = _is_cjk_char(ch)
+         if buf_is_cjk is None or cur_is_cjk == buf_is_cjk:
+             buf.append(ch)
+             buf_is_cjk = cur_is_cjk
+             continue
+         parts.append("".join(buf))
+         buf = [ch]
+         buf_is_cjk = cur_is_cjk
+     if buf:
+         parts.append("".join(buf))
+     return parts
+
+
+ def rewrite_search_query(user_query: str) -> str:
+     cleaned = normalize_query_punctuation(user_query)
+     cleaned = _WS_RE.sub(" ", cleaned).strip()
+     if not cleaned:
+         return ""
+
+     out: list[str] = []
+     for raw in cleaned.split(" "):
+         if not raw:
+             continue
+         upper = raw.upper()
+         if upper in {"AND", "OR"}:
+             out.append(upper)
+             continue
+
+         segments = split_mixed_cjk_latin(raw)
+         for seg in segments:
+             if not seg:
+                 continue
+             if all(_is_cjk_char(ch) for ch in seg):
+                 phrase = insert_cjk_spaces(seg)
+                 out.append(f"\"{phrase}\"")
+             else:
+                 safe = re.sub(r"[^0-9A-Za-z._+-]+", "", seg)
+                 if safe:
+                     out.append(safe.lower())
+
+     return " ".join(out)
+
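For orientation, here is how the new query-rewriting helpers compose. This is a sketch; the import path assumes this hunk is the new deepresearch_flow/paper/snapshot/text.py, the only +154/-0 file in the list above.

```python
from deepresearch_flow.paper.snapshot.text import rewrite_search_query

# CJK runs become quoted, per-character-spaced phrases; Latin tokens are
# stripped to [0-9A-Za-z._+-] and lowercased; bare AND/OR pass through.
query = rewrite_search_query("注意力机制 AND Transformer!")
print(query)  # "注 意 力 机 制" AND transformer
```

The per-character spacing mirrors insert_cjk_spaces, presumably so a unigram-tokenized full-text index can match CJK phrases; remove_cjk_spaces is its inverse for display.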
@@ -24,6 +24,7 @@ class TemplateBundle:
  class StageDefinition:
      name: str
      fields: list[str]
+     depends_on: list[str] | None = None


  _TEMPLATES: dict[str, TemplateBundle] = {
@@ -34,6 +35,13 @@ _TEMPLATES: dict[str, TemplateBundle] = {
          schema_file="default_paper_schema.json",
          render_template="default_paper.md.j2",
      ),
+     "simple_phi": TemplateBundle(
+         name="simple_phi",
+         prompt_system="simple_phi_system.j2",
+         prompt_user="simple_phi_user.j2",
+         schema_file="default_paper_schema.json",
+         render_template="default_paper.md.j2",
+     ),
      "deep_read": TemplateBundle(
          name="deep_read",
          prompt_system="deep_read_system.j2",
@@ -41,6 +49,13 @@ _TEMPLATES: dict[str, TemplateBundle] = {
          schema_file="deep_read_schema.json",
          render_template="deep_read.md.j2",
      ),
+     "deep_read_phi": TemplateBundle(
+         name="deep_read_phi",
+         prompt_system="deep_read_phi_system.j2",
+         prompt_user="deep_read_phi_user.j2",
+         schema_file="deep_read_phi_schema.json",
+         render_template="deep_read_phi.md.j2",
+     ),
      "eight_questions": TemplateBundle(
          name="eight_questions",
          prompt_system="eight_questions_system.j2",
@@ -48,6 +63,13 @@ _TEMPLATES: dict[str, TemplateBundle] = {
          schema_file="eight_questions_schema.json",
          render_template="eight_questions.md.j2",
      ),
+     "eight_questions_phi": TemplateBundle(
+         name="eight_questions_phi",
+         prompt_system="eight_questions_phi_system.j2",
+         prompt_user="eight_questions_phi_user.j2",
+         schema_file="eight_questions_schema.json",
+         render_template="eight_questions.md.j2",
+     ),
      "three_pass": TemplateBundle(
          name="three_pass",
          prompt_system="three_pass_system.j2",
@@ -75,6 +97,14 @@ _STAGES: dict[str, list[StageDefinition]] = {
          StageDefinition("module_g", ["module_g"]),
          StageDefinition("module_h", ["module_h"]),
      ],
+     "deep_read_phi": [
+         StageDefinition("module_m1", ["module_m1"]),
+         StageDefinition("module_m2", ["module_m2"]),
+         StageDefinition("module_m3", ["module_m3"]),
+         StageDefinition("module_m4", ["module_m4"]),
+         StageDefinition("module_m5", ["module_m5"]),
+         StageDefinition("module_m6", ["module_m6"]),
+     ],
      "eight_questions": [
          StageDefinition(
              "questions_1to4",
@@ -85,6 +115,16 @@ _STAGES: dict[str, list[StageDefinition]] = {
              ["question5", "question6", "question7", "question8"],
          ),
      ],
+     "eight_questions_phi": [
+         StageDefinition(
+             "questions_1to4",
+             ["question1", "question2", "question3", "question4"],
+         ),
+         StageDefinition(
+             "questions_5to8",
+             ["question5", "question6", "question7", "question8"],
+         ),
+     ],
      "three_pass": [
          StageDefinition("step1_summary", ["step1_summary"]),
          StageDefinition("step2_analysis", ["step2_analysis"]),
@@ -3,6 +3,10 @@
  {% set is_zh = output_language == "zh" %}
  **{{ "作者 / Authors" if is_zh else "Authors" }}:** {{ paper_authors | join(", ") }}

+ {% if paper_institutions %}
+ **{{ "单位 / Institutions" if is_zh else "Institutions" }}:** {{ paper_institutions | join(", ") }}
+ {% endif %}
+
  {% if output_language %}
  **{{ "输出语言 / Output Language" if is_zh else "Output Language" }}:** {{ output_language }}
  {% endif %}
@@ -0,0 +1,44 @@
+ # {{ paper_title }}
+
+ {% set is_zh = output_language == "zh" %}
+ **{{ "作者 / Authors" if is_zh else "Authors" }}:** {{ paper_authors | join(", ") }}
+
+ {% if paper_institutions %}
+ **{{ "单位 / Institutions" if is_zh else "Institutions" }}:** {{ paper_institutions | join(", ") }}
+ {% endif %}
+
+ {% if output_language %}
+ **{{ "输出语言 / Output Language" if is_zh else "Output Language" }}:** {{ output_language }}
+ {% endif %}
+
+ {% if publication_date %}
+ **{{ "发表日期 / Publication Date" if is_zh else "Publication Date" }}:** {{ publication_date }}
+ {% endif %}
+
+ {% if publication_venue %}
+ **{{ "期刊/会议 / Publication Venue" if is_zh else "Publication Venue" }}:** {{ publication_venue }}
+ {% endif %}
+
+ ## {{ "模块 M1:对齐目标 + 输入校验 + 论文地图 + Exhibit全量索引 + markmap大纲脑图" if is_zh else "Module M1: Alignment + Input Check + Paper Map + Exhibit Index + Markmap" }}
+
+ {{ module_m1 }}
+
+ ## {{ "模块 M2:第一遍鸟瞰 + Exhibit客观打分排序" if is_zh else "Module M2: First Pass Overview + Exhibit Scoring" }}
+
+ {{ module_m2 }}
+
+ ## {{ "模块 M3:概念与术语工程 + 辩论谱系" if is_zh else "Module M3: Concepts + Debate Lineage" }}
+
+ {{ module_m3 }}
+
+ ## {{ "模块 M4:论证重建 + Top Exhibits 深读(上半)" if is_zh else "Module M4: Argument Map + Top Exhibits (Part 1)" }}
+
+ {{ module_m4 }}
+
+ ## {{ "模块 M5:深度审视 + objection mining + Top Exhibits 深读(下半)" if is_zh else "Module M5: Deep Review + Objection Mining + Top Exhibits (Part 2)" }}
+
+ {{ module_m5 }}
+
+ ## {{ "模块 M6:写作级产出包" if is_zh else "Module M6: Writing-Ready Output Pack" }}
+
+ {{ module_m6 }}
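A minimal sketch of rendering this new template. The context keys are read off the template itself; the PackageLoader path is an assumption based on the templates/ directory in the file list:

```python
from jinja2 import Environment, PackageLoader

env = Environment(loader=PackageLoader("deepresearch_flow.paper", "templates"))
template = env.get_template("deep_read_phi.md.j2")

print(template.render(
    paper_title="An Example Paper",
    paper_authors=["A. Author", "B. Author"],
    paper_institutions=["Example University"],  # new optional block in 0.6.0
    output_language="zh",  # "zh" switches each heading to its bilingual variant
    publication_date="2024-05-01",
    publication_venue="Example Venue",
    module_m1="...", module_m2="...", module_m3="...",
    module_m4="...", module_m5="...", module_m6="...",
))
```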
@@ -3,6 +3,10 @@
  {% set is_zh = output_language == "zh" %}
  **{{ "作者 / Authors" if is_zh else "Authors" }}:** {{ paper_authors | join(", ") }}

+ {% if paper_institutions %}
+ **{{ "单位 / Institutions" if is_zh else "Institutions" }}:** {{ paper_institutions | join(", ") }}
+ {% endif %}
+
  {% if output_language %}
  **{{ "输出语言 / Output Language" if is_zh else "Output Language" }}:** {{ output_language }}
  {% endif %}
@@ -4,6 +4,10 @@

  **{{ "作者 / Authors" if is_zh else "Authors" }}:** {{ paper_authors | join(", ") }}

+ {% if paper_institutions %}
+ **{{ "单位 / Institutions" if is_zh else "Institutions" }}:** {{ paper_institutions | join(", ") }}
+ {% endif %}
+
  {% if output_language %}
  **{{ "输出语言 / Output Language" if is_zh else "Output Language" }}:** {{ output_language }}
  {% endif %}
@@ -91,7 +91,7 @@ def create_app(
          pdf_roots=pdf_roots,
      )
      md = create_md_renderer()
-     static_base_url = static_base_url or os.getenv("PAPER_DB_STATIC_BASE_URL")
+     static_base_url = static_base_url or os.getenv("PAPER_DB_STATIC_BASE") or os.getenv("PAPER_DB_STATIC_BASE_URL")
      static_mode = _normalize_static_mode(static_mode or os.getenv("PAPER_DB_STATIC_MODE"))
      resolved_mode = _resolve_static_mode(static_mode, static_base_url)
      export_dir_value = static_export_dir or os.getenv("PAPER_DB_STATIC_EXPORT_DIR")
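The one-line change above introduces PAPER_DB_STATIC_BASE as the preferred environment variable, keeping PAPER_DB_STATIC_BASE_URL as a legacy fallback. A minimal sketch of the resulting precedence; the URLs are hypothetical:

```python
import os

os.environ["PAPER_DB_STATIC_BASE"] = "https://cdn.example.com/assets"
os.environ["PAPER_DB_STATIC_BASE_URL"] = "https://legacy.example.com/assets"

# mirrors the expression in create_app: explicit argument first,
# then the new variable, then the legacy one
static_base_url = None or os.getenv("PAPER_DB_STATIC_BASE") or os.getenv("PAPER_DB_STATIC_BASE_URL")
assert static_base_url == "https://cdn.example.com/assets"
```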
@@ -111,7 +111,12 @@ def create_app(
      asset_config = None
      if resolved_mode == "prod":
          if not static_base_url:
-             logger.warning("Static mode set to prod without base URL; falling back to dev asset routes.")
+             logger.warning(
+                 "Static mode set to prod without base URL; falling back to dev asset routes "
+                 "(static_mode=%s, static_base_url=%s)",
+                 static_mode,
+                 static_base_url or "<empty>",
+             )
              resolved_mode = "dev"
          else:
              asset_config = build_static_assets(
@@ -149,8 +154,10 @@ def create_app(
          )
      elif pdf_roots:
          logger.warning(
-             "PDF.js viewer assets not found at %s; PDF Viewer mode will be unavailable.",
+             "PDF.js viewer assets not found at %s; PDF Viewer mode will be unavailable "
+             "(pdf_roots=%d).",
              PDFJS_STATIC_DIR,
+             len(pdf_roots),
          )
      if STATIC_DIR.exists():
          routes.append(
@@ -41,13 +41,88 @@ def strip_paragraph_wrapped_tables(text: str) -> str:


  def normalize_footnote_definitions(text: str) -> str:
-     """Normalize footnote definitions to the markdown-it footnote format."""
+     """Normalize footnotes and numbered notes to markdown-it footnote format."""
      lines = text.splitlines()
-     for idx, line in enumerate(lines):
+     out: list[str] = []
+     in_fence = False
+     fence_char = ""
+     fence_len = 0
+     in_notes = False
+     notes_level: int | None = None
+     notes_heading_re = re.compile(
+         r"^#{1,6}\s*(参考文献|参考资料|参考书目|文献|引用|注释|脚注|notes?|references?|bibliography|works\s+cited|citations?)\b",
+         re.IGNORECASE,
+     )
+     notes_heading_plain_re = re.compile(
+         r"^(参考文献|参考资料|参考书目|文献|引用|注释|脚注|notes?|references?|bibliography|works\s+cited|citations?)\s*:?$",
+         re.IGNORECASE,
+     )
+     last_note_index: int | None = None
+
+     for line in lines:
+         stripped = line.lstrip()
+         if stripped.startswith(("```", "~~~")):
+             run_len = 0
+             while run_len < len(stripped) and stripped[run_len] == stripped[0]:
+                 run_len += 1
+             if not in_fence:
+                 in_fence = True
+                 fence_char = stripped[0]
+                 fence_len = run_len
+             elif stripped[0] == fence_char and run_len >= fence_len:
+                 in_fence = False
+                 fence_char = ""
+                 fence_len = 0
+             out.append(line)
+             continue
+
+         if in_fence:
+             out.append(line)
+             continue
+
+         heading_match = notes_heading_re.match(stripped)
+         if heading_match:
+             in_notes = True
+             notes_level = len(stripped.split(" ")[0].lstrip("#"))
+             last_note_index = None
+         elif notes_heading_plain_re.match(stripped):
+             in_notes = True
+             notes_level = None
+             last_note_index = None
+         elif re.match(r"^#{1,6}\s+", stripped):
+             if notes_level is not None:
+                 level = len(stripped.split(" ")[0].lstrip("#"))
+                 if level <= notes_level:
+                     in_notes = False
+                     notes_level = None
+                     last_note_index = None
+
          match = re.match(r"^\[\^([0-9]+)\]\s+", line)
          if match:
-             lines[idx] = re.sub(r"^\[\^([0-9]+)\]\s+", r"[^\1]: ", line)
-     return "\n".join(lines)
+             out.append(re.sub(r"^\[\^([0-9]+)\]\s+", r"[^\1]: ", line))
+             continue
+
+         if in_notes:
+             list_match = re.match(r"^\s*(\d{1,4})[.)]\s+", line)
+             if list_match:
+                 number = list_match.group(1)
+                 rest = line[list_match.end() :].strip()
+                 out.append(f"[^{number}]: {rest}")
+                 last_note_index = len(out) - 1
+                 continue
+             if last_note_index is not None:
+                 if line.strip() == "":
+                     out.append(line)
+                     last_note_index = None
+                     continue
+                 if line.startswith((" ", "\t")):
+                     out[last_note_index] = f"{out[last_note_index]} {line.strip()}"
+                     continue
+
+         line = re.sub(r"(?<!\^)\[(\d{1,4})\]", r"[^\1]", line)
+         out.append(line)
+
+     return "\n".join(out)


  def normalize_markdown_images(text: str) -> str:
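The rewritten normalizer now also converts numbered items under a references/notes heading into footnote definitions and bracket citations into footnote references, while leaving fenced code untouched. A before/after sketch; the input text is hypothetical, and the import path follows file 38 in the list:

```python
from deepresearch_flow.paper.web.markdown import normalize_footnote_definitions

sample = "\n".join([
    "Attention was introduced earlier [1].",
    "",
    "## References",
    "1. Vaswani et al., Attention Is All You Need.",
])

print(normalize_footnote_definitions(sample))
# Attention was introduced earlier [^1].
#
# ## References
# [^1]: Vaswani et al., Attention Is All You Need.
```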
@@ -115,6 +190,68 @@ def normalize_fenced_code_blocks(text: str) -> str:
      return "\n".join(out)


+ def normalize_mermaid_blocks(text: str) -> str:
+     """Keep mermaid fences clean by moving legend text outside the block."""
+     lines = text.splitlines()
+     out: list[str] = []
+     in_mermaid = False
+     fence_char = ""
+     fence_len = 0
+     mermaid_lines: list[str] = []
+     legend_lines: list[str] = []
+
+     def is_legend(line: str) -> bool:
+         stripped = line.strip()
+         if not stripped:
+             return False
+         if stripped.startswith("图例") or stripped.lower().startswith("legend"):
+             return True
+         return "节点定位" in stripped
+
+     for line in lines:
+         stripped = line.lstrip()
+         if stripped.startswith(("```", "~~~")):
+             run_len = 0
+             while run_len < len(stripped) and stripped[run_len] == stripped[0]:
+                 run_len += 1
+             rest = stripped[run_len:].strip()
+             if not in_mermaid and rest.lower().startswith("mermaid"):
+                 in_mermaid = True
+                 fence_char = stripped[0]
+                 fence_len = run_len
+                 mermaid_lines = []
+                 legend_lines = []
+                 out.append(line)
+                 continue
+             if in_mermaid and stripped[0] == fence_char and run_len >= fence_len and rest == "":
+                 out.extend(mermaid_lines)
+                 out.append(line)
+                 out.extend(legend_lines)
+                 in_mermaid = False
+                 fence_char = ""
+                 fence_len = 0
+                 mermaid_lines = []
+                 legend_lines = []
+                 continue
+             out.append(line)
+             continue
+
+         if in_mermaid:
+             if is_legend(line):
+                 legend_lines.append(line)
+             else:
+                 mermaid_lines.append(line)
+             continue
+
+         out.append(line)
+
+     if in_mermaid:
+         out.extend(mermaid_lines)
+         out.extend(legend_lines)
+
+     return "\n".join(out)
+
+
  def normalize_unbalanced_fences(text: str) -> str:
      """Drop unmatched opening fences so later content still renders."""
      lines = text.splitlines()
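normalize_mermaid_blocks relocates legend-style prose ("图例" means "legend"; "节点定位" is a node-positioning note) that tends to get emitted inside a mermaid fence, where it would break mermaid parsing. A sketch; tilde fences are used in the sample only to keep it readable here, and the function accepts both backtick and tilde fences:

```python
from deepresearch_flow.paper.web.markdown import normalize_mermaid_blocks

sample = "\n".join([
    "~~~mermaid",
    "graph TD",
    "A --> B",
    "图例: A = encoder, B = decoder",
    "~~~",
])

print(normalize_mermaid_blocks(sample))
# ~~~mermaid
# graph TD
# A --> B
# ~~~
# 图例: A = encoder, B = decoder
```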
@@ -122,6 +259,7 @@ def normalize_unbalanced_fences(text: str) -> str:
      in_fence = False
      fence_char = ""
      fence_len = 0
+     fence_has_content = False
      fence_open_indices: list[int] = []
      fence_re = re.compile(r"([`~]{3,})(.*)$")

@@ -135,19 +273,46 @@ def normalize_unbalanced_fences(text: str) -> str:
          run = match.group(1)
          fence = run[0]
          run_len = len(run)
+         rest = match.group(2) or ""
+         has_info = bool(rest.strip())
          if not in_fence:
              in_fence = True
              fence_char = fence
              fence_len = run_len
+             fence_has_content = False
              fence_open_indices.append(len(out))
              is_fence = True
-         elif fence == fence_char and run_len >= fence_len:
-             in_fence = False
-             fence_char = ""
-             fence_len = 0
+         elif fence == fence_char and run_len >= fence_len and not has_info:
+             if not fence_has_content:
+                 if fence_open_indices:
+                     out.pop(fence_open_indices[-1])
+                     fence_open_indices.pop()
+                 in_fence = True
+                 fence_char = fence
+                 fence_len = run_len
+                 fence_has_content = False
+                 fence_open_indices.append(len(out))
+                 is_fence = True
+             else:
+                 in_fence = False
+                 fence_char = ""
+                 fence_len = 0
+                 fence_has_content = False
+                 is_fence = True
+         elif fence == fence_char and run_len >= fence_len and has_info:
+             if fence_open_indices:
+                 out.pop(fence_open_indices[-1])
+                 fence_open_indices.pop()
+             in_fence = True
+             fence_char = fence
+             fence_len = run_len
+             fence_has_content = False
+             fence_open_indices.append(len(out))
              is_fence = True

          out.append(line)
+         if in_fence and not is_fence and line.strip():
+             fence_has_content = True

      if in_fence and fence_open_indices:
          out.pop(fence_open_indices[-1])
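With fence_has_content and has_info, a "closing" fence that carries an info string, or one that would close an empty block, is reinterpreted as a new opener, and the stray opener it replaces is dropped. A sketch of the new behavior, again with tilde fences for readability:

```python
from deepresearch_flow.paper.web.markdown import normalize_unbalanced_fences

sample = "\n".join([
    "~~~python",   # opener immediately followed by another fence: empty block
    "~~~text",     # carries an info string, so it reopens rather than closes
    "x = 1",
    "~~~",
])

print(normalize_unbalanced_fences(sample))
# ~~~text
# x = 1
# ~~~
```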
@@ -534,6 +699,7 @@ def extract_html_table_placeholders(text: str) -> tuple[str, dict[str, str]]:

  def render_markdown_with_math_placeholders(md: MarkdownIt, text: str) -> str:
      """Render markdown with math, images, and tables properly escaped."""
+     text = normalize_mermaid_blocks(text)
      text = normalize_fenced_code_blocks(text)
      text = normalize_unbalanced_fences(text)
      text = strip_paragraph_wrapped_tables(text)
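Ordering matters here: normalize_mermaid_blocks must see the original fences before normalize_fenced_code_blocks and normalize_unbalanced_fences rewrite them. A condensed sketch of the front of the pipeline as of this hunk (_prenormalize is a hypothetical name):

```python
def _prenormalize(text: str) -> str:
    # mermaid legend extraction first, while fences are untouched
    text = normalize_mermaid_blocks(text)
    # then generic fence cleanup and unmatched-opener repair
    text = normalize_fenced_code_blocks(text)
    text = normalize_unbalanced_fences(text)
    # finally unwrap tables that got wrapped in paragraphs
    return strip_paragraph_wrapped_tables(text)
```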
@@ -223,11 +223,18 @@ header a {

  .markmap {
      width: 100%;
-     height: 420px;
      border: 1px solid #e2e8f0;
      border-radius: 12px;
      background: #ffffff;
      margin: 12px 0;
+     padding: 8px;
+     overflow-x: auto;
+ }
+
+ .markmap > svg {
+     width: 100%;
+     min-height: 240px;
+     display: block;
  }

  /* Utilities */
@@ -421,24 +421,58 @@
    var content = document.getElementById('content');
    if (!content) return;

-   // Markmap: convert fenced markmap blocks to svg mindmaps
-   if (window.markmap && window.markmap.Transformer && window.markmap.Markmap) {
-     var transformer = new window.markmap.Transformer();
-     document.querySelectorAll('code.language-markmap').forEach(function(code) {
-       var pre = code.parentElement;
-       if (!pre) return;
-       var svg = document.createElement('svg');
-       svg.className = 'markmap';
-       pre.replaceWith(svg);
+   // Markmap: convert fenced markmap blocks to autoloader containers
+   var markmapBlocks = 0;
+   document.querySelectorAll('code.language-markmap').forEach(function(code) {
+     var pre = code.parentElement;
+     if (!pre) return;
+     var wrapper = document.createElement('div');
+     wrapper.className = 'markmap';
+     var template = document.createElement('script');
+     template.type = 'text/template';
+     template.textContent = code.textContent || '';
+     wrapper.appendChild(template);
+     pre.replaceWith(wrapper);
+     markmapBlocks += 1;
+   });
+   function resizeMarkmaps() {
+     document.querySelectorAll('.markmap svg').forEach(function(svg) {
        try {
-         var result = transformer.transform(code.textContent || '');
-         window.markmap.Markmap.create(svg, null, result.root);
+         var bbox = svg.getBBox();
+         if (!bbox || !bbox.height) {
+           svg.style.height = '800px';
+           svg.style.width = '100%';
+           return;
+         }
+         var height = Math.ceil(bbox.height * 2);
+         svg.style.height = height + 'px';
+         if (bbox.width && bbox.width > svg.clientWidth) {
+           svg.style.width = Math.ceil(bbox.width * 2) + 'px';
+           if (svg.parentElement) {
+             svg.parentElement.style.overflowX = 'auto';
+           }
+         } else {
+           svg.style.width = '100%';
+         }
        } catch (err) {
-         // Ignore markmap parse errors
+         // Ignore sizing errors
        }
      });
    }

+   if (markmapBlocks && window.markmap && window.markmap.autoLoader && window.markmap.autoLoader.renderAll) {
+     window.markmap.autoLoader.renderAll();
+     setTimeout(resizeMarkmaps, 120);
+     setTimeout(resizeMarkmaps, 600);
+     setTimeout(resizeMarkmaps, 1600);
+     if (!window.__markmapResizeBound) {
+       window.__markmapResizeBound = true;
+       window.addEventListener('resize', function() {
+         setTimeout(resizeMarkmaps, 120);
+       });
+     }
+   }
+
    // Mermaid: convert fenced code blocks to mermaid divs
    document.querySelectorAll('code.language-mermaid').forEach(function(code) {
      var pre = code.parentElement;