deepresearch-flow 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. deepresearch_flow/paper/db_ops.py +21 -7
  2. deepresearch_flow/paper/prompt_templates/deep_read_phi_system.j2 +6 -0
  3. deepresearch_flow/paper/prompt_templates/deep_read_phi_user.j2 +391 -0
  4. deepresearch_flow/paper/prompt_templates/eight_questions_phi_system.j2 +6 -0
  5. deepresearch_flow/paper/prompt_templates/eight_questions_phi_user.j2 +133 -0
  6. deepresearch_flow/paper/prompt_templates/simple_phi_system.j2 +6 -0
  7. deepresearch_flow/paper/prompt_templates/simple_phi_user.j2 +31 -0
  8. deepresearch_flow/paper/schemas/deep_read_phi_schema.json +30 -0
  9. deepresearch_flow/paper/template_registry.py +39 -0
  10. deepresearch_flow/paper/templates/deep_read_phi.md.j2 +40 -0
  11. deepresearch_flow/paper/web/markdown.py +174 -8
  12. deepresearch_flow/paper/web/static/css/main.css +8 -1
  13. deepresearch_flow/paper/web/static/js/detail.js +46 -12
  14. deepresearch_flow/paper/web/templates/detail.html +9 -0
  15. deepresearch_flow/paper/web/text.py +8 -4
  16. deepresearch_flow/recognize/organize.py +9 -12
  17. deepresearch_flow/translator/fixers.py +15 -0
  18. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/METADATA +1 -1
  19. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/RECORD +23 -15
  20. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/WHEEL +0 -0
  21. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/entry_points.txt +0 -0
  22. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/licenses/LICENSE +0 -0
  23. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "type": "object",
4
+ "additionalProperties": false,
5
+ "required": [
6
+ "paper_title",
7
+ "paper_authors",
8
+ "publication_date",
9
+ "publication_venue",
10
+ "module_m1",
11
+ "module_m2",
12
+ "module_m3",
13
+ "module_m4",
14
+ "module_m5",
15
+ "module_m6"
16
+ ],
17
+ "properties": {
18
+ "paper_title": {"type": "string", "minLength": 1},
19
+ "paper_authors": {"type": "array", "items": {"type": "string"}},
20
+ "publication_date": {"type": "string"},
21
+ "publication_venue": {"type": "string"},
22
+ "module_m1": {"type": "string"},
23
+ "module_m2": {"type": "string"},
24
+ "module_m3": {"type": "string"},
25
+ "module_m4": {"type": "string"},
26
+ "module_m5": {"type": "string"},
27
+ "module_m6": {"type": "string"},
28
+ "output_language": {"type": "string"}
29
+ }
30
+ }
@@ -34,6 +34,13 @@ _TEMPLATES: dict[str, TemplateBundle] = {
34
34
  schema_file="default_paper_schema.json",
35
35
  render_template="default_paper.md.j2",
36
36
  ),
37
+ "simple_phi": TemplateBundle(
38
+ name="simple_phi",
39
+ prompt_system="simple_phi_system.j2",
40
+ prompt_user="simple_phi_user.j2",
41
+ schema_file="default_paper_schema.json",
42
+ render_template="default_paper.md.j2",
43
+ ),
37
44
  "deep_read": TemplateBundle(
38
45
  name="deep_read",
39
46
  prompt_system="deep_read_system.j2",
@@ -41,6 +48,13 @@ _TEMPLATES: dict[str, TemplateBundle] = {
41
48
  schema_file="deep_read_schema.json",
42
49
  render_template="deep_read.md.j2",
43
50
  ),
51
+ "deep_read_phi": TemplateBundle(
52
+ name="deep_read_phi",
53
+ prompt_system="deep_read_phi_system.j2",
54
+ prompt_user="deep_read_phi_user.j2",
55
+ schema_file="deep_read_phi_schema.json",
56
+ render_template="deep_read_phi.md.j2",
57
+ ),
44
58
  "eight_questions": TemplateBundle(
45
59
  name="eight_questions",
46
60
  prompt_system="eight_questions_system.j2",
@@ -48,6 +62,13 @@ _TEMPLATES: dict[str, TemplateBundle] = {
48
62
  schema_file="eight_questions_schema.json",
49
63
  render_template="eight_questions.md.j2",
50
64
  ),
65
+ "eight_questions_phi": TemplateBundle(
66
+ name="eight_questions_phi",
67
+ prompt_system="eight_questions_phi_system.j2",
68
+ prompt_user="eight_questions_phi_user.j2",
69
+ schema_file="eight_questions_schema.json",
70
+ render_template="eight_questions.md.j2",
71
+ ),
51
72
  "three_pass": TemplateBundle(
52
73
  name="three_pass",
53
74
  prompt_system="three_pass_system.j2",
@@ -75,6 +96,14 @@ _STAGES: dict[str, list[StageDefinition]] = {
75
96
  StageDefinition("module_g", ["module_g"]),
76
97
  StageDefinition("module_h", ["module_h"]),
77
98
  ],
99
+ "deep_read_phi": [
100
+ StageDefinition("module_m1", ["module_m1"]),
101
+ StageDefinition("module_m2", ["module_m2"]),
102
+ StageDefinition("module_m3", ["module_m3"]),
103
+ StageDefinition("module_m4", ["module_m4"]),
104
+ StageDefinition("module_m5", ["module_m5"]),
105
+ StageDefinition("module_m6", ["module_m6"]),
106
+ ],
78
107
  "eight_questions": [
79
108
  StageDefinition(
80
109
  "questions_1to4",
@@ -85,6 +114,16 @@ _STAGES: dict[str, list[StageDefinition]] = {
85
114
  ["question5", "question6", "question7", "question8"],
86
115
  ),
87
116
  ],
117
+ "eight_questions_phi": [
118
+ StageDefinition(
119
+ "questions_1to4",
120
+ ["question1", "question2", "question3", "question4"],
121
+ ),
122
+ StageDefinition(
123
+ "questions_5to8",
124
+ ["question5", "question6", "question7", "question8"],
125
+ ),
126
+ ],
88
127
  "three_pass": [
89
128
  StageDefinition("step1_summary", ["step1_summary"]),
90
129
  StageDefinition("step2_analysis", ["step2_analysis"]),
@@ -0,0 +1,40 @@
1
+ # {{ paper_title }}
2
+
3
+ {% set is_zh = output_language == "zh" %}
4
+ **{{ "作者 / Authors" if is_zh else "Authors" }}:** {{ paper_authors | join(", ") }}
5
+
6
+ {% if output_language %}
7
+ **{{ "输出语言 / Output Language" if is_zh else "Output Language" }}:** {{ output_language }}
8
+ {% endif %}
9
+
10
+ {% if publication_date %}
11
+ **{{ "发表日期 / Publication Date" if is_zh else "Publication Date" }}:** {{ publication_date }}
12
+ {% endif %}
13
+
14
+ {% if publication_venue %}
15
+ **{{ "期刊/会议 / Publication Venue" if is_zh else "Publication Venue" }}:** {{ publication_venue }}
16
+ {% endif %}
17
+
18
+ ## {{ "模块 M1:对齐目标 + 输入校验 + 论文地图 + Exhibit全量索引 + markmap大纲脑图" if is_zh else "Module M1: Alignment + Input Check + Paper Map + Exhibit Index + Markmap" }}
19
+
20
+ {{ module_m1 }}
21
+
22
+ ## {{ "模块 M2:第一遍鸟瞰 + Exhibit客观打分排序" if is_zh else "Module M2: First Pass Overview + Exhibit Scoring" }}
23
+
24
+ {{ module_m2 }}
25
+
26
+ ## {{ "模块 M3:概念与术语工程 + 辩论谱系" if is_zh else "Module M3: Concepts + Debate Lineage" }}
27
+
28
+ {{ module_m3 }}
29
+
30
+ ## {{ "模块 M4:论证重建 + Top Exhibits 深读(上半)" if is_zh else "Module M4: Argument Map + Top Exhibits (Part 1)" }}
31
+
32
+ {{ module_m4 }}
33
+
34
+ ## {{ "模块 M5:深度审视 + objection mining + Top Exhibits 深读(下半)" if is_zh else "Module M5: Deep Review + Objection Mining + Top Exhibits (Part 2)" }}
35
+
36
+ {{ module_m5 }}
37
+
38
+ ## {{ "模块 M6:写作级产出包" if is_zh else "Module M6: Writing-Ready Output Pack" }}
39
+
40
+ {{ module_m6 }}
@@ -41,13 +41,88 @@ def strip_paragraph_wrapped_tables(text: str) -> str:
41
41
 
42
42
 
43
43
  def normalize_footnote_definitions(text: str) -> str:
44
- """Normalize footnote definitions to the markdown-it footnote format."""
44
+ """Normalize footnotes and numbered notes to markdown-it footnote format."""
45
45
  lines = text.splitlines()
46
- for idx, line in enumerate(lines):
46
+ out: list[str] = []
47
+ in_fence = False
48
+ fence_char = ""
49
+ fence_len = 0
50
+ in_notes = False
51
+ notes_level: int | None = None
52
+ notes_heading_re = re.compile(
53
+ r"^#{1,6}\s*(参考文献|参考资料|参考书目|文献|引用|注释|脚注|notes?|references?|bibliography|works\s+cited|citations?)\b",
54
+ re.IGNORECASE,
55
+ )
56
+ notes_heading_plain_re = re.compile(
57
+ r"^(参考文献|参考资料|参考书目|文献|引用|注释|脚注|notes?|references?|bibliography|works\s+cited|citations?)\s*:?$",
58
+ re.IGNORECASE,
59
+ )
60
+ last_note_index: int | None = None
61
+
62
+ for line in lines:
63
+ stripped = line.lstrip()
64
+ if stripped.startswith(("```", "~~~")):
65
+ run_len = 0
66
+ while run_len < len(stripped) and stripped[run_len] == stripped[0]:
67
+ run_len += 1
68
+ if not in_fence:
69
+ in_fence = True
70
+ fence_char = stripped[0]
71
+ fence_len = run_len
72
+ elif stripped[0] == fence_char and run_len >= fence_len:
73
+ in_fence = False
74
+ fence_char = ""
75
+ fence_len = 0
76
+ out.append(line)
77
+ continue
78
+
79
+ if in_fence:
80
+ out.append(line)
81
+ continue
82
+
83
+ heading_match = notes_heading_re.match(stripped)
84
+ if heading_match:
85
+ in_notes = True
86
+ notes_level = len(stripped.split(" ")[0].lstrip("#"))
87
+ last_note_index = None
88
+ elif notes_heading_plain_re.match(stripped):
89
+ in_notes = True
90
+ notes_level = None
91
+ last_note_index = None
92
+ elif re.match(r"^#{1,6}\s+", stripped):
93
+ if notes_level is not None:
94
+ level = len(stripped.split(" ")[0].lstrip("#"))
95
+ if level <= notes_level:
96
+ in_notes = False
97
+ notes_level = None
98
+ last_note_index = None
99
+
47
100
  match = re.match(r"^\[\^([0-9]+)\]\s+", line)
48
101
  if match:
49
- lines[idx] = re.sub(r"^\[\^([0-9]+)\]\s+", r"[^\1]: ", line)
50
- return "\n".join(lines)
102
+ out.append(re.sub(r"^\[\^([0-9]+)\]\s+", r"[^\1]: ", line))
103
+ continue
104
+
105
+ if in_notes:
106
+ list_match = re.match(r"^\s*(\d{1,4})[.)]\s+", line)
107
+ if list_match:
108
+ number = list_match.group(1)
109
+ rest = line[list_match.end() :].strip()
110
+ out.append(f"[^{number}]: {rest}")
111
+ last_note_index = len(out) - 1
112
+ continue
113
+ if last_note_index is not None:
114
+ if line.strip() == "":
115
+ out.append(line)
116
+ last_note_index = None
117
+ continue
118
+ if line.startswith((" ", "\t")):
119
+ out[last_note_index] = f"{out[last_note_index]} {line.strip()}"
120
+ continue
121
+
122
+ line = re.sub(r"(?<!\^)\[(\d{1,4})\]", r"[^\1]", line)
123
+ out.append(line)
124
+
125
+ return "\n".join(out)
51
126
 
52
127
 
53
128
  def normalize_markdown_images(text: str) -> str:
@@ -115,6 +190,68 @@ def normalize_fenced_code_blocks(text: str) -> str:
115
190
  return "\n".join(out)
116
191
 
117
192
 
193
+ def normalize_mermaid_blocks(text: str) -> str:
194
+ """Keep mermaid fences clean by moving legend text outside the block."""
195
+ lines = text.splitlines()
196
+ out: list[str] = []
197
+ in_mermaid = False
198
+ fence_char = ""
199
+ fence_len = 0
200
+ mermaid_lines: list[str] = []
201
+ legend_lines: list[str] = []
202
+
203
+ def is_legend(line: str) -> bool:
204
+ stripped = line.strip()
205
+ if not stripped:
206
+ return False
207
+ if stripped.startswith("图例") or stripped.lower().startswith("legend"):
208
+ return True
209
+ return "节点定位" in stripped
210
+
211
+ for line in lines:
212
+ stripped = line.lstrip()
213
+ if stripped.startswith(("```", "~~~")):
214
+ run_len = 0
215
+ while run_len < len(stripped) and stripped[run_len] == stripped[0]:
216
+ run_len += 1
217
+ rest = stripped[run_len:].strip()
218
+ if not in_mermaid and rest.lower().startswith("mermaid"):
219
+ in_mermaid = True
220
+ fence_char = stripped[0]
221
+ fence_len = run_len
222
+ mermaid_lines = []
223
+ legend_lines = []
224
+ out.append(line)
225
+ continue
226
+ if in_mermaid and stripped[0] == fence_char and run_len >= fence_len and rest == "":
227
+ out.extend(mermaid_lines)
228
+ out.append(line)
229
+ out.extend(legend_lines)
230
+ in_mermaid = False
231
+ fence_char = ""
232
+ fence_len = 0
233
+ mermaid_lines = []
234
+ legend_lines = []
235
+ continue
236
+ out.append(line)
237
+ continue
238
+
239
+ if in_mermaid:
240
+ if is_legend(line):
241
+ legend_lines.append(line)
242
+ else:
243
+ mermaid_lines.append(line)
244
+ continue
245
+
246
+ out.append(line)
247
+
248
+ if in_mermaid:
249
+ out.extend(mermaid_lines)
250
+ out.extend(legend_lines)
251
+
252
+ return "\n".join(out)
253
+
254
+
118
255
  def normalize_unbalanced_fences(text: str) -> str:
119
256
  """Drop unmatched opening fences so later content still renders."""
120
257
  lines = text.splitlines()
@@ -122,6 +259,7 @@ def normalize_unbalanced_fences(text: str) -> str:
122
259
  in_fence = False
123
260
  fence_char = ""
124
261
  fence_len = 0
262
+ fence_has_content = False
125
263
  fence_open_indices: list[int] = []
126
264
  fence_re = re.compile(r"([`~]{3,})(.*)$")
127
265
 
@@ -135,19 +273,46 @@ def normalize_unbalanced_fences(text: str) -> str:
135
273
  run = match.group(1)
136
274
  fence = run[0]
137
275
  run_len = len(run)
276
+ rest = match.group(2) or ""
277
+ has_info = bool(rest.strip())
138
278
  if not in_fence:
139
279
  in_fence = True
140
280
  fence_char = fence
141
281
  fence_len = run_len
282
+ fence_has_content = False
142
283
  fence_open_indices.append(len(out))
143
284
  is_fence = True
144
- elif fence == fence_char and run_len >= fence_len:
145
- in_fence = False
146
- fence_char = ""
147
- fence_len = 0
285
+ elif fence == fence_char and run_len >= fence_len and not has_info:
286
+ if not fence_has_content:
287
+ if fence_open_indices:
288
+ out.pop(fence_open_indices[-1])
289
+ fence_open_indices.pop()
290
+ in_fence = True
291
+ fence_char = fence
292
+ fence_len = run_len
293
+ fence_has_content = False
294
+ fence_open_indices.append(len(out))
295
+ is_fence = True
296
+ else:
297
+ in_fence = False
298
+ fence_char = ""
299
+ fence_len = 0
300
+ fence_has_content = False
301
+ is_fence = True
302
+ elif fence == fence_char and run_len >= fence_len and has_info:
303
+ if fence_open_indices:
304
+ out.pop(fence_open_indices[-1])
305
+ fence_open_indices.pop()
306
+ in_fence = True
307
+ fence_char = fence
308
+ fence_len = run_len
309
+ fence_has_content = False
310
+ fence_open_indices.append(len(out))
148
311
  is_fence = True
149
312
 
150
313
  out.append(line)
314
+ if in_fence and not is_fence and line.strip():
315
+ fence_has_content = True
151
316
 
152
317
  if in_fence and fence_open_indices:
153
318
  out.pop(fence_open_indices[-1])
@@ -534,6 +699,7 @@ def extract_html_table_placeholders(text: str) -> tuple[str, dict[str, str]]:
534
699
 
535
700
  def render_markdown_with_math_placeholders(md: MarkdownIt, text: str) -> str:
536
701
  """Render markdown with math, images, and tables properly escaped."""
702
+ text = normalize_mermaid_blocks(text)
537
703
  text = normalize_fenced_code_blocks(text)
538
704
  text = normalize_unbalanced_fences(text)
539
705
  text = strip_paragraph_wrapped_tables(text)
@@ -223,11 +223,18 @@ header a {
223
223
 
224
224
  .markmap {
225
225
  width: 100%;
226
- height: 420px;
227
226
  border: 1px solid #e2e8f0;
228
227
  border-radius: 12px;
229
228
  background: #ffffff;
230
229
  margin: 12px 0;
230
+ padding: 8px;
231
+ overflow-x: auto;
232
+ }
233
+
234
+ .markmap > svg {
235
+ width: 100%;
236
+ min-height: 240px;
237
+ display: block;
231
238
  }
232
239
 
233
240
  /* Utilities */
@@ -421,24 +421,58 @@
421
421
  var content = document.getElementById('content');
422
422
  if (!content) return;
423
423
 
424
- // Markmap: convert fenced markmap blocks to svg mindmaps
425
- if (window.markmap && window.markmap.Transformer && window.markmap.Markmap) {
426
- var transformer = new window.markmap.Transformer();
427
- document.querySelectorAll('code.language-markmap').forEach(function(code) {
428
- var pre = code.parentElement;
429
- if (!pre) return;
430
- var svg = document.createElement('svg');
431
- svg.className = 'markmap';
432
- pre.replaceWith(svg);
424
+ // Markmap: convert fenced markmap blocks to autoloader containers
425
+ var markmapBlocks = 0;
426
+ document.querySelectorAll('code.language-markmap').forEach(function(code) {
427
+ var pre = code.parentElement;
428
+ if (!pre) return;
429
+ var wrapper = document.createElement('div');
430
+ wrapper.className = 'markmap';
431
+ var template = document.createElement('script');
432
+ template.type = 'text/template';
433
+ template.textContent = code.textContent || '';
434
+ wrapper.appendChild(template);
435
+ pre.replaceWith(wrapper);
436
+ markmapBlocks += 1;
437
+ });
438
+ function resizeMarkmaps() {
439
+ document.querySelectorAll('.markmap svg').forEach(function(svg) {
433
440
  try {
434
- var result = transformer.transform(code.textContent || '');
435
- window.markmap.Markmap.create(svg, null, result.root);
441
+ var bbox = svg.getBBox();
442
+ if (!bbox || !bbox.height) {
443
+ svg.style.height = '800px';
444
+ svg.style.width = '100%';
445
+ return;
446
+ }
447
+ var height = Math.ceil(bbox.height * 2);
448
+ svg.style.height = height + 'px';
449
+ if (bbox.width && bbox.width > svg.clientWidth) {
450
+ svg.style.width = Math.ceil(bbox.width * 2) + 'px';
451
+ if (svg.parentElement) {
452
+ svg.parentElement.style.overflowX = 'auto';
453
+ }
454
+ } else {
455
+ svg.style.width = '100%';
456
+ }
436
457
  } catch (err) {
437
- // Ignore markmap parse errors
458
+ // Ignore sizing errors
438
459
  }
439
460
  });
440
461
  }
441
462
 
463
+ if (markmapBlocks && window.markmap && window.markmap.autoLoader && window.markmap.autoLoader.renderAll) {
464
+ window.markmap.autoLoader.renderAll();
465
+ setTimeout(resizeMarkmaps, 120);
466
+ setTimeout(resizeMarkmaps, 600);
467
+ setTimeout(resizeMarkmaps, 1600);
468
+ if (!window.__markmapResizeBound) {
469
+ window.__markmapResizeBound = true;
470
+ window.addEventListener('resize', function() {
471
+ setTimeout(resizeMarkmaps, 120);
472
+ });
473
+ }
474
+ }
475
+
442
476
  // Mermaid: convert fenced code blocks to mermaid divs
443
477
  document.querySelectorAll('code.language-mermaid').forEach(function(code) {
444
478
  var pre = code.parentElement;
@@ -256,8 +256,17 @@
256
256
  <script src="https://cdn.jsdelivr.net/npm/katex@0.16.27/dist/katex.min.js"></script>
257
257
  <script src="https://cdn.jsdelivr.net/npm/katex@0.16.27/dist/contrib/auto-render.min.js"></script>
258
258
  <script src="https://cdn.jsdelivr.net/npm/mermaid@10.6.1/dist/mermaid.min.js"></script>
259
+ <script src="https://cdn.jsdelivr.net/npm/d3@7.9.0/dist/d3.min.js"></script>
259
260
  <script src="https://cdn.jsdelivr.net/npm/markmap-lib@0.15.4/dist/browser/index.min.js"></script>
261
+ <script>
262
+ window.__markmapLib = window.markmap || window.__markmapLib;
263
+ </script>
260
264
  <script src="https://cdn.jsdelivr.net/npm/markmap-view@0.15.4/dist/browser/index.min.js"></script>
265
+ <script>
266
+ window.markmap = window.markmap || {};
267
+ window.markmap.autoLoader = { manual: true };
268
+ </script>
269
+ <script src="https://cdn.jsdelivr.net/npm/markmap-autoloader@0.18.12/dist/index.min.js"></script>
261
270
  <script src="https://cdn.jsdelivr.net/npm/marked@12.0.1/marked.min.js"></script>
262
271
  <script src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"></script>
263
272
 
@@ -41,17 +41,21 @@ def normalize_venue(raw: str) -> str:
41
41
 
42
42
 
43
43
  def extract_summary_snippet(paper: dict[str, object], max_len: int = 280) -> str:
44
- """Extract a short summary snippet, preferring the 'simple' template."""
44
+ """Extract a short summary snippet, preferring the simple/simple_phi templates."""
45
45
  summary = ""
46
46
  templates = paper.get("templates")
47
47
  if isinstance(templates, dict):
48
- simple = templates.get("simple")
49
- if isinstance(simple, dict):
48
+ for template_tag in ("simple", "simple_phi"):
49
+ template = templates.get(template_tag)
50
+ if not isinstance(template, dict):
51
+ continue
50
52
  for key in ("summary", "abstract"):
51
- value = simple.get(key)
53
+ value = template.get(key)
52
54
  if isinstance(value, str) and value.strip():
53
55
  summary = value.strip()
54
56
  break
57
+ if summary:
58
+ break
55
59
  if not summary:
56
60
  for key in ("summary", "abstract"):
57
61
  value = paper.get(key)
@@ -80,26 +80,23 @@ def discover_mineru_dirs(inputs: Iterable[str], recursive: bool) -> list[Path]:
80
80
  if path.name != "full.md":
81
81
  raise FileNotFoundError(f"Expected full.md file but got: {path}")
82
82
  parent = path.parent.resolve()
83
- if (parent / "images").is_dir():
84
- results.add(parent)
85
- else:
86
- logger.warning("Skipping %s (missing images/)", parent)
83
+ if not (parent / "images").is_dir():
84
+ logger.warning("Missing images/ for %s; continuing", parent)
85
+ results.add(parent)
87
86
  continue
88
87
  if not path.exists():
89
88
  raise FileNotFoundError(f"Input path not found: {path}")
90
89
  if path.is_dir():
91
90
  if (path / "full.md").is_file():
92
- if (path / "images").is_dir():
93
- results.add(path.resolve())
94
- else:
95
- logger.warning("Skipping %s (missing images/)", path)
91
+ if not (path / "images").is_dir():
92
+ logger.warning("Missing images/ for %s; continuing", path)
93
+ results.add(path.resolve())
96
94
  pattern = path.rglob("full.md") if recursive else path.glob("full.md")
97
95
  for full_path in pattern:
98
96
  parent = full_path.parent.resolve()
99
- if (parent / "images").is_dir():
100
- results.add(parent)
101
- else:
102
- logger.warning("Skipping %s (missing images/)", parent)
97
+ if not (parent / "images").is_dir():
98
+ logger.warning("Missing images/ for %s; continuing", parent)
99
+ results.add(parent)
103
100
  continue
104
101
  raise FileNotFoundError(f"Input path not found: {path}")
105
102
  return sorted(results)
@@ -448,4 +448,19 @@ def fix_markdown(text: str, level: str) -> str:
448
448
  if level == "aggressive":
449
449
  text = title_processor.fix_titles(text)
450
450
 
451
+ try:
452
+ from deepresearch_flow.paper.web.markdown import (
453
+ normalize_fenced_code_blocks,
454
+ normalize_footnote_definitions,
455
+ normalize_mermaid_blocks,
456
+ normalize_unbalanced_fences,
457
+ )
458
+ except Exception:
459
+ return text
460
+
461
+ text = normalize_fenced_code_blocks(text)
462
+ text = normalize_mermaid_blocks(text)
463
+ text = normalize_unbalanced_fences(text)
464
+ text = normalize_footnote_definitions(text)
465
+
451
466
  return text
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepresearch-flow
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Workflow tools for paper extraction, review, and research automation.
5
5
  Author-email: DengQi <dengqi935@gmail.com>
6
6
  License: MIT License