deepresearch-flow 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. deepresearch_flow/paper/db_ops.py +21 -7
  2. deepresearch_flow/paper/prompt_templates/deep_read_phi_system.j2 +6 -0
  3. deepresearch_flow/paper/prompt_templates/deep_read_phi_user.j2 +391 -0
  4. deepresearch_flow/paper/prompt_templates/eight_questions_phi_system.j2 +6 -0
  5. deepresearch_flow/paper/prompt_templates/eight_questions_phi_user.j2 +133 -0
  6. deepresearch_flow/paper/prompt_templates/simple_phi_system.j2 +6 -0
  7. deepresearch_flow/paper/prompt_templates/simple_phi_user.j2 +31 -0
  8. deepresearch_flow/paper/schemas/deep_read_phi_schema.json +30 -0
  9. deepresearch_flow/paper/template_registry.py +39 -0
  10. deepresearch_flow/paper/templates/deep_read_phi.md.j2 +40 -0
  11. deepresearch_flow/paper/web/markdown.py +174 -8
  12. deepresearch_flow/paper/web/static/css/main.css +8 -1
  13. deepresearch_flow/paper/web/static/js/detail.js +46 -12
  14. deepresearch_flow/paper/web/templates/detail.html +9 -0
  15. deepresearch_flow/paper/web/text.py +8 -4
  16. deepresearch_flow/recognize/organize.py +9 -12
  17. deepresearch_flow/translator/fixers.py +15 -0
  18. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/METADATA +1 -1
  19. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/RECORD +23 -15
  20. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/WHEEL +0 -0
  21. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/entry_points.txt +0 -0
  22. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/licenses/LICENSE +0 -0
  23. {deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "type": "object",
4
+ "additionalProperties": false,
5
+ "required": [
6
+ "paper_title",
7
+ "paper_authors",
8
+ "publication_date",
9
+ "publication_venue",
10
+ "module_m1",
11
+ "module_m2",
12
+ "module_m3",
13
+ "module_m4",
14
+ "module_m5",
15
+ "module_m6"
16
+ ],
17
+ "properties": {
18
+ "paper_title": {"type": "string", "minLength": 1},
19
+ "paper_authors": {"type": "array", "items": {"type": "string"}},
20
+ "publication_date": {"type": "string"},
21
+ "publication_venue": {"type": "string"},
22
+ "module_m1": {"type": "string"},
23
+ "module_m2": {"type": "string"},
24
+ "module_m3": {"type": "string"},
25
+ "module_m4": {"type": "string"},
26
+ "module_m5": {"type": "string"},
27
+ "module_m6": {"type": "string"},
28
+ "output_language": {"type": "string"}
29
+ }
30
+ }
@@ -34,6 +34,13 @@ _TEMPLATES: dict[str, TemplateBundle] = {
34
34
  schema_file="default_paper_schema.json",
35
35
  render_template="default_paper.md.j2",
36
36
  ),
37
+ "simple_phi": TemplateBundle(
38
+ name="simple_phi",
39
+ prompt_system="simple_phi_system.j2",
40
+ prompt_user="simple_phi_user.j2",
41
+ schema_file="default_paper_schema.json",
42
+ render_template="default_paper.md.j2",
43
+ ),
37
44
  "deep_read": TemplateBundle(
38
45
  name="deep_read",
39
46
  prompt_system="deep_read_system.j2",
@@ -41,6 +48,13 @@ _TEMPLATES: dict[str, TemplateBundle] = {
41
48
  schema_file="deep_read_schema.json",
42
49
  render_template="deep_read.md.j2",
43
50
  ),
51
+ "deep_read_phi": TemplateBundle(
52
+ name="deep_read_phi",
53
+ prompt_system="deep_read_phi_system.j2",
54
+ prompt_user="deep_read_phi_user.j2",
55
+ schema_file="deep_read_phi_schema.json",
56
+ render_template="deep_read_phi.md.j2",
57
+ ),
44
58
  "eight_questions": TemplateBundle(
45
59
  name="eight_questions",
46
60
  prompt_system="eight_questions_system.j2",
@@ -48,6 +62,13 @@ _TEMPLATES: dict[str, TemplateBundle] = {
48
62
  schema_file="eight_questions_schema.json",
49
63
  render_template="eight_questions.md.j2",
50
64
  ),
65
+ "eight_questions_phi": TemplateBundle(
66
+ name="eight_questions_phi",
67
+ prompt_system="eight_questions_phi_system.j2",
68
+ prompt_user="eight_questions_phi_user.j2",
69
+ schema_file="eight_questions_schema.json",
70
+ render_template="eight_questions.md.j2",
71
+ ),
51
72
  "three_pass": TemplateBundle(
52
73
  name="three_pass",
53
74
  prompt_system="three_pass_system.j2",
@@ -75,6 +96,14 @@ _STAGES: dict[str, list[StageDefinition]] = {
75
96
  StageDefinition("module_g", ["module_g"]),
76
97
  StageDefinition("module_h", ["module_h"]),
77
98
  ],
99
+ "deep_read_phi": [
100
+ StageDefinition("module_m1", ["module_m1"]),
101
+ StageDefinition("module_m2", ["module_m2"]),
102
+ StageDefinition("module_m3", ["module_m3"]),
103
+ StageDefinition("module_m4", ["module_m4"]),
104
+ StageDefinition("module_m5", ["module_m5"]),
105
+ StageDefinition("module_m6", ["module_m6"]),
106
+ ],
78
107
  "eight_questions": [
79
108
  StageDefinition(
80
109
  "questions_1to4",
@@ -85,6 +114,16 @@ _STAGES: dict[str, list[StageDefinition]] = {
85
114
  ["question5", "question6", "question7", "question8"],
86
115
  ),
87
116
  ],
117
+ "eight_questions_phi": [
118
+ StageDefinition(
119
+ "questions_1to4",
120
+ ["question1", "question2", "question3", "question4"],
121
+ ),
122
+ StageDefinition(
123
+ "questions_5to8",
124
+ ["question5", "question6", "question7", "question8"],
125
+ ),
126
+ ],
88
127
  "three_pass": [
89
128
  StageDefinition("step1_summary", ["step1_summary"]),
90
129
  StageDefinition("step2_analysis", ["step2_analysis"]),
@@ -0,0 +1,40 @@
1
+ # {{ paper_title }}
2
+
3
+ {% set is_zh = output_language == "zh" %}
4
+ **{{ "作者 / Authors" if is_zh else "Authors" }}:** {{ paper_authors | join(", ") }}
5
+
6
+ {% if output_language %}
7
+ **{{ "输出语言 / Output Language" if is_zh else "Output Language" }}:** {{ output_language }}
8
+ {% endif %}
9
+
10
+ {% if publication_date %}
11
+ **{{ "发表日期 / Publication Date" if is_zh else "Publication Date" }}:** {{ publication_date }}
12
+ {% endif %}
13
+
14
+ {% if publication_venue %}
15
+ **{{ "期刊/会议 / Publication Venue" if is_zh else "Publication Venue" }}:** {{ publication_venue }}
16
+ {% endif %}
17
+
18
+ ## {{ "模块 M1:对齐目标 + 输入校验 + 论文地图 + Exhibit全量索引 + markmap大纲脑图" if is_zh else "Module M1: Alignment + Input Check + Paper Map + Exhibit Index + Markmap" }}
19
+
20
+ {{ module_m1 }}
21
+
22
+ ## {{ "模块 M2:第一遍鸟瞰 + Exhibit客观打分排序" if is_zh else "Module M2: First Pass Overview + Exhibit Scoring" }}
23
+
24
+ {{ module_m2 }}
25
+
26
+ ## {{ "模块 M3:概念与术语工程 + 辩论谱系" if is_zh else "Module M3: Concepts + Debate Lineage" }}
27
+
28
+ {{ module_m3 }}
29
+
30
+ ## {{ "模块 M4:论证重建 + Top Exhibits 深读(上半)" if is_zh else "Module M4: Argument Map + Top Exhibits (Part 1)" }}
31
+
32
+ {{ module_m4 }}
33
+
34
+ ## {{ "模块 M5:深度审视 + objection mining + Top Exhibits 深读(下半)" if is_zh else "Module M5: Deep Review + Objection Mining + Top Exhibits (Part 2)" }}
35
+
36
+ {{ module_m5 }}
37
+
38
+ ## {{ "模块 M6:写作级产出包" if is_zh else "Module M6: Writing-Ready Output Pack" }}
39
+
40
+ {{ module_m6 }}
@@ -41,13 +41,88 @@ def strip_paragraph_wrapped_tables(text: str) -> str:
41
41
 
42
42
 
43
43
  def normalize_footnote_definitions(text: str) -> str:
44
- """Normalize footnote definitions to the markdown-it footnote format."""
44
+ """Normalize footnotes and numbered notes to markdown-it footnote format."""
45
45
  lines = text.splitlines()
46
- for idx, line in enumerate(lines):
46
+ out: list[str] = []
47
+ in_fence = False
48
+ fence_char = ""
49
+ fence_len = 0
50
+ in_notes = False
51
+ notes_level: int | None = None
52
+ notes_heading_re = re.compile(
53
+ r"^#{1,6}\s*(参考文献|参考资料|参考书目|文献|引用|注释|脚注|notes?|references?|bibliography|works\s+cited|citations?)\b",
54
+ re.IGNORECASE,
55
+ )
56
+ notes_heading_plain_re = re.compile(
57
+ r"^(参考文献|参考资料|参考书目|文献|引用|注释|脚注|notes?|references?|bibliography|works\s+cited|citations?)\s*:?$",
58
+ re.IGNORECASE,
59
+ )
60
+ last_note_index: int | None = None
61
+
62
+ for line in lines:
63
+ stripped = line.lstrip()
64
+ if stripped.startswith(("```", "~~~")):
65
+ run_len = 0
66
+ while run_len < len(stripped) and stripped[run_len] == stripped[0]:
67
+ run_len += 1
68
+ if not in_fence:
69
+ in_fence = True
70
+ fence_char = stripped[0]
71
+ fence_len = run_len
72
+ elif stripped[0] == fence_char and run_len >= fence_len:
73
+ in_fence = False
74
+ fence_char = ""
75
+ fence_len = 0
76
+ out.append(line)
77
+ continue
78
+
79
+ if in_fence:
80
+ out.append(line)
81
+ continue
82
+
83
+ heading_match = notes_heading_re.match(stripped)
84
+ if heading_match:
85
+ in_notes = True
86
+ notes_level = len(stripped.split(" ")[0].lstrip("#"))
87
+ last_note_index = None
88
+ elif notes_heading_plain_re.match(stripped):
89
+ in_notes = True
90
+ notes_level = None
91
+ last_note_index = None
92
+ elif re.match(r"^#{1,6}\s+", stripped):
93
+ if notes_level is not None:
94
+ level = len(stripped.split(" ")[0].lstrip("#"))
95
+ if level <= notes_level:
96
+ in_notes = False
97
+ notes_level = None
98
+ last_note_index = None
99
+
47
100
  match = re.match(r"^\[\^([0-9]+)\]\s+", line)
48
101
  if match:
49
- lines[idx] = re.sub(r"^\[\^([0-9]+)\]\s+", r"[^\1]: ", line)
50
- return "\n".join(lines)
102
+ out.append(re.sub(r"^\[\^([0-9]+)\]\s+", r"[^\1]: ", line))
103
+ continue
104
+
105
+ if in_notes:
106
+ list_match = re.match(r"^\s*(\d{1,4})[.)]\s+", line)
107
+ if list_match:
108
+ number = list_match.group(1)
109
+ rest = line[list_match.end() :].strip()
110
+ out.append(f"[^{number}]: {rest}")
111
+ last_note_index = len(out) - 1
112
+ continue
113
+ if last_note_index is not None:
114
+ if line.strip() == "":
115
+ out.append(line)
116
+ last_note_index = None
117
+ continue
118
+ if line.startswith((" ", "\t")):
119
+ out[last_note_index] = f"{out[last_note_index]} {line.strip()}"
120
+ continue
121
+
122
+ line = re.sub(r"(?<!\^)\[(\d{1,4})\]", r"[^\1]", line)
123
+ out.append(line)
124
+
125
+ return "\n".join(out)
51
126
 
52
127
 
53
128
  def normalize_markdown_images(text: str) -> str:
@@ -115,6 +190,68 @@ def normalize_fenced_code_blocks(text: str) -> str:
115
190
  return "\n".join(out)
116
191
 
117
192
 
193
+ def normalize_mermaid_blocks(text: str) -> str:
194
+ """Keep mermaid fences clean by moving legend text outside the block."""
195
+ lines = text.splitlines()
196
+ out: list[str] = []
197
+ in_mermaid = False
198
+ fence_char = ""
199
+ fence_len = 0
200
+ mermaid_lines: list[str] = []
201
+ legend_lines: list[str] = []
202
+
203
+ def is_legend(line: str) -> bool:
204
+ stripped = line.strip()
205
+ if not stripped:
206
+ return False
207
+ if stripped.startswith("图例") or stripped.lower().startswith("legend"):
208
+ return True
209
+ return "节点定位" in stripped
210
+
211
+ for line in lines:
212
+ stripped = line.lstrip()
213
+ if stripped.startswith(("```", "~~~")):
214
+ run_len = 0
215
+ while run_len < len(stripped) and stripped[run_len] == stripped[0]:
216
+ run_len += 1
217
+ rest = stripped[run_len:].strip()
218
+ if not in_mermaid and rest.lower().startswith("mermaid"):
219
+ in_mermaid = True
220
+ fence_char = stripped[0]
221
+ fence_len = run_len
222
+ mermaid_lines = []
223
+ legend_lines = []
224
+ out.append(line)
225
+ continue
226
+ if in_mermaid and stripped[0] == fence_char and run_len >= fence_len and rest == "":
227
+ out.extend(mermaid_lines)
228
+ out.append(line)
229
+ out.extend(legend_lines)
230
+ in_mermaid = False
231
+ fence_char = ""
232
+ fence_len = 0
233
+ mermaid_lines = []
234
+ legend_lines = []
235
+ continue
236
+ out.append(line)
237
+ continue
238
+
239
+ if in_mermaid:
240
+ if is_legend(line):
241
+ legend_lines.append(line)
242
+ else:
243
+ mermaid_lines.append(line)
244
+ continue
245
+
246
+ out.append(line)
247
+
248
+ if in_mermaid:
249
+ out.extend(mermaid_lines)
250
+ out.extend(legend_lines)
251
+
252
+ return "\n".join(out)
253
+
254
+
118
255
  def normalize_unbalanced_fences(text: str) -> str:
119
256
  """Drop unmatched opening fences so later content still renders."""
120
257
  lines = text.splitlines()
@@ -122,6 +259,7 @@ def normalize_unbalanced_fences(text: str) -> str:
122
259
  in_fence = False
123
260
  fence_char = ""
124
261
  fence_len = 0
262
+ fence_has_content = False
125
263
  fence_open_indices: list[int] = []
126
264
  fence_re = re.compile(r"([`~]{3,})(.*)$")
127
265
 
@@ -135,19 +273,46 @@ def normalize_unbalanced_fences(text: str) -> str:
135
273
  run = match.group(1)
136
274
  fence = run[0]
137
275
  run_len = len(run)
276
+ rest = match.group(2) or ""
277
+ has_info = bool(rest.strip())
138
278
  if not in_fence:
139
279
  in_fence = True
140
280
  fence_char = fence
141
281
  fence_len = run_len
282
+ fence_has_content = False
142
283
  fence_open_indices.append(len(out))
143
284
  is_fence = True
144
- elif fence == fence_char and run_len >= fence_len:
145
- in_fence = False
146
- fence_char = ""
147
- fence_len = 0
285
+ elif fence == fence_char and run_len >= fence_len and not has_info:
286
+ if not fence_has_content:
287
+ if fence_open_indices:
288
+ out.pop(fence_open_indices[-1])
289
+ fence_open_indices.pop()
290
+ in_fence = True
291
+ fence_char = fence
292
+ fence_len = run_len
293
+ fence_has_content = False
294
+ fence_open_indices.append(len(out))
295
+ is_fence = True
296
+ else:
297
+ in_fence = False
298
+ fence_char = ""
299
+ fence_len = 0
300
+ fence_has_content = False
301
+ is_fence = True
302
+ elif fence == fence_char and run_len >= fence_len and has_info:
303
+ if fence_open_indices:
304
+ out.pop(fence_open_indices[-1])
305
+ fence_open_indices.pop()
306
+ in_fence = True
307
+ fence_char = fence
308
+ fence_len = run_len
309
+ fence_has_content = False
310
+ fence_open_indices.append(len(out))
148
311
  is_fence = True
149
312
 
150
313
  out.append(line)
314
+ if in_fence and not is_fence and line.strip():
315
+ fence_has_content = True
151
316
 
152
317
  if in_fence and fence_open_indices:
153
318
  out.pop(fence_open_indices[-1])
@@ -534,6 +699,7 @@ def extract_html_table_placeholders(text: str) -> tuple[str, dict[str, str]]:
534
699
 
535
700
  def render_markdown_with_math_placeholders(md: MarkdownIt, text: str) -> str:
536
701
  """Render markdown with math, images, and tables properly escaped."""
702
+ text = normalize_mermaid_blocks(text)
537
703
  text = normalize_fenced_code_blocks(text)
538
704
  text = normalize_unbalanced_fences(text)
539
705
  text = strip_paragraph_wrapped_tables(text)
@@ -223,11 +223,18 @@ header a {
223
223
 
224
224
  .markmap {
225
225
  width: 100%;
226
- height: 420px;
227
226
  border: 1px solid #e2e8f0;
228
227
  border-radius: 12px;
229
228
  background: #ffffff;
230
229
  margin: 12px 0;
230
+ padding: 8px;
231
+ overflow-x: auto;
232
+ }
233
+
234
+ .markmap > svg {
235
+ width: 100%;
236
+ min-height: 240px;
237
+ display: block;
231
238
  }
232
239
 
233
240
  /* Utilities */
@@ -421,24 +421,58 @@
421
421
  var content = document.getElementById('content');
422
422
  if (!content) return;
423
423
 
424
- // Markmap: convert fenced markmap blocks to svg mindmaps
425
- if (window.markmap && window.markmap.Transformer && window.markmap.Markmap) {
426
- var transformer = new window.markmap.Transformer();
427
- document.querySelectorAll('code.language-markmap').forEach(function(code) {
428
- var pre = code.parentElement;
429
- if (!pre) return;
430
- var svg = document.createElement('svg');
431
- svg.className = 'markmap';
432
- pre.replaceWith(svg);
424
+ // Markmap: convert fenced markmap blocks to autoloader containers
425
+ var markmapBlocks = 0;
426
+ document.querySelectorAll('code.language-markmap').forEach(function(code) {
427
+ var pre = code.parentElement;
428
+ if (!pre) return;
429
+ var wrapper = document.createElement('div');
430
+ wrapper.className = 'markmap';
431
+ var template = document.createElement('script');
432
+ template.type = 'text/template';
433
+ template.textContent = code.textContent || '';
434
+ wrapper.appendChild(template);
435
+ pre.replaceWith(wrapper);
436
+ markmapBlocks += 1;
437
+ });
438
+ function resizeMarkmaps() {
439
+ document.querySelectorAll('.markmap svg').forEach(function(svg) {
433
440
  try {
434
- var result = transformer.transform(code.textContent || '');
435
- window.markmap.Markmap.create(svg, null, result.root);
441
+ var bbox = svg.getBBox();
442
+ if (!bbox || !bbox.height) {
443
+ svg.style.height = '800px';
444
+ svg.style.width = '100%';
445
+ return;
446
+ }
447
+ var height = Math.ceil(bbox.height * 2);
448
+ svg.style.height = height + 'px';
449
+ if (bbox.width && bbox.width > svg.clientWidth) {
450
+ svg.style.width = Math.ceil(bbox.width * 2) + 'px';
451
+ if (svg.parentElement) {
452
+ svg.parentElement.style.overflowX = 'auto';
453
+ }
454
+ } else {
455
+ svg.style.width = '100%';
456
+ }
436
457
  } catch (err) {
437
- // Ignore markmap parse errors
458
+ // Ignore sizing errors
438
459
  }
439
460
  });
440
461
  }
441
462
 
463
+ if (markmapBlocks && window.markmap && window.markmap.autoLoader && window.markmap.autoLoader.renderAll) {
464
+ window.markmap.autoLoader.renderAll();
465
+ setTimeout(resizeMarkmaps, 120);
466
+ setTimeout(resizeMarkmaps, 600);
467
+ setTimeout(resizeMarkmaps, 1600);
468
+ if (!window.__markmapResizeBound) {
469
+ window.__markmapResizeBound = true;
470
+ window.addEventListener('resize', function() {
471
+ setTimeout(resizeMarkmaps, 120);
472
+ });
473
+ }
474
+ }
475
+
442
476
  // Mermaid: convert fenced code blocks to mermaid divs
443
477
  document.querySelectorAll('code.language-mermaid').forEach(function(code) {
444
478
  var pre = code.parentElement;
@@ -256,8 +256,17 @@
256
256
  <script src="https://cdn.jsdelivr.net/npm/katex@0.16.27/dist/katex.min.js"></script>
257
257
  <script src="https://cdn.jsdelivr.net/npm/katex@0.16.27/dist/contrib/auto-render.min.js"></script>
258
258
  <script src="https://cdn.jsdelivr.net/npm/mermaid@10.6.1/dist/mermaid.min.js"></script>
259
+ <script src="https://cdn.jsdelivr.net/npm/d3@7.9.0/dist/d3.min.js"></script>
259
260
  <script src="https://cdn.jsdelivr.net/npm/markmap-lib@0.15.4/dist/browser/index.min.js"></script>
261
+ <script>
262
+ window.__markmapLib = window.markmap || window.__markmapLib;
263
+ </script>
260
264
  <script src="https://cdn.jsdelivr.net/npm/markmap-view@0.15.4/dist/browser/index.min.js"></script>
265
+ <script>
266
+ window.markmap = window.markmap || {};
267
+ window.markmap.autoLoader = { manual: true };
268
+ </script>
269
+ <script src="https://cdn.jsdelivr.net/npm/markmap-autoloader@0.18.12/dist/index.min.js"></script>
261
270
  <script src="https://cdn.jsdelivr.net/npm/marked@12.0.1/marked.min.js"></script>
262
271
  <script src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"></script>
263
272
 
@@ -41,17 +41,21 @@ def normalize_venue(raw: str) -> str:
41
41
 
42
42
 
43
43
  def extract_summary_snippet(paper: dict[str, object], max_len: int = 280) -> str:
44
- """Extract a short summary snippet, preferring the 'simple' template."""
44
+ """Extract a short summary snippet, preferring the simple/simple_phi templates."""
45
45
  summary = ""
46
46
  templates = paper.get("templates")
47
47
  if isinstance(templates, dict):
48
- simple = templates.get("simple")
49
- if isinstance(simple, dict):
48
+ for template_tag in ("simple", "simple_phi"):
49
+ template = templates.get(template_tag)
50
+ if not isinstance(template, dict):
51
+ continue
50
52
  for key in ("summary", "abstract"):
51
- value = simple.get(key)
53
+ value = template.get(key)
52
54
  if isinstance(value, str) and value.strip():
53
55
  summary = value.strip()
54
56
  break
57
+ if summary:
58
+ break
55
59
  if not summary:
56
60
  for key in ("summary", "abstract"):
57
61
  value = paper.get(key)
@@ -80,26 +80,23 @@ def discover_mineru_dirs(inputs: Iterable[str], recursive: bool) -> list[Path]:
80
80
  if path.name != "full.md":
81
81
  raise FileNotFoundError(f"Expected full.md file but got: {path}")
82
82
  parent = path.parent.resolve()
83
- if (parent / "images").is_dir():
84
- results.add(parent)
85
- else:
86
- logger.warning("Skipping %s (missing images/)", parent)
83
+ if not (parent / "images").is_dir():
84
+ logger.warning("Missing images/ for %s; continuing", parent)
85
+ results.add(parent)
87
86
  continue
88
87
  if not path.exists():
89
88
  raise FileNotFoundError(f"Input path not found: {path}")
90
89
  if path.is_dir():
91
90
  if (path / "full.md").is_file():
92
- if (path / "images").is_dir():
93
- results.add(path.resolve())
94
- else:
95
- logger.warning("Skipping %s (missing images/)", path)
91
+ if not (path / "images").is_dir():
92
+ logger.warning("Missing images/ for %s; continuing", path)
93
+ results.add(path.resolve())
96
94
  pattern = path.rglob("full.md") if recursive else path.glob("full.md")
97
95
  for full_path in pattern:
98
96
  parent = full_path.parent.resolve()
99
- if (parent / "images").is_dir():
100
- results.add(parent)
101
- else:
102
- logger.warning("Skipping %s (missing images/)", parent)
97
+ if not (parent / "images").is_dir():
98
+ logger.warning("Missing images/ for %s; continuing", parent)
99
+ results.add(parent)
103
100
  continue
104
101
  raise FileNotFoundError(f"Input path not found: {path}")
105
102
  return sorted(results)
@@ -448,4 +448,19 @@ def fix_markdown(text: str, level: str) -> str:
448
448
  if level == "aggressive":
449
449
  text = title_processor.fix_titles(text)
450
450
 
451
+ try:
452
+ from deepresearch_flow.paper.web.markdown import (
453
+ normalize_fenced_code_blocks,
454
+ normalize_footnote_definitions,
455
+ normalize_mermaid_blocks,
456
+ normalize_unbalanced_fences,
457
+ )
458
+ except Exception:
459
+ return text
460
+
461
+ text = normalize_fenced_code_blocks(text)
462
+ text = normalize_mermaid_blocks(text)
463
+ text = normalize_unbalanced_fences(text)
464
+ text = normalize_footnote_definitions(text)
465
+
451
466
  return text
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepresearch-flow
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Workflow tools for paper extraction, review, and research automation.
5
5
  Author-email: DengQi <dengqi935@gmail.com>
6
6
  License: MIT License