PyPI - deepresearch-flow - Versions diffs - 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

deepresearch-flow 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

deepresearch_flow/paper/web/handlers/pages.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
 import html
+from pathlib import Path
 from urllib.parse import urlencode
 from starlette.requests import Request
@@ -16,6 +17,7 @@ from deepresearch_flow.paper.web.markdown import (
     render_paper_markdown,
     select_template_tag,
 )
+from deepresearch_flow.paper.web.static_assets import resolve_asset_urls
 from deepresearch_flow.paper.web.text import normalize_title
 from deepresearch_flow.paper.web.templates import (
     build_pdfjs_viewer_url,
@@ -23,6 +25,47 @@ from deepresearch_flow.paper.web.templates import (
 )
+def _safe_read_text(path: Path) -> str:
+    try:
+        return path.read_text(encoding="utf-8")
+    except UnicodeDecodeError:
+        return path.read_text(encoding="latin-1")
+def _load_markdown_for_view(
+    index: PaperIndex,
+    asset_config,
+    export_dir: Path | None,
+    source_hash: str,
+    *,
+    lang: str | None = None,
+) -> str | None:
+    if export_dir and asset_config and asset_config.enabled and (asset_config.base_url or "") == "":
+        if lang:
+            translated_url = asset_config.translated_md_urls.get(source_hash, {}).get(lang.lower())
+            if translated_url:
+                export_path = export_dir / translated_url.lstrip("/")
+                if export_path.exists():
+                    return _safe_read_text(export_path)
+        else:
+            md_url = asset_config.md_urls.get(source_hash)
+            if md_url:
+                export_path = export_dir / md_url.lstrip("/")
+                if export_path.exists():
+                    return _safe_read_text(export_path)
+    if lang:
+        md_path = index.translated_md_by_hash.get(source_hash, {}).get(lang.lower())
+    else:
+        md_path = index.md_path_by_hash.get(source_hash)
+    if not md_path:
+        return None
+    raw = _safe_read_text(md_path)
+    if lang:
+        raw = normalize_markdown_images(raw)
+    return raw
 async def robots_txt(_: Request) -> Response:
     """Serve robots.txt to disallow all crawlers."""
     return Response("User-agent: *\nDisallow: /\n", media_type="text/plain")
@@ -78,7 +121,13 @@ async def paper_detail(request: Request) -> HTMLResponse:
     embed = request.query_params.get("embed") == "1"
     pdf_path = index.pdf_path_by_hash.get(source_hash)
-    pdf_url = f"/api/pdf/{source_hash}"
+    asset_urls = resolve_asset_urls(
+        index,
+        source_hash,
+        request.app.state.asset_config,
+        prefer_local=request.app.state.static_mode == "dev",
+    )
+    pdf_url = asset_urls["pdf_url"] or ""
     source_available = source_hash in index.md_path_by_hash
     translations = index.translated_md_by_hash.get(source_hash, {})
     translation_langs = sorted(translations.keys(), key=str.lower)
@@ -156,14 +205,18 @@ async def paper_detail(request: Request) -> HTMLResponse:
     # Initialize template variables
     body_html = ""
-    raw_content = ""
     summary_template_name = ""
     template_warning = ""
     template_controls = ""
     source_path_str = ""
     translated_path_str = ""
+    source_markdown_url = ""
+    translated_markdown_url = ""
+    images_base_url = asset_urls["images_base_url"] or ""
     pdf_filename = ""
     pdfjs_url = ""
+    pdfjs_script_url = ""
+    pdfjs_worker_url = ""
     left_src = ""
     right_src = ""
     split_options: list[tuple[str, str]] = []
@@ -208,21 +261,27 @@ if (templateSelect) {{
 </script>
 """
+    prefer_local = request.app.state.static_mode == "dev"
     # Source view
     if view == "source":
         source_path = index.md_path_by_hash.get(source_hash)
-        if not source_path:
+        if not source_path or not asset_urls["md_url"]:
             body_html = '<div class="warning">Source markdown not found. Provide --md-root to enable source viewing.</div>'
         else:
-            try:
-                raw = source_path.read_text(encoding="utf-8")
-            except UnicodeDecodeError:
-                raw = source_path.read_text(encoding="latin-1")
-            md_renderer = create_md_renderer()
-            body_html = render_markdown_with_math_placeholders(md_renderer, raw)
-            raw_content = raw
+            source_markdown_url = asset_urls["md_url"] or ""
             source_path_str = str(source_path)
             show_outline = True
+            if prefer_local:
+                raw = _load_markdown_for_view(
+                    index,
+                    request.app.state.asset_config,
+                    request.app.state.static_export_dir,
+                    source_hash,
+                )
+                if raw is not None:
+                    md_renderer = create_md_renderer()
+                    body_html = render_markdown_with_math_placeholders(md_renderer, raw)
     # Translated view
     if view == "translated":
@@ -230,38 +289,55 @@ if (templateSelect) {{
             body_html = '<div class="warning">No translated markdown found. Provide <code>--md-translated-root</code> and place <code><base>.<lang>.md</code> under that root.</div>'
         else:
             translated_path = translations.get(selected_lang)
-            if not translated_path:
+            translated_markdown_url = asset_urls["md_translated_url"].get(selected_lang, "")
+            if not translated_path or not translated_markdown_url:
                 body_html = '<div class="warning">Translated markdown not found for the selected language.</div>'
             else:
-                try:
-                    raw = translated_path.read_text(encoding="utf-8")
-                except UnicodeDecodeError:
-                    raw = translated_path.read_text(encoding="latin-1")
-                raw = normalize_markdown_images(raw)
-                md_renderer = create_md_renderer()
-                body_html = render_markdown_with_math_placeholders(md_renderer, raw)
-                raw_content = raw
                 translated_path_str = str(translated_path)
                 show_outline = True
+                if prefer_local:
+                    raw = _load_markdown_for_view(
+                        index,
+                        request.app.state.asset_config,
+                        request.app.state.static_export_dir,
+                        source_hash,
+                        lang=selected_lang,
+                    )
+                    if raw is not None:
+                        md_renderer = create_md_renderer()
+                        body_html = render_markdown_with_math_placeholders(md_renderer, raw)
     # PDF view
     if view == "pdf":
-        if not pdf_path:
+        if not pdf_path or not pdf_url:
             body_html = '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
         pdf_filename = str(pdf_path.name) if pdf_path else ""
+        pdfjs_cdn_base_url = request.app.state.pdfjs_cdn_base_url
+        if pdfjs_cdn_base_url:
+            pdfjs_script_url = f"{pdfjs_cdn_base_url}/legacy/build/pdf.min.js"
+            pdfjs_worker_url = f"{pdfjs_cdn_base_url}/legacy/build/pdf.worker.min.js"
+        else:
+            pdfjs_script_url = "/pdfjs/build/pdf.js"
+            pdfjs_worker_url = "/pdfjs/build/pdf.worker.js"
     # PDF.js view
     if view == "pdfjs":
-        if not pdf_path:
+        if not pdf_path or not pdf_url:
             body_html = '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
-        pdfjs_url = build_pdfjs_viewer_url(pdf_url)
+        pdfjs_url = build_pdfjs_viewer_url(
+            pdf_url,
+            cdn_base_url=request.app.state.pdfjs_cdn_base_url,
+        )
         pdf_filename = str(pdf_path.name) if pdf_path else ""
     # Split view
     if view == "split":
         def pane_src(pane_view: str) -> str:
-            if pane_view == "pdfjs" and pdf_path:
-                return build_pdfjs_viewer_url(pdf_url)
+            if pane_view == "pdfjs" and pdf_path and pdf_url:
+                return build_pdfjs_viewer_url(
+                    pdf_url,
+                    cdn_base_url=request.app.state.pdfjs_cdn_base_url,
+                )
             params: dict[str, str] = {"view": pane_view, "embed": "1"}
             if pane_view == "summary" and template_param:
                 params["template"] = str(template_param)
@@ -307,12 +383,14 @@ if (templateSelect) {{
             show_outline=show_outline,
             # Content variables
             body_html=body_html,
-            raw_content=raw_content,
             summary_template_name=summary_template_name,
             template_warning=template_warning,
             template_controls=template_controls,
             available_templates=available_templates,
             selected_template_tag=selected_tag,
+            images_base_url=images_base_url,
+            source_markdown_url=source_markdown_url,
+            translated_markdown_url=translated_markdown_url,
             # Source view
             source_path=source_path_str,
             # Translated view
@@ -322,6 +400,8 @@ if (templateSelect) {{
             # PDF view
             pdf_filename=pdf_filename,
             pdf_url=pdf_url,
+            pdfjs_script_url=pdfjs_script_url,
+            pdfjs_worker_url=pdfjs_worker_url,
             # PDF.js view
             pdfjs_url=pdfjs_url,
             # Split view

deepresearch_flow/paper/web/markdown.py CHANGED Viewed

@@ -41,13 +41,88 @@ def strip_paragraph_wrapped_tables(text: str) -> str:
 def normalize_footnote_definitions(text: str) -> str:
-    """Normalize footnote definitions to the markdown-it footnote format."""
+    """Normalize footnotes and numbered notes to markdown-it footnote format."""
     lines = text.splitlines()
-    for idx, line in enumerate(lines):
+    out: list[str] = []
+    in_fence = False
+    fence_char = ""
+    fence_len = 0
+    in_notes = False
+    notes_level: int | None = None
+    notes_heading_re = re.compile(
+        r"^#{1,6}\s*(参考文献|参考资料|参考书目|文献|引用|注释|脚注|notes?|references?|bibliography|works\s+cited|citations?)\b",
+        re.IGNORECASE,
+    )
+    notes_heading_plain_re = re.compile(
+        r"^(参考文献|参考资料|参考书目|文献|引用|注释|脚注|notes?|references?|bibliography|works\s+cited|citations?)\s*:?$",
+        re.IGNORECASE,
+    )
+    last_note_index: int | None = None
+    for line in lines:
+        stripped = line.lstrip()
+        if stripped.startswith(("```", "~~~")):
+            run_len = 0
+            while run_len < len(stripped) and stripped[run_len] == stripped[0]:
+                run_len += 1
+            if not in_fence:
+                in_fence = True
+                fence_char = stripped[0]
+                fence_len = run_len
+            elif stripped[0] == fence_char and run_len >= fence_len:
+                in_fence = False
+                fence_char = ""
+                fence_len = 0
+            out.append(line)
+            continue
+        if in_fence:
+            out.append(line)
+            continue
+        heading_match = notes_heading_re.match(stripped)
+        if heading_match:
+            in_notes = True
+            notes_level = len(stripped.split(" ")[0].lstrip("#"))
+            last_note_index = None
+        elif notes_heading_plain_re.match(stripped):
+            in_notes = True
+            notes_level = None
+            last_note_index = None
+        elif re.match(r"^#{1,6}\s+", stripped):
+            if notes_level is not None:
+                level = len(stripped.split(" ")[0].lstrip("#"))
+                if level <= notes_level:
+                    in_notes = False
+                    notes_level = None
+                    last_note_index = None
         match = re.match(r"^\[\^([0-9]+)\]\s+", line)
         if match:
-            lines[idx] = re.sub(r"^\[\^([0-9]+)\]\s+", r"[^\1]: ", line)
-    return "\n".join(lines)
+            out.append(re.sub(r"^\[\^([0-9]+)\]\s+", r"[^\1]: ", line))
+            continue
+        if in_notes:
+            list_match = re.match(r"^\s*(\d{1,4})[.)]\s+", line)
+            if list_match:
+                number = list_match.group(1)
+                rest = line[list_match.end() :].strip()
+                out.append(f"[^{number}]: {rest}")
+                last_note_index = len(out) - 1
+                continue
+            if last_note_index is not None:
+                if line.strip() == "":
+                    out.append(line)
+                    last_note_index = None
+                    continue
+                if line.startswith((" ", "\t")):
+                    out[last_note_index] = f"{out[last_note_index]} {line.strip()}"
+                    continue
+        line = re.sub(r"(?<!\^)\[(\d{1,4})\]", r"[^\1]", line)
+        out.append(line)
+    return "\n".join(out)
 def normalize_markdown_images(text: str) -> str:
@@ -96,6 +171,154 @@ def normalize_markdown_images(text: str) -> str:
     return "\n".join(out)
+def normalize_fenced_code_blocks(text: str) -> str:
+    """Ensure fenced code block markers appear on their own lines."""
+    fence_re = re.compile(r"(`{3,}|~{3,})")
+    out: list[str] = []
+    for line in text.splitlines():
+        match = fence_re.search(line)
+        if not match:
+            out.append(line)
+            continue
+        prefix = line[: match.start()]
+        suffix = line[match.start() :]
+        if prefix.strip():
+            out.append(prefix.rstrip())
+            out.append(suffix.lstrip())
+        else:
+            out.append(line)
+    return "\n".join(out)
+def normalize_mermaid_blocks(text: str) -> str:
+    """Keep mermaid fences clean by moving legend text outside the block."""
+    lines = text.splitlines()
+    out: list[str] = []
+    in_mermaid = False
+    fence_char = ""
+    fence_len = 0
+    mermaid_lines: list[str] = []
+    legend_lines: list[str] = []
+    def is_legend(line: str) -> bool:
+        stripped = line.strip()
+        if not stripped:
+            return False
+        if stripped.startswith("图例") or stripped.lower().startswith("legend"):
+            return True
+        return "节点定位" in stripped
+    for line in lines:
+        stripped = line.lstrip()
+        if stripped.startswith(("```", "~~~")):
+            run_len = 0
+            while run_len < len(stripped) and stripped[run_len] == stripped[0]:
+                run_len += 1
+            rest = stripped[run_len:].strip()
+            if not in_mermaid and rest.lower().startswith("mermaid"):
+                in_mermaid = True
+                fence_char = stripped[0]
+                fence_len = run_len
+                mermaid_lines = []
+                legend_lines = []
+                out.append(line)
+                continue
+            if in_mermaid and stripped[0] == fence_char and run_len >= fence_len and rest == "":
+                out.extend(mermaid_lines)
+                out.append(line)
+                out.extend(legend_lines)
+                in_mermaid = False
+                fence_char = ""
+                fence_len = 0
+                mermaid_lines = []
+                legend_lines = []
+                continue
+            out.append(line)
+            continue
+        if in_mermaid:
+            if is_legend(line):
+                legend_lines.append(line)
+            else:
+                mermaid_lines.append(line)
+            continue
+        out.append(line)
+    if in_mermaid:
+        out.extend(mermaid_lines)
+        out.extend(legend_lines)
+    return "\n".join(out)
+def normalize_unbalanced_fences(text: str) -> str:
+    """Drop unmatched opening fences so later content still renders."""
+    lines = text.splitlines()
+    out: list[str] = []
+    in_fence = False
+    fence_char = ""
+    fence_len = 0
+    fence_has_content = False
+    fence_open_indices: list[int] = []
+    fence_re = re.compile(r"([`~]{3,})(.*)$")
+    for line in lines:
+        stripped = line.lstrip(" ")
+        leading_spaces = len(line) - len(stripped)
+        is_fence = False
+        if leading_spaces <= 3 and stripped:
+            match = fence_re.match(stripped)
+            if match:
+                run = match.group(1)
+                fence = run[0]
+                run_len = len(run)
+                rest = match.group(2) or ""
+                has_info = bool(rest.strip())
+                if not in_fence:
+                    in_fence = True
+                    fence_char = fence
+                    fence_len = run_len
+                    fence_has_content = False
+                    fence_open_indices.append(len(out))
+                    is_fence = True
+                elif fence == fence_char and run_len >= fence_len and not has_info:
+                    if not fence_has_content:
+                        if fence_open_indices:
+                            out.pop(fence_open_indices[-1])
+                            fence_open_indices.pop()
+                        in_fence = True
+                        fence_char = fence
+                        fence_len = run_len
+                        fence_has_content = False
+                        fence_open_indices.append(len(out))
+                        is_fence = True
+                    else:
+                        in_fence = False
+                        fence_char = ""
+                        fence_len = 0
+                        fence_has_content = False
+                        is_fence = True
+                elif fence == fence_char and run_len >= fence_len and has_info:
+                    if fence_open_indices:
+                        out.pop(fence_open_indices[-1])
+                        fence_open_indices.pop()
+                    in_fence = True
+                    fence_char = fence
+                    fence_len = run_len
+                    fence_has_content = False
+                    fence_open_indices.append(len(out))
+                    is_fence = True
+        out.append(line)
+        if in_fence and not is_fence and line.strip():
+            fence_has_content = True
+    if in_fence and fence_open_indices:
+        out.pop(fence_open_indices[-1])
+    return "\n".join(out)
 def extract_math_placeholders(text: str) -> tuple[str, dict[str, str]]:
     """Extract math expressions and replace with placeholders."""
     placeholders: dict[str, str] = {}
@@ -476,6 +699,9 @@ def extract_html_table_placeholders(text: str) -> tuple[str, dict[str, str]]:
 def render_markdown_with_math_placeholders(md: MarkdownIt, text: str) -> str:
     """Render markdown with math, images, and tables properly escaped."""
+    text = normalize_mermaid_blocks(text)
+    text = normalize_fenced_code_blocks(text)
+    text = normalize_unbalanced_fences(text)
     text = strip_paragraph_wrapped_tables(text)
     text = normalize_footnote_definitions(text)
     rendered, table_placeholders = extract_html_table_placeholders(text)

deepresearch_flow/paper/web/pdfjs/web/viewer.html CHANGED Viewed

@@ -29,11 +29,63 @@ See https://github.com/adobe-type-tools/cmap-resources
 <!-- This snippet is used in production (included from viewer.html) -->
 <link rel="resource" type="application/l10n" href="locale/locale.properties">
-<script src="../build/pdf.js"></script>
-    <link rel="stylesheet" href="viewer.css">
-  <script src="viewer.js"></script>
+<script>
+(function() {
+  var params = new URLSearchParams(window.location.search);
+  var cdnBase = params.get('cdn');
+  if (cdnBase) {
+    cdnBase = cdnBase.replace(/\/+$/, '');
+  }
+  function loadStyle(url, fallbackUrl) {
+    var link = document.createElement('link');
+    link.rel = 'stylesheet';
+    link.href = url;
+    if (fallbackUrl) {
+      link.onerror = function() {
+        if (link.dataset.fallbackLoaded) return;
+        link.dataset.fallbackLoaded = '1';
+        link.href = fallbackUrl;
+      };
+    }
+    document.head.appendChild(link);
+  }
+  function loadScript(url, fallbackUrl, onload) {
+    var script = document.createElement('script');
+    script.src = url;
+    script.defer = true;
+    script.onload = function() {
+      if (onload) onload();
+    };
+    script.onerror = function() {
+      if (!fallbackUrl) return;
+      var fallback = document.createElement('script');
+      fallback.src = fallbackUrl;
+      fallback.defer = true;
+      fallback.onload = function() {
+        if (onload) onload();
+      };
+      document.head.appendChild(fallback);
+    };
+    document.head.appendChild(script);
+  }
+  var viewerCss = cdnBase ? cdnBase + '/web/viewer.css' : 'viewer.css';
+  loadStyle(viewerCss, 'viewer.css');
+  var pdfjsUrl = cdnBase ? cdnBase + '/build/pdf.js' : '../build/pdf.js';
+  loadScript(pdfjsUrl, '../build/pdf.js', function() {
+    var workerUrl = cdnBase ? cdnBase + '/build/pdf.worker.js' : '../build/pdf.worker.js';
+    if (window.pdfjsLib) {
+      window.pdfjsLib.GlobalWorkerOptions.workerSrc = workerUrl;
+    }
+    var viewerJs = 'viewer.js';
+    var viewerFallback = cdnBase ? cdnBase + '/web/viewer.js' : null;
+    loadScript(viewerJs, viewerFallback);
+  });
+})();
+</script>
   </head>
   <body tabindex="1">

deepresearch_flow/paper/web/pdfjs/web/viewer.js CHANGED Viewed

@@ -1637,6 +1637,10 @@ exports.PDFViewerApplication = PDFViewerApplication;
       return;
     }
     try {
+      const params = new URLSearchParams(window.location.search);
+      if (params.get("allow_origin") === "1" || params.get("disable_origin_check") === "1") {
+        return;
+      }
       const viewerOrigin = new URL(window.location.href).origin || "null";
       if (HOSTED_VIEWER_ORIGINS.includes(viewerOrigin)) {
         return;
@@ -14096,4 +14100,4 @@ if (document.readyState === "interactive" || document.readyState === "complete")
 /******/ })()
 ;
-//# sourceMappingURL=viewer.js.map
+//# sourceMappingURL=viewer.js.map

deepresearch_flow/paper/web/static/css/main.css CHANGED Viewed

@@ -223,11 +223,18 @@ header a {
 .markmap {
   width: 100%;
-  height: 420px;
   border: 1px solid #e2e8f0;
   border-radius: 12px;
   background: #ffffff;
   margin: 12px 0;
+  padding: 8px;
+  overflow-x: auto;
+}
+.markmap > svg {
+  width: 100%;
+  min-height: 240px;
+  display: block;
 }
 /* Utilities */

deepresearch-flow 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

deepresearch-flow 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl