PyPI - deepresearch-flow - Versions diffs - 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl - Mend

deepresearch-flow 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

deepresearch_flow/paper/cli.py +63 -0
deepresearch_flow/paper/config.py +87 -12
deepresearch_flow/paper/db.py +1041 -34
deepresearch_flow/paper/db_ops.py +145 -26
deepresearch_flow/paper/extract.py +1546 -152
deepresearch_flow/paper/prompt_templates/deep_read_phi_system.j2 +8 -0
deepresearch_flow/paper/prompt_templates/deep_read_phi_user.j2 +396 -0
deepresearch_flow/paper/prompt_templates/deep_read_system.j2 +2 -0
deepresearch_flow/paper/prompt_templates/deep_read_user.j2 +272 -40
deepresearch_flow/paper/prompt_templates/eight_questions_phi_system.j2 +7 -0
deepresearch_flow/paper/prompt_templates/eight_questions_phi_user.j2 +135 -0
deepresearch_flow/paper/prompt_templates/eight_questions_system.j2 +2 -0
deepresearch_flow/paper/prompt_templates/eight_questions_user.j2 +4 -0
deepresearch_flow/paper/prompt_templates/simple_phi_system.j2 +8 -0
deepresearch_flow/paper/prompt_templates/simple_phi_user.j2 +31 -0
deepresearch_flow/paper/prompt_templates/simple_system.j2 +2 -0
deepresearch_flow/paper/prompt_templates/simple_user.j2 +2 -0
deepresearch_flow/paper/providers/azure_openai.py +45 -3
deepresearch_flow/paper/providers/openai_compatible.py +45 -3
deepresearch_flow/paper/schemas/deep_read_phi_schema.json +31 -0
deepresearch_flow/paper/schemas/deep_read_schema.json +1 -0
deepresearch_flow/paper/schemas/default_paper_schema.json +6 -0
deepresearch_flow/paper/schemas/eight_questions_schema.json +1 -0
deepresearch_flow/paper/snapshot/__init__.py +4 -0
deepresearch_flow/paper/snapshot/api.py +941 -0
deepresearch_flow/paper/snapshot/builder.py +965 -0
deepresearch_flow/paper/snapshot/identity.py +239 -0
deepresearch_flow/paper/snapshot/schema.py +245 -0
deepresearch_flow/paper/snapshot/tests/__init__.py +2 -0
deepresearch_flow/paper/snapshot/tests/test_identity.py +123 -0
deepresearch_flow/paper/snapshot/text.py +154 -0
deepresearch_flow/paper/template_registry.py +40 -0
deepresearch_flow/paper/templates/deep_read.md.j2 +4 -0
deepresearch_flow/paper/templates/deep_read_phi.md.j2 +44 -0
deepresearch_flow/paper/templates/default_paper.md.j2 +4 -0
deepresearch_flow/paper/templates/eight_questions.md.j2 +4 -0
deepresearch_flow/paper/web/app.py +10 -3
deepresearch_flow/paper/web/markdown.py +174 -8
deepresearch_flow/paper/web/static/css/main.css +8 -1
deepresearch_flow/paper/web/static/js/detail.js +46 -12
deepresearch_flow/paper/web/templates/detail.html +9 -0
deepresearch_flow/paper/web/text.py +8 -4
deepresearch_flow/recognize/cli.py +380 -103
deepresearch_flow/recognize/markdown.py +31 -7
deepresearch_flow/recognize/math.py +47 -12
deepresearch_flow/recognize/mermaid.py +320 -10
deepresearch_flow/recognize/organize.py +35 -16
deepresearch_flow/translator/cli.py +71 -20
deepresearch_flow/translator/engine.py +220 -81
deepresearch_flow/translator/fixers.py +15 -0
deepresearch_flow/translator/prompts.py +19 -2
deepresearch_flow/translator/protector.py +15 -3
{deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/METADATA +407 -33
{deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/RECORD +58 -42
{deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/WHEEL +1 -1
{deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/entry_points.txt +0 -0
{deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/licenses/LICENSE +0 -0
{deepresearch_flow-0.5.0.dist-info → deepresearch_flow-0.6.0.dist-info}/top_level.txt +0 -0

deepresearch_flow/recognize/markdown.py CHANGED Viewed

@@ -137,7 +137,13 @@ def parse_data_url(target: str) -> Optional[tuple[str, bytes]]:
     try:
         return mime, base64.b64decode(payload)
     except Exception as exc:  # pragma: no cover - defensive
-        logger.warning("Failed to decode base64 image: %s", exc)
+        message = str(exc).strip() or "unknown error"
+        logger.warning(
+            "Failed to decode base64 image (mime=%s, chars=%d): %s",
+            mime or "<unknown>",
+            len(payload),
+            message,
+        )
         return None
@@ -218,17 +224,26 @@ async def embed_markdown_images(
             try:
                 response = await http_client.get(target)
             except Exception as exc:
-                logger.warning("Failed to fetch %s: %s", target, exc)
+                message = str(exc).strip() or "unknown error"
+                logger.warning("Failed to fetch %s (md=%s): %s", target, md_path, message)
                 return None
             if response.status_code >= 400:
-                logger.warning("Failed to fetch %s: HTTP %d", target, response.status_code)
+                logger.warning(
+                    "Failed to fetch %s (md=%s): HTTP %d",
+                    target,
+                    md_path,
+                    response.status_code,
+                )
                 return None
             content_type = response.headers.get("Content-Type", "").split(";", 1)[0].strip()
             if not content_type.startswith("image/"):
                 guessed = mime_from_path(Path(urlparse(target).path))
                 if not guessed or not guessed.startswith("image/"):
                     logger.warning(
-                        "Skipping non-image URL %s (Content-Type %s)", target, content_type
+                        "Skipping non-image URL %s (md=%s, Content-Type=%s)",
+                        target,
+                        md_path,
+                        content_type,
                     )
                     return None
                 content_type = guessed
@@ -236,11 +251,16 @@ async def embed_markdown_images(
         local_path = resolve_local_path(md_path, target)
         if not local_path.exists() or not local_path.is_file():
-            logger.warning("Image not found: %s", local_path)
+            logger.warning("Image not found: %s (md=%s, target=%s)", local_path, md_path, target)
             return None
         mime = mime_from_path(local_path)
         if not mime or not mime.startswith("image/"):
-            logger.warning("Unsupported image type: %s", local_path)
+            logger.warning(
+                "Unsupported image type: %s (md=%s, mime=%s)",
+                local_path,
+                md_path,
+                mime or "unknown",
+            )
             return None
         data = await asyncio.to_thread(local_path.read_bytes)
         return data_url_from_bytes(mime, data)
@@ -264,7 +284,11 @@ async def unpack_markdown_images(
         mime, data = parsed
         ext = extension_from_mime(mime)
         if not ext:
-            logger.warning("Unsupported MIME type: %s", mime)
+            logger.warning(
+                "Unsupported MIME type: %s (alt=%s)",
+                mime,
+                alt_text or "<empty>",
+            )
             return None
         base_name = base_name_from_alt(alt_text)
         if not base_name:

deepresearch_flow/recognize/math.py CHANGED Viewed

@@ -392,7 +392,7 @@ def _ensure_node_validator() -> NodeKatexValidator | None:
     node_path = shutil.which("node")
     if not node_path:
         if not _KATEX_WARNED:
-            logger.warning("node not available; skip KaTeX validation")
+            logger.warning("node binary not found; skip KaTeX validation")
             _KATEX_WARNED = True
         return None
     if _NODE_KATEX_READY is None:
@@ -408,7 +408,10 @@ def _ensure_node_validator() -> NodeKatexValidator | None:
             _NODE_KATEX_READY = False
         if not _NODE_KATEX_READY:
             if not _KATEX_WARNED:
-                logger.warning("katex npm package not available; skip KaTeX validation")
+                logger.warning(
+                    "katex npm package not available; skip KaTeX validation (node=%s)",
+                    node_path,
+                )
                 _KATEX_WARNED = True
             return None
     script_path = str((Path(__file__).with_name("katex_check.js")).resolve())
@@ -594,12 +597,22 @@ async def fix_math_text(
     stats: MathFixStats,
     repair_enabled: bool = True,
     spans: list[FormulaSpan] | None = None,
+    allowed_keys: set[tuple[int, str | None, int | None]] | None = None,
     progress_cb: Callable[[], None] | None = None,
 ) -> tuple[str, list[dict[str, Any]]]:
     replacements: list[tuple[int, int, str]] = []
     issues: list[FormulaIssue] = []
     if spans is None:
         spans = extract_math_spans(text, context_chars)
+    if allowed_keys:
+        filtered: list[FormulaSpan] = []
+        for span in spans:
+            line_no = line_offset + span.line - 1
+            if (line_no, field_path, item_index) in allowed_keys:
+                filtered.append(span)
+        spans = filtered
+    if not spans:
+        return text, []
     stats.formulas_total += len(spans)
     file_id = short_hash(file_path)
     for idx, span in enumerate(spans):
@@ -638,16 +651,38 @@ async def fix_math_text(
     error_records: list[dict[str, Any]] = []
     if issues and repair_enabled:
-        for batch in iter_batches(issues, batch_size):
-            repairs, error = await repair_batch(
-                batch,
-                provider,
-                model_name,
-                api_key,
-                timeout,
-                max_retries,
-                client,
-            )
+        # Convert to list for parallel processing
+        batches = list(iter_batches(issues, batch_size))
+        # Parallel batch repair
+        batch_results = await asyncio.gather(
+            *[
+                repair_batch(batch, provider, model_name, api_key, timeout, max_retries, client)
+                for batch in batches
+            ],
+            return_exceptions=True,
+        )
+        # Process results
+        for batch, result in zip(batches, batch_results):
+            if isinstance(result, Exception):
+                # Entire batch failed with exception
+                error = str(result)
+                for issue in batch:
+                    stats.formulas_failed += 1
+                    error_records.append({
+                        "path": file_path,
+                        "line": line_offset + issue.span.line - 1,
+                        "delimiter": issue.span.delimiter,
+                        "latex": issue.span.content,
+                        "errors": issue.errors + [f"batch_exception: {error}"],
+                        "field_path": issue.field_path,
+                        "item_index": issue.item_index,
+                    })
+                continue
+            repairs, error = result
             if error:
                 for issue in batch:
                     stats.formulas_failed += 1

deepresearch_flow/recognize/mermaid.py CHANGED Viewed

@@ -40,6 +40,17 @@ class MermaidIssue:
     item_index: int | None
+@dataclass
+class DiagramTask:
+    """Global diagram task for parallel processing."""
+    file_path: Path
+    file_line_offset: int
+    field_path: str | None
+    item_index: int | None
+    span: MermaidSpan
+    issue: MermaidIssue | None
 @dataclass
 class MermaidFixStats:
     diagrams_total: int = 0
@@ -574,12 +585,22 @@ async def fix_mermaid_text(
     stats: MermaidFixStats,
     repair_enabled: bool = True,
     spans: list[MermaidSpan] | None = None,
+    allowed_keys: set[tuple[int, str | None, int | None]] | None = None,
     progress_cb: Callable[[], None] | None = None,
 ) -> tuple[str, list[dict[str, Any]]]:
     replacements: list[tuple[int, int, str]] = []
     issues: list[MermaidIssue] = []
     if spans is None:
         spans = extract_mermaid_spans(text, context_chars)
+    if allowed_keys:
+        filtered: list[MermaidSpan] = []
+        for span in spans:
+            line_no = line_offset + span.line - 1
+            if (line_no, field_path, item_index) in allowed_keys:
+                filtered.append(span)
+        spans = filtered
+    if not spans:
+        return text, []
     stats.diagrams_total += len(spans)
     file_id = short_hash(file_path)
     for idx, span in enumerate(spans):
@@ -614,16 +635,37 @@ async def fix_mermaid_text(
     error_records: list[dict[str, Any]] = []
     if issues and repair_enabled:
-        for batch in iter_batches(issues, batch_size):
-            repairs, error = await repair_batch(
-                batch,
-                provider,
-                model_name,
-                api_key,
-                timeout,
-                max_retries,
-                client,
-            )
+        # Convert to list for parallel processing
+        batches = list(iter_batches(issues, batch_size))
+        # Parallel batch repair
+        batch_results = await asyncio.gather(
+            *[
+                repair_batch(batch, provider, model_name, api_key, timeout, max_retries, client)
+                for batch in batches
+            ],
+            return_exceptions=True,
+        )
+        # Process results
+        for batch, result in zip(batches, batch_results):
+            if isinstance(result, Exception):
+                # Entire batch failed with exception
+                error = str(result)
+                for issue in batch:
+                    stats.diagrams_failed += 1
+                    error_records.append({
+                        "path": file_path,
+                        "line": line_offset + issue.span.line - 1,
+                        "mermaid": issue.span.content,
+                        "errors": issue.errors + [f"batch_exception: {error}"],
+                        "field_path": issue.field_path,
+                        "item_index": issue.item_index,
+                    })
+                continue
+            repairs, error = result
             if error:
                 for issue in batch:
                     stats.diagrams_failed += 1
@@ -688,3 +730,271 @@ async def fix_mermaid_text(
     updated = apply_replacements(text, replacements)
     return updated, error_records
+def extract_diagrams_from_text(
+    text: str,
+    file_path: Path,
+    line_offset: int,
+    field_path: str | None,
+    item_index: int | None,
+    context_chars: int,
+    skip_validation: bool = False,
+) -> list[DiagramTask]:
+    """Extract all diagram tasks from a text block.
+    Args:
+        skip_validation: If True, skip validation and mark all diagrams as having issues.
+                        This is faster for initial extraction when you'll validate later.
+    """
+    tasks: list[DiagramTask] = []
+    spans = extract_mermaid_spans(text, context_chars)
+    file_id = short_hash(str(file_path))
+    for idx, span in enumerate(spans):
+        issue: MermaidIssue | None = None
+        if skip_validation:
+            # Mark all diagrams as needing validation (skip expensive mmdc call)
+            issue_id = f"{file_id}:{line_offset + span.line - 1}:{idx}"
+            issue = MermaidIssue(
+                issue_id=issue_id,
+                span=span,
+                errors=["not_validated"],
+                field_path=field_path,
+                item_index=item_index,
+            )
+        else:
+            # Full validation (expensive)
+            validation = validate_mermaid(span.content)
+            if validation:
+                # Try cleanup first
+                candidate = cleanup_mermaid(span.content)
+                if candidate != span.content:
+                    candidate_validation = validate_mermaid(candidate)
+                    if not candidate_validation:
+                        # Cleanup fixed it, no issue
+                        pass
+                    else:
+                        validation = candidate_validation
+                if validation:
+                    # Still invalid after cleanup
+                    issue_id = f"{file_id}:{line_offset + span.line - 1}:{idx}"
+                    issue = MermaidIssue(
+                        issue_id=issue_id,
+                        span=span,
+                        errors=[validation],
+                        field_path=field_path,
+                        item_index=item_index,
+                    )
+        tasks.append(
+            DiagramTask(
+                file_path=file_path,
+                file_line_offset=line_offset,
+                field_path=field_path,
+                item_index=item_index,
+                span=span,
+                issue=issue,
+            )
+        )
+    return tasks
+async def repair_all_diagrams_global(
+    tasks: list[DiagramTask],
+    batch_size: int,
+    max_concurrent_batches: int,
+    provider,
+    model_name: str,
+    api_key: str | None,
+    timeout: float,
+    max_retries: int,
+    client: httpx.AsyncClient,
+    stats: MermaidFixStats,
+    progress_cb: Callable[[], None] | None = None,
+) -> tuple[dict[Path, list[tuple[int, int, str]]], list[dict[str, Any]]]:
+    """
+    Globally repair all diagrams in parallel.
+    Returns:
+        - dict mapping file paths to list of (start, end, replacement) tuples
+        - list of error records
+    """
+    from collections import defaultdict
+    stats.diagrams_total += len(tasks)
+    file_replacements: dict[Path, list[tuple[int, int, str]]] = defaultdict(list)
+    error_records: list[dict[str, Any]] = []
+    clean_tasks: list[DiagramTask] = []
+    invalid_tasks: list[DiagramTask] = []
+    needs_validation: list[DiagramTask] = []
+    task_by_issue_id: dict[str, DiagramTask] = {}
+    for task in tasks:
+        if not task.issue:
+            clean_tasks.append(task)
+            continue
+        if task.issue.errors == ["not_validated"]:
+            needs_validation.append(task)
+            continue
+        invalid_tasks.append(task)
+        task_by_issue_id[task.issue.issue_id] = task
+    if progress_cb:
+        for _ in clean_tasks:
+            progress_cb()
+    if needs_validation:
+        validate_limit = max(1, min(8, max_concurrent_batches))
+        validate_semaphore = asyncio.Semaphore(validate_limit)
+        def validate_and_cleanup(text: str) -> tuple[str, str | None]:
+            validation = validate_mermaid(text)
+            if not validation:
+                return "clean", None
+            cleaned = cleanup_mermaid(text)
+            if cleaned != text and not validate_mermaid(cleaned):
+                return "cleaned", cleaned
+            return "invalid", validation
+        async def validate_one(task: DiagramTask) -> tuple[str, str | None]:
+            async with validate_semaphore:
+                return await asyncio.to_thread(validate_and_cleanup, task.span.content)
+        results = await asyncio.gather(*[validate_one(task) for task in needs_validation])
+        for task, (status, payload) in zip(needs_validation, results):
+            if status == "clean":
+                if progress_cb:
+                    progress_cb()
+                continue
+            if status == "cleaned":
+                stats.diagrams_repaired += 1
+                file_replacements[task.file_path].append((task.span.start, task.span.end, payload or task.span.content))
+                if progress_cb:
+                    progress_cb()
+                continue
+            # Still invalid: attach validation errors and send to LLM repair.
+            task.issue.errors = [payload] if payload else ["invalid"]
+            invalid_tasks.append(task)
+            task_by_issue_id[task.issue.issue_id] = task
+    stats.diagrams_invalid += len(invalid_tasks)
+    if not invalid_tasks:
+        return file_replacements, error_records
+    issues = [task.issue for task in invalid_tasks if task.issue]
+    batches = list(iter_batches(issues, batch_size))
+    semaphore = asyncio.Semaphore(max_concurrent_batches)
+    async def process_batch(batch: list[MermaidIssue]) -> tuple[dict[str, str], str | None]:
+        async with semaphore:
+            return await repair_batch(batch, provider, model_name, api_key, timeout, max_retries, client)
+    results = await asyncio.gather(
+        *[process_batch(batch) for batch in batches],
+        return_exceptions=True,
+    )
+    for batch, result in zip(batches, results):
+        if isinstance(result, Exception):
+            error_msg = str(result)
+            for issue in batch:
+                stats.diagrams_failed += 1
+                task = task_by_issue_id.get(issue.issue_id)
+                if not task:
+                    continue
+                error_records.append(
+                    {
+                        "path": str(task.file_path),
+                        "line": task.file_line_offset + issue.span.line - 1,
+                        "mermaid": issue.span.content,
+                        "errors": issue.errors + [f"batch_error: {error_msg}"],
+                        "field_path": issue.field_path,
+                        "item_index": issue.item_index,
+                    }
+                )
+                if progress_cb:
+                    progress_cb()
+            continue
+        repairs, batch_error = result
+        if batch_error:
+            for issue in batch:
+                stats.diagrams_failed += 1
+                task = task_by_issue_id.get(issue.issue_id)
+                if not task:
+                    continue
+                error_records.append(
+                    {
+                        "path": str(task.file_path),
+                        "line": task.file_line_offset + issue.span.line - 1,
+                        "mermaid": issue.span.content,
+                        "errors": issue.errors + [f"llm_error: {batch_error}"],
+                        "field_path": issue.field_path,
+                        "item_index": issue.item_index,
+                    }
+                )
+                if progress_cb:
+                    progress_cb()
+            continue
+        for issue in batch:
+            task = task_by_issue_id.get(issue.issue_id)
+            if not task:
+                if progress_cb:
+                    progress_cb()
+                continue
+            repaired = repairs.get(issue.issue_id)
+            if not repaired:
+                stats.diagrams_failed += 1
+                error_records.append(
+                    {
+                        "path": str(task.file_path),
+                        "line": task.file_line_offset + issue.span.line - 1,
+                        "mermaid": issue.span.content,
+                        "errors": issue.errors + ["llm_missing_output"],
+                        "field_path": issue.field_path,
+                        "item_index": issue.item_index,
+                    }
+                )
+                if progress_cb:
+                    progress_cb()
+                continue
+            repaired = strip_mermaid_fences(repaired)
+            repaired = cleanup_mermaid(repaired)
+            validation = validate_mermaid(repaired)
+            if validation:
+                stats.diagrams_failed += 1
+                error_records.append(
+                    {
+                        "path": str(task.file_path),
+                        "line": task.file_line_offset + issue.span.line - 1,
+                        "mermaid": issue.span.content,
+                        "errors": issue.errors + [f"repair_still_invalid: {validation}"],
+                        "field_path": issue.field_path,
+                        "item_index": issue.item_index,
+                    }
+                )
+                if progress_cb:
+                    progress_cb()
+                continue
+            stats.diagrams_repaired += 1
+            file_replacements[task.file_path].append((issue.span.start, issue.span.end, repaired))
+            if progress_cb:
+                progress_cb()
+    return file_replacements, error_records

deepresearch_flow/recognize/organize.py CHANGED Viewed

@@ -31,7 +31,7 @@ async def _format_markdown(text: str) -> str:
     global _RUMDL_WARNED
     if not _RUMDL_PATH:
         if not _RUMDL_WARNED:
-            logger.warning("rumdl not available; skip markdown formatting")
+            logger.warning("rumdl not available; skip markdown formatting (recognize)")
             _RUMDL_WARNED = True
         return text
@@ -45,10 +45,15 @@ async def _format_markdown(text: str) -> str:
                 check=False,
             )
         except OSError as exc:
-            logger.warning("rumdl fmt failed: %s", exc)
+            message = str(exc).strip() or "unknown error"
+            logger.warning("rumdl fmt failed (oserror=%s): %s", type(exc).__name__, message)
             return text
         if proc.returncode != 0:
-            logger.warning("rumdl fmt failed (%s): %s", proc.returncode, proc.stderr.strip())
+            logger.warning(
+                "rumdl fmt failed (rc=%s): %s",
+                proc.returncode,
+                proc.stderr.strip() or "unknown error",
+            )
             return text
         return proc.stdout or text
@@ -80,26 +85,35 @@ def discover_mineru_dirs(inputs: Iterable[str], recursive: bool) -> list[Path]:
             if path.name != "full.md":
                 raise FileNotFoundError(f"Expected full.md file but got: {path}")
             parent = path.parent.resolve()
-            if (parent / "images").is_dir():
-                results.add(parent)
-            else:
-                logger.warning("Skipping %s (missing images/)", parent)
+            if not (parent / "images").is_dir():
+                logger.warning(
+                    "Missing images/ for %s; continuing (expected=%s)",
+                    parent,
+                    parent / "images",
+                )
+            results.add(parent)
             continue
         if not path.exists():
             raise FileNotFoundError(f"Input path not found: {path}")
         if path.is_dir():
             if (path / "full.md").is_file():
-                if (path / "images").is_dir():
-                    results.add(path.resolve())
-                else:
-                    logger.warning("Skipping %s (missing images/)", path)
+                if not (path / "images").is_dir():
+                    logger.warning(
+                        "Missing images/ for %s; continuing (expected=%s)",
+                        path,
+                        path / "images",
+                    )
+                results.add(path.resolve())
             pattern = path.rglob("full.md") if recursive else path.glob("full.md")
             for full_path in pattern:
                 parent = full_path.parent.resolve()
-                if (parent / "images").is_dir():
-                    results.add(parent)
-                else:
-                    logger.warning("Skipping %s (missing images/)", parent)
+                if not (parent / "images").is_dir():
+                    logger.warning(
+                        "Missing images/ for %s; continuing (expected=%s)",
+                        parent,
+                        parent / "images",
+                    )
+                results.add(parent)
             continue
         raise FileNotFoundError(f"Input path not found: {path}")
     return sorted(results)
@@ -129,7 +143,12 @@ async def organize_mineru_dir(
                 return None
             source_path = resolve_local_path(md_path, target)
             if not source_path.exists() or not source_path.is_file():
-                logger.warning("Image not found: %s", source_path)
+                logger.warning(
+                    "Image not found: %s (md=%s, target=%s)",
+                    source_path,
+                    md_path,
+                    target,
+                )
                 return None
             if source_path in image_map:
                 return f"images/{image_map[source_path]}"

deepresearch-flow 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

deepresearch-flow 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl