deepresearch-flow 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,7 +30,12 @@ except ImportError:
30
30
 
31
31
 
32
32
def load_json(path: Path) -> list[dict[str, Any]]:
    """Read paper records from a JSON file.

    Accepts either a bare JSON array of papers or a wrapper object of the
    form ``{"template_tag": ..., "papers": [...]}`` and returns the list
    of paper dicts in both cases.

    Raises:
        click.ClickException: if the parsed JSON is neither shape.
    """
    parsed = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        papers = parsed.get("papers")
        if isinstance(papers, list):
            return papers
    raise click.ClickException("Input JSON must be a list or {template_tag, papers}")
34
39
 
35
40
 
36
41
  def write_json(path: Path, data: Any) -> None:
@@ -378,9 +383,25 @@ def register_db_commands(db_group: click.Group) -> None:
378
383
  month_counts: dict[str, int] = {}
379
384
  author_counts: dict[str, int] = {}
380
385
  tag_counts: dict[str, int] = {}
386
+ keyword_counts: dict[str, int] = {}
381
387
  journal_counts: dict[str, int] = {}
382
388
  conference_counts: dict[str, int] = {}
383
389
  other_venue_counts: dict[str, int] = {}
390
def normalize_keywords(value: Any) -> list[str]:
    """Coerce a raw ``keywords`` field into a flat list of clean tokens.

    ``None`` yields ``[]``; a string is split on ``;`` and ``,``; a list
    is used as-is; any other value is treated as a single item. Each item
    is stringified, stripped, and lowercased; empty tokens are dropped.
    """
    if value is None:
        return []
    if isinstance(value, str):
        raw_items: list[Any] = re.split(r"[;,]", value)
    elif isinstance(value, list):
        raw_items = value
    else:
        raw_items = [value]
    cleaned = (str(raw).strip().lower() for raw in raw_items)
    return [token for token in cleaned if token]
384
405
  for paper in papers:
385
406
  bibtex_fields = {}
386
407
  bibtex_type = None
@@ -406,6 +427,8 @@ def register_db_commands(db_group: click.Group) -> None:
406
427
  author_counts[author] = author_counts.get(author, 0) + 1
407
428
  for tag in paper.get("ai_generated_tags") or []:
408
429
  tag_counts[tag] = tag_counts.get(tag, 0) + 1
430
+ for keyword in normalize_keywords(paper.get("keywords")):
431
+ keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1
409
432
 
410
433
  venue = None
411
434
  if bibtex_type in {"article"}:
@@ -541,6 +564,16 @@ def register_db_commands(db_group: click.Group) -> None:
541
564
  tag_table.add_row(tag, str(count), f"{percentage:.1f}%")
542
565
  console.print(tag_table)
543
566
 
567
+ if keyword_counts:
568
+ keyword_table = Table(title=f"Top {top_n} Keywords")
569
+ keyword_table.add_column("Keyword", style="cyan")
570
+ keyword_table.add_column("Count", style="green", justify="right")
571
+ keyword_table.add_column("Percentage", style="yellow", justify="right")
572
+ for keyword, count in sorted(keyword_counts.items(), key=lambda item: item[1], reverse=True)[:top_n]:
573
+ percentage = (count / total * 100) if total else 0
574
+ keyword_table.add_row(keyword, str(count), f"{percentage:.1f}%")
575
+ console.print(keyword_table)
576
+
544
577
  @db_group.command("serve")
545
578
  @click.option("-i", "--input", "input_paths", multiple=True, required=True, help="Input JSON file path")
546
579
  @click.option("-b", "--bibtex", "bibtex_path", default=None, help="Optional BibTeX file path")
def load_existing(path: Path) -> list[dict[str, Any]]:
    """Best-effort load of previously written results from *path*.

    Returns the paper list from either a bare JSON array or a
    ``{"template_tag": ..., "papers": [...]}`` wrapper. Any problem —
    missing file, invalid JSON, or an unexpected top-level shape —
    yields an empty list instead of raising, so callers can always
    append to the result.
    """
    if not path.exists():
        return []
    try:
        parsed = json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return []
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        papers = parsed.get("papers")
        if isinstance(papers, list):
            return papers
    return []
291
296
 
292
297
 
293
298
  def load_errors(path: Path) -> list[dict[str, Any]]:
@@ -462,6 +467,7 @@ async def extract_documents(
462
467
  ) -> None:
463
468
  start_time = time.monotonic()
464
469
  markdown_files = discover_markdown(inputs, glob_pattern, recursive=True)
470
+ template_tag = prompt_template if not custom_prompt else "custom"
465
471
 
466
472
  if retry_failed:
467
473
  error_entries = load_errors(errors_path)
@@ -798,7 +804,8 @@ async def extract_documents(
798
804
  if path not in seen:
799
805
  final_results.append(entry)
800
806
 
801
- write_json(output_path, final_results)
807
+ output_payload = {"template_tag": template_tag, "papers": final_results}
808
+ write_json(output_path, output_payload)
802
809
 
803
810
  error_payload = [
804
811
  {
@@ -823,7 +830,7 @@ async def extract_documents(
823
830
  continue
824
831
  base_name = split_output_name(Path(source_path))
825
832
  file_name = unique_split_name(base_name, used_names, source_path)
826
- write_json(target_dir / f"{file_name}.json", entry)
833
+ write_json(target_dir / f"{file_name}.json", {"template_tag": template_tag, "papers": [entry]})
827
834
 
828
835
  if render_md:
829
836
  try: