deepresearch-flow 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,7 +31,8 @@ from deepresearch_flow.paper.template_registry import (
31
31
  from deepresearch_flow.paper.render import resolve_render_template, render_papers
32
32
 
33
33
  try:
34
- from pybtex.database import parse_file
34
+ from pybtex.database import BibliographyData, parse_file
35
+ from pybtex.database.output.bibtex import Writer
35
36
  PYBTEX_AVAILABLE = True
36
37
  except ImportError:
37
38
  PYBTEX_AVAILABLE = False
@@ -1015,11 +1016,18 @@ def register_db_commands(db_group: click.Group) -> None:
1015
1016
  def merge_group() -> None:
1016
1017
  """Merge paper JSON inputs."""
1017
1018
 
1018
- def _summarize_merge(output_path: Path, merged: list[dict[str, Any]], *, input_count: int) -> None:
1019
+ def _summarize_merge(output_path: Path, merged: Any, *, input_count: int) -> None:
1020
+ items: list[dict[str, Any]] = []
1021
+ if isinstance(merged, dict):
1022
+ raw_items = merged.get("papers")
1023
+ if isinstance(raw_items, list):
1024
+ items = [item for item in raw_items if isinstance(item, dict)]
1025
+ elif isinstance(merged, list):
1026
+ items = [item for item in merged if isinstance(item, dict)]
1027
+
1019
1028
  field_set: set[str] = set()
1020
- for item in merged:
1021
- if isinstance(item, dict):
1022
- field_set.update(item.keys())
1029
+ for item in items:
1030
+ field_set.update(item.keys())
1023
1031
  field_list = sorted(field_set)
1024
1032
 
1025
1033
  console = Console()
@@ -1027,7 +1035,7 @@ def register_db_commands(db_group: click.Group) -> None:
1027
1035
  summary.add_column("Metric", style="bold")
1028
1036
  summary.add_column("Value")
1029
1037
  summary.add_row("Inputs", str(input_count))
1030
- summary.add_row("Items", str(len(merged)))
1038
+ summary.add_row("Items", str(len(items)))
1031
1039
  summary.add_row("Fields", str(len(field_list)))
1032
1040
  summary.add_row("Output", str(output_path))
1033
1041
  console.print(summary)
@@ -1039,17 +1047,65 @@ def register_db_commands(db_group: click.Group) -> None:
1039
1047
  field_table.add_row(name)
1040
1048
  console.print(field_table)
1041
1049
 
1050
+ def _bibtex_entry_score(entry: Any) -> int:
1051
+ fields = getattr(entry, "fields", {}) or {}
1052
+ persons = getattr(entry, "persons", {}) or {}
1053
+ person_count = sum(len(people) for people in persons.values())
1054
+ return len(fields) + len(persons) + person_count
1055
+
1056
+ def _summarize_bibtex_merge(output_path: Path, *, input_count: int, entry_count: int, duplicate_count: int) -> None:
1057
+ summary = Table(title="BibTeX Merge Summary")
1058
+ summary.add_column("Metric", style="bold")
1059
+ summary.add_column("Value")
1060
+ summary.add_row("Inputs", str(input_count))
1061
+ summary.add_row("Entries", str(entry_count))
1062
+ summary.add_row("Duplicates", str(duplicate_count))
1063
+ summary.add_row("Output", str(output_path))
1064
+ Console().print(summary)
1065
+
1042
1066
  @merge_group.command("library")
1043
1067
  @click.option("-i", "--inputs", "input_paths", multiple=True, required=True, help="Input JSON files")
1068
+ @click.option("--template-tag", "template_tag", default=None, help="Template tag for merged output")
1044
1069
  @click.option("-o", "--output", "output_path", required=True, help="Output JSON file path")
1045
- def merge_library(input_paths: Iterable[str], output_path: str) -> None:
1070
+ def merge_library(input_paths: Iterable[str], template_tag: str | None, output_path: str) -> None:
1046
1071
  paths = [Path(path) for path in input_paths]
1047
1072
  merged: list[dict[str, Any]] = []
1073
+ tag_candidates: list[str] = []
1048
1074
  for path in paths:
1049
- merged.extend(load_json(path))
1075
+ payload = load_json(path)
1076
+ if isinstance(payload, dict):
1077
+ tag = str(payload.get("template_tag") or "")
1078
+ if tag:
1079
+ tag_candidates.append(tag)
1080
+ papers = payload.get("papers")
1081
+ if isinstance(papers, list):
1082
+ merged.extend(papers)
1083
+ else:
1084
+ raise click.ClickException("Input JSON must be a list or {template_tag, papers}")
1085
+ elif isinstance(payload, list):
1086
+ merged.extend(payload)
1087
+ else:
1088
+ raise click.ClickException("Input JSON must be a list or {template_tag, papers}")
1089
+ if not template_tag:
1090
+ inferred = ""
1091
+ for paper in merged:
1092
+ if not isinstance(paper, dict):
1093
+ continue
1094
+ inferred = str(paper.get("prompt_template") or paper.get("template_tag") or "")
1095
+ if inferred:
1096
+ break
1097
+ if inferred:
1098
+ template_tag = inferred
1099
+ if tag_candidates and not template_tag:
1100
+ template_tag = tag_candidates[0]
1101
+ if not template_tag:
1102
+ template_tag = "unknown"
1103
+ if tag_candidates and any(tag != template_tag for tag in tag_candidates):
1104
+ click.echo("Warning: multiple template_tag values detected in inputs; using first")
1050
1105
  output = Path(output_path)
1051
- write_json(output, merged)
1052
- _summarize_merge(output, merged, input_count=len(paths))
1106
+ bundle = {"template_tag": template_tag, "papers": merged}
1107
+ write_json(output, bundle)
1108
+ _summarize_merge(output, bundle, input_count=len(paths))
1053
1109
 
1054
1110
  @merge_group.command("templates")
1055
1111
  @click.option("-i", "--inputs", "input_paths", multiple=True, required=True, help="Input JSON files")
@@ -1201,6 +1257,62 @@ def register_db_commands(db_group: click.Group) -> None:
1201
1257
  sample_table.add_row(*row)
1202
1258
  Console().print(sample_table)
1203
1259
 
1260
+ @merge_group.command("bibtex")
1261
+ @click.option("-i", "--input", "input_paths", multiple=True, required=True, help="Input BibTeX file paths")
1262
+ @click.option("-o", "--output", "output_path", required=True, help="Output BibTeX file path")
1263
+ def merge_bibtex(input_paths: Iterable[str], output_path: str) -> None:
1264
+ if not PYBTEX_AVAILABLE:
1265
+ raise click.ClickException("pybtex is required for merge bibtex")
1266
+
1267
+ paths = [Path(path) for path in input_paths]
1268
+ if not paths:
1269
+ raise click.ClickException("No BibTeX inputs provided")
1270
+
1271
+ for path in paths:
1272
+ if not path.is_file():
1273
+ raise click.ClickException(f"BibTeX file not found: {path}")
1274
+
1275
+ merged_entries: dict[str, tuple[Any, int]] = {}
1276
+ duplicate_keys: list[str] = []
1277
+ duplicate_seen: set[str] = set()
1278
+
1279
+ for path in paths:
1280
+ bib_data = parse_file(str(path))
1281
+ for key, entry in bib_data.entries.items():
1282
+ score = _bibtex_entry_score(entry)
1283
+ if key not in merged_entries:
1284
+ merged_entries[key] = (entry, score)
1285
+ continue
1286
+ if key not in duplicate_seen:
1287
+ duplicate_seen.add(key)
1288
+ duplicate_keys.append(key)
1289
+ _, existing_score = merged_entries[key]
1290
+ if score > existing_score:
1291
+ merged_entries[key] = (entry, score)
1292
+
1293
+ output = Path(output_path)
1294
+ output.parent.mkdir(parents=True, exist_ok=True)
1295
+ out_data = BibliographyData()
1296
+ for key, (entry, _) in merged_entries.items():
1297
+ out_data.entries[key] = entry
1298
+ with output.open("w", encoding="utf-8") as handle:
1299
+ Writer().write_stream(out_data, handle)
1300
+
1301
+ _summarize_bibtex_merge(
1302
+ output,
1303
+ input_count=len(paths),
1304
+ entry_count=len(merged_entries),
1305
+ duplicate_count=len(duplicate_keys),
1306
+ )
1307
+
1308
+ if duplicate_keys:
1309
+ preview_limit = 20
1310
+ preview = ", ".join(duplicate_keys[:preview_limit])
1311
+ if len(duplicate_keys) > preview_limit:
1312
+ preview = f"{preview}, ... (+{len(duplicate_keys) - preview_limit} more)"
1313
+ note = "Kept entry with most fields; ties keep first input order."
1314
+ Console().print(Panel(f"{note}\n{preview}", title=f"Duplicate keys ({len(duplicate_keys)})", style="yellow"))
1315
+
1204
1316
  @db_group.command("render-md")
1205
1317
  @click.option("-i", "--input", "input_path", required=True, help="Input JSON file path")
1206
1318
  @click.option("-d", "--output-dir", "output_dir", default="rendered_md", help="Output directory")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepresearch-flow
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: Workflow tools for paper extraction, review, and research automation.
5
5
  Author-email: DengQi <dengqi935@gmail.com>
6
6
  License: MIT License
@@ -400,6 +400,41 @@ uv run deepresearch-flow paper db merge templates \
400
400
 
401
401
  Note: `paper db merge` is now split into `merge library` and `merge templates`.
402
402
 
403
+ ### Merge multiple databases (PDF + Markdown + BibTeX)
404
+
405
+ ```bash
406
+ # 1) Copy PDFs into a single folder
407
+ rsync -av ./pdfs_a/ ./pdfs_merged/
408
+ rsync -av ./pdfs_b/ ./pdfs_merged/
409
+
410
+ # 2) Copy Markdown folders into a single folder
411
+ rsync -av ./md_a/ ./md_merged/
412
+ rsync -av ./md_b/ ./md_merged/
413
+
414
+ # 3) Merge JSON libraries
415
+ uv run deepresearch-flow paper db merge library \
416
+ --inputs ./paper_infos_a.json \
417
+ --inputs ./paper_infos_b.json \
418
+ --output ./paper_infos_merged.json
419
+
420
+ # 4) Merge BibTeX files
421
+ uv run deepresearch-flow paper db merge bibtex \
422
+ -i ./library_a.bib \
423
+ -i ./library_b.bib \
424
+ -o ./library_merged.bib
425
+ ```
426
+
427
+ ### Merge BibTeX files
428
+
429
+ ```bash
430
+ uv run deepresearch-flow paper db merge bibtex \
431
+ -i ./library_a.bib \
432
+ -i ./library_b.bib \
433
+ -o ./library_merged.bib
434
+ ```
435
+
436
+ Duplicate keys keep the entry with the most fields; ties keep the first input order.
437
+
403
438
  ### Recommended: Merge templates then filter by BibTeX
404
439
 
405
440
  ```bash
@@ -4,7 +4,7 @@ deepresearch_flow/cli.py,sha256=t4oowCNWldL0DrVJ4d0UlRkuGU2qHej_G0mAc_quteQ,455
4
4
  deepresearch_flow/paper/__init__.py,sha256=sunaOkcgAJBrfmcaJTumcWbPGVUSGWvOv2a2Yidzy0A,43
5
5
  deepresearch_flow/paper/cli.py,sha256=68d-yccScU0yL6d7eqZVdudPO6i_in8F4v-hKDWILMo,13647
6
6
  deepresearch_flow/paper/config.py,sha256=V7z4ApPXCV1acSl2FU3nZGq6nt8uisMhm0GtOq5zzmg,12021
7
- deepresearch_flow/paper/db.py,sha256=UL2q4CFI33a3DZsZ42VOS_3FtTORnQuAogUfzPVjcO0,86579
7
+ deepresearch_flow/paper/db.py,sha256=Bxhrd8NCaPZ9Ijtp1uiOplwh2Uy0n2Dyn1bO0d4A9bE,91780
8
8
  deepresearch_flow/paper/db_ops.py,sha256=cb64jn2ax39i3tCS-0DYmlsJdGX3uBS2u5ncUIbUBic,73980
9
9
  deepresearch_flow/paper/extract.py,sha256=78ASAyNLfCl1AsAk2o_v1vskZCNZuayaCHgr0S4V2Vs,87632
10
10
  deepresearch_flow/paper/llm.py,sha256=mHfs5IkT3Q6BOh46MDlfUmgVTX24WRf0IKKoOnN8nV8,4007
@@ -463,9 +463,9 @@ deepresearch_flow/translator/placeholder.py,sha256=mEgqA-dPdOsIhno0h_hzfpXpY2asb
463
463
  deepresearch_flow/translator/prompts.py,sha256=EvfBvBIpQXARDj4m87GAyFXJGL8EJeahj_rOmp9mv68,5556
464
464
  deepresearch_flow/translator/protector.py,sha256=yUMuS2FgVofK_MRXrcauLRiwNvdCCjNAnh6CcNd686o,11777
465
465
  deepresearch_flow/translator/segment.py,sha256=rBFMCLTrvm2GrPc_hNFymi-8Ih2DAtUQlZHCRE9nLaM,5146
466
- deepresearch_flow-0.6.0.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
467
- deepresearch_flow-0.6.0.dist-info/METADATA,sha256=fyynvn8LYDTZlsIaKDr3SxQbR8nqQSOk3s85ZIh1t6E,25838
468
- deepresearch_flow-0.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
469
- deepresearch_flow-0.6.0.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
470
- deepresearch_flow-0.6.0.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
471
- deepresearch_flow-0.6.0.dist-info/RECORD,,
466
+ deepresearch_flow-0.6.1.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
467
+ deepresearch_flow-0.6.1.dist-info/METADATA,sha256=y_CHy1YJ-3P31W43Q_fd8dEkznj7LKLRrCF6F-sGHaQ,26696
468
+ deepresearch_flow-0.6.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
469
+ deepresearch_flow-0.6.1.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
470
+ deepresearch_flow-0.6.1.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
471
+ deepresearch_flow-0.6.1.dist-info/RECORD,,