deepresearch-flow 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepresearch_flow/paper/db.py +122 -10
- {deepresearch_flow-0.6.0.dist-info → deepresearch_flow-0.6.1.dist-info}/METADATA +36 -1
- {deepresearch_flow-0.6.0.dist-info → deepresearch_flow-0.6.1.dist-info}/RECORD +7 -7
- {deepresearch_flow-0.6.0.dist-info → deepresearch_flow-0.6.1.dist-info}/WHEEL +0 -0
- {deepresearch_flow-0.6.0.dist-info → deepresearch_flow-0.6.1.dist-info}/entry_points.txt +0 -0
- {deepresearch_flow-0.6.0.dist-info → deepresearch_flow-0.6.1.dist-info}/licenses/LICENSE +0 -0
- {deepresearch_flow-0.6.0.dist-info → deepresearch_flow-0.6.1.dist-info}/top_level.txt +0 -0
deepresearch_flow/paper/db.py
CHANGED
|
@@ -31,7 +31,8 @@ from deepresearch_flow.paper.template_registry import (
|
|
|
31
31
|
from deepresearch_flow.paper.render import resolve_render_template, render_papers
|
|
32
32
|
|
|
33
33
|
try:
|
|
34
|
-
from pybtex.database import parse_file
|
|
34
|
+
from pybtex.database import BibliographyData, parse_file
|
|
35
|
+
from pybtex.database.output.bibtex import Writer
|
|
35
36
|
PYBTEX_AVAILABLE = True
|
|
36
37
|
except ImportError:
|
|
37
38
|
PYBTEX_AVAILABLE = False
|
|
@@ -1015,11 +1016,18 @@ def register_db_commands(db_group: click.Group) -> None:
|
|
|
1015
1016
|
def merge_group() -> None:
|
|
1016
1017
|
"""Merge paper JSON inputs."""
|
|
1017
1018
|
|
|
1018
|
-
def _summarize_merge(output_path: Path, merged:
|
|
1019
|
+
def _summarize_merge(output_path: Path, merged: Any, *, input_count: int) -> None:
|
|
1020
|
+
items: list[dict[str, Any]] = []
|
|
1021
|
+
if isinstance(merged, dict):
|
|
1022
|
+
raw_items = merged.get("papers")
|
|
1023
|
+
if isinstance(raw_items, list):
|
|
1024
|
+
items = [item for item in raw_items if isinstance(item, dict)]
|
|
1025
|
+
elif isinstance(merged, list):
|
|
1026
|
+
items = [item for item in merged if isinstance(item, dict)]
|
|
1027
|
+
|
|
1019
1028
|
field_set: set[str] = set()
|
|
1020
|
-
for item in
|
|
1021
|
-
|
|
1022
|
-
field_set.update(item.keys())
|
|
1029
|
+
for item in items:
|
|
1030
|
+
field_set.update(item.keys())
|
|
1023
1031
|
field_list = sorted(field_set)
|
|
1024
1032
|
|
|
1025
1033
|
console = Console()
|
|
@@ -1027,7 +1035,7 @@ def register_db_commands(db_group: click.Group) -> None:
|
|
|
1027
1035
|
summary.add_column("Metric", style="bold")
|
|
1028
1036
|
summary.add_column("Value")
|
|
1029
1037
|
summary.add_row("Inputs", str(input_count))
|
|
1030
|
-
summary.add_row("Items", str(len(
|
|
1038
|
+
summary.add_row("Items", str(len(items)))
|
|
1031
1039
|
summary.add_row("Fields", str(len(field_list)))
|
|
1032
1040
|
summary.add_row("Output", str(output_path))
|
|
1033
1041
|
console.print(summary)
|
|
@@ -1039,17 +1047,65 @@ def register_db_commands(db_group: click.Group) -> None:
|
|
|
1039
1047
|
field_table.add_row(name)
|
|
1040
1048
|
console.print(field_table)
|
|
1041
1049
|
|
|
1050
|
+
def _bibtex_entry_score(entry: Any) -> int:
    """Return a completeness score for a parsed BibTeX entry.

    The score is the number of fields, plus the number of person roles,
    plus the total number of people listed across all roles. A missing or
    falsy ``fields`` / ``persons`` attribute is treated as empty, so any
    object scores at least 0.
    """
    field_map = getattr(entry, "fields", None) or {}
    role_map = getattr(entry, "persons", None) or {}
    score = len(field_map) + len(role_map)
    for members in role_map.values():
        score += len(members)
    return score
|
|
1055
|
+
|
|
1056
|
+
def _summarize_bibtex_merge(output_path: Path, *, input_count: int, entry_count: int, duplicate_count: int) -> None:
    """Print a two-column "BibTeX Merge Summary" table to the console.

    Args:
        output_path: Path of the merged BibTeX file that was written.
        input_count: Number of input files that were merged.
        entry_count: Number of entries in the merged output.
        duplicate_count: Number of keys that appeared more than once.
    """
    table = Table(title="BibTeX Merge Summary")
    table.add_column("Metric", style="bold")
    table.add_column("Value")
    rows = (
        ("Inputs", input_count),
        ("Entries", entry_count),
        ("Duplicates", duplicate_count),
        ("Output", output_path),
    )
    # Every value is rendered through str(), exactly one row per metric.
    for label, value in rows:
        table.add_row(label, str(value))
    Console().print(table)
|
|
1065
|
+
|
|
1042
1066
|
@merge_group.command("library")
@click.option("-i", "--inputs", "input_paths", multiple=True, required=True, help="Input JSON files")
@click.option("--template-tag", "template_tag", default=None, help="Template tag for merged output")
@click.option("-o", "--output", "output_path", required=True, help="Output JSON file path")
def merge_library(input_paths: Iterable[str], template_tag: str | None, output_path: str) -> None:
    """Concatenate paper libraries into one ``{template_tag, papers}`` bundle.

    Each input is either a bare list of papers or a dict with a ``papers``
    list and an optional ``template_tag``. Papers are appended in input
    order without de-duplication.

    The output ``template_tag`` is resolved in this order:
      1. the ``--template-tag`` option, when given;
      2. the first non-empty ``prompt_template`` / ``template_tag`` found on
         a merged paper;
      3. the first non-empty bundle-level ``template_tag`` among the inputs;
      4. the literal ``"unknown"``.

    Raises:
        click.ClickException: if an input is neither a list nor a dict
            holding a ``papers`` list.
    """
    shape_error = "Input JSON must be a list or {template_tag, papers}"
    paths = [Path(path) for path in input_paths]
    merged: list[dict[str, Any]] = []
    tag_candidates: list[str] = []
    for path in paths:
        payload = load_json(path)
        if isinstance(payload, list):
            merged.extend(payload)
            continue
        papers = payload.get("papers") if isinstance(payload, dict) else None
        if not isinstance(papers, list):
            # Name the offending file so the user knows which input to fix.
            raise click.ClickException(f"{shape_error}: {path}")
        tag = str(payload.get("template_tag") or "")
        if tag:
            tag_candidates.append(tag)
        merged.extend(papers)

    if not template_tag:
        # Fall back to the first paper that carries a template marker.
        for paper in merged:
            if not isinstance(paper, dict):
                continue
            inferred = str(paper.get("prompt_template") or paper.get("template_tag") or "")
            if inferred:
                template_tag = inferred
                break
    if not template_tag and tag_candidates:
        template_tag = tag_candidates[0]
    if not template_tag:
        template_tag = "unknown"

    # Report only the bundle tags that really disagree with the chosen tag,
    # and state which tag was used instead of claiming "first".
    conflicting = [tag for tag in dict.fromkeys(tag_candidates) if tag != template_tag]
    if conflicting:
        click.echo(
            f"Warning: input template_tag value(s) {', '.join(conflicting)} "
            f"differ from output template_tag '{template_tag}'"
        )

    output = Path(output_path)
    bundle = {"template_tag": template_tag, "papers": merged}
    write_json(output, bundle)
    _summarize_merge(output, bundle, input_count=len(paths))
|
|
1053
1109
|
|
|
1054
1110
|
@merge_group.command("templates")
|
|
1055
1111
|
@click.option("-i", "--inputs", "input_paths", multiple=True, required=True, help="Input JSON files")
|
|
@@ -1201,6 +1257,62 @@ def register_db_commands(db_group: click.Group) -> None:
|
|
|
1201
1257
|
sample_table.add_row(*row)
|
|
1202
1258
|
Console().print(sample_table)
|
|
1203
1259
|
|
|
1260
|
+
@merge_group.command("bibtex")
@click.option("-i", "--input", "input_paths", multiple=True, required=True, help="Input BibTeX file paths")
@click.option("-o", "--output", "output_path", required=True, help="Output BibTeX file path")
def merge_bibtex(input_paths: Iterable[str], output_path: str) -> None:
    """Merge several BibTeX files into one, resolving duplicate citation keys.

    When a key occurs more than once, the entry with the highest
    ``_bibtex_entry_score`` is kept; on a tie the earliest occurrence wins.
    Keys are compared case-insensitively, because the pybtex entries
    mapping used for output is case-insensitive: two keys differing only
    in case would otherwise overwrite each other silently on write.

    Prints a summary table, and a panel listing duplicate keys if any.

    Raises:
        click.ClickException: if pybtex is not installed, no inputs are
            given, an input file is missing, or an input cannot be parsed.
    """
    if not PYBTEX_AVAILABLE:
        raise click.ClickException("pybtex is required for merge bibtex")

    # Imported here because it is only importable once pybtex is known
    # to be installed (checked just above).
    from pybtex.exceptions import PybtexError

    paths = [Path(path) for path in input_paths]
    if not paths:
        raise click.ClickException("No BibTeX inputs provided")

    for path in paths:
        if not path.is_file():
            raise click.ClickException(f"BibTeX file not found: {path}")

    # Case-folded key -> (key as first kept, entry, score).
    merged_entries: dict[str, tuple[str, Any, int]] = {}
    # Case-folded key -> first-seen spelling; dict keeps report order.
    duplicate_keys: dict[str, str] = {}

    for path in paths:
        try:
            bib_data = parse_file(str(path))
        except (PybtexError, OSError, UnicodeDecodeError) as exc:
            # Surface a clean CLI error instead of a traceback.
            raise click.ClickException(f"Failed to parse BibTeX file {path}: {exc}") from exc
        for key, entry in bib_data.entries.items():
            folded = key.lower()
            score = _bibtex_entry_score(entry)
            kept = merged_entries.get(folded)
            if kept is None:
                merged_entries[folded] = (key, entry, score)
                continue
            duplicate_keys.setdefault(folded, kept[0])
            # Strictly greater: ties keep the earlier entry.
            if score > kept[2]:
                merged_entries[folded] = (key, entry, score)

    output = Path(output_path)
    output.parent.mkdir(parents=True, exist_ok=True)
    out_data = BibliographyData()
    for key, entry, _ in merged_entries.values():
        out_data.entries[key] = entry
    with output.open("w", encoding="utf-8") as handle:
        Writer().write_stream(out_data, handle)

    _summarize_bibtex_merge(
        output,
        input_count=len(paths),
        entry_count=len(merged_entries),
        duplicate_count=len(duplicate_keys),
    )

    if duplicate_keys:
        preview_limit = 20
        names = list(duplicate_keys.values())
        preview = ", ".join(names[:preview_limit])
        if len(names) > preview_limit:
            preview = f"{preview}, ... (+{len(names) - preview_limit} more)"
        # The score counts persons as well as fields, so say so.
        note = "Kept entry with most fields and persons; ties keep first input order."
        Console().print(Panel(f"{note}\n{preview}", title=f"Duplicate keys ({len(names)})", style="yellow"))
|
|
1315
|
+
|
|
1204
1316
|
@db_group.command("render-md")
|
|
1205
1317
|
@click.option("-i", "--input", "input_path", required=True, help="Input JSON file path")
|
|
1206
1318
|
@click.option("-d", "--output-dir", "output_dir", default="rendered_md", help="Output directory")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepresearch-flow
|
|
3
|
-
Version: 0.6.
|
|
3
|
+
Version: 0.6.1
|
|
4
4
|
Summary: Workflow tools for paper extraction, review, and research automation.
|
|
5
5
|
Author-email: DengQi <dengqi935@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -400,6 +400,41 @@ uv run deepresearch-flow paper db merge templates \
|
|
|
400
400
|
|
|
401
401
|
Note: `paper db merge` is now split into `merge library` and `merge templates`.
|
|
402
402
|
|
|
403
|
+
### Merge multiple databases (PDF + Markdown + BibTeX)
|
|
404
|
+
|
|
405
|
+
```bash
|
|
406
|
+
# 1) Copy PDFs into a single folder
|
|
407
|
+
rsync -av ./pdfs_a/ ./pdfs_merged/
|
|
408
|
+
rsync -av ./pdfs_b/ ./pdfs_merged/
|
|
409
|
+
|
|
410
|
+
# 2) Copy Markdown folders into a single folder
|
|
411
|
+
rsync -av ./md_a/ ./md_merged/
|
|
412
|
+
rsync -av ./md_b/ ./md_merged/
|
|
413
|
+
|
|
414
|
+
# 3) Merge JSON libraries
|
|
415
|
+
uv run deepresearch-flow paper db merge library \
|
|
416
|
+
--inputs ./paper_infos_a.json \
|
|
417
|
+
--inputs ./paper_infos_b.json \
|
|
418
|
+
--output ./paper_infos_merged.json
|
|
419
|
+
|
|
420
|
+
# 4) Merge BibTeX files
|
|
421
|
+
uv run deepresearch-flow paper db merge bibtex \
|
|
422
|
+
-i ./library_a.bib \
|
|
423
|
+
-i ./library_b.bib \
|
|
424
|
+
-o ./library_merged.bib
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
### Merge BibTeX files
|
|
428
|
+
|
|
429
|
+
```bash
|
|
430
|
+
uv run deepresearch-flow paper db merge bibtex \
|
|
431
|
+
-i ./library_a.bib \
|
|
432
|
+
-i ./library_b.bib \
|
|
433
|
+
-o ./library_merged.bib
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
When the same key appears in more than one input, the entry with the most fields and listed persons (authors, editors) is kept; on a tie, the entry from the earliest input wins.
|
|
437
|
+
|
|
403
438
|
### Recommended: Merge templates then filter by BibTeX
|
|
404
439
|
|
|
405
440
|
```bash
|
|
@@ -4,7 +4,7 @@ deepresearch_flow/cli.py,sha256=t4oowCNWldL0DrVJ4d0UlRkuGU2qHej_G0mAc_quteQ,455
|
|
|
4
4
|
deepresearch_flow/paper/__init__.py,sha256=sunaOkcgAJBrfmcaJTumcWbPGVUSGWvOv2a2Yidzy0A,43
|
|
5
5
|
deepresearch_flow/paper/cli.py,sha256=68d-yccScU0yL6d7eqZVdudPO6i_in8F4v-hKDWILMo,13647
|
|
6
6
|
deepresearch_flow/paper/config.py,sha256=V7z4ApPXCV1acSl2FU3nZGq6nt8uisMhm0GtOq5zzmg,12021
|
|
7
|
-
deepresearch_flow/paper/db.py,sha256=
|
|
7
|
+
deepresearch_flow/paper/db.py,sha256=Bxhrd8NCaPZ9Ijtp1uiOplwh2Uy0n2Dyn1bO0d4A9bE,91780
|
|
8
8
|
deepresearch_flow/paper/db_ops.py,sha256=cb64jn2ax39i3tCS-0DYmlsJdGX3uBS2u5ncUIbUBic,73980
|
|
9
9
|
deepresearch_flow/paper/extract.py,sha256=78ASAyNLfCl1AsAk2o_v1vskZCNZuayaCHgr0S4V2Vs,87632
|
|
10
10
|
deepresearch_flow/paper/llm.py,sha256=mHfs5IkT3Q6BOh46MDlfUmgVTX24WRf0IKKoOnN8nV8,4007
|
|
@@ -463,9 +463,9 @@ deepresearch_flow/translator/placeholder.py,sha256=mEgqA-dPdOsIhno0h_hzfpXpY2asb
|
|
|
463
463
|
deepresearch_flow/translator/prompts.py,sha256=EvfBvBIpQXARDj4m87GAyFXJGL8EJeahj_rOmp9mv68,5556
|
|
464
464
|
deepresearch_flow/translator/protector.py,sha256=yUMuS2FgVofK_MRXrcauLRiwNvdCCjNAnh6CcNd686o,11777
|
|
465
465
|
deepresearch_flow/translator/segment.py,sha256=rBFMCLTrvm2GrPc_hNFymi-8Ih2DAtUQlZHCRE9nLaM,5146
|
|
466
|
-
deepresearch_flow-0.6.
|
|
467
|
-
deepresearch_flow-0.6.
|
|
468
|
-
deepresearch_flow-0.6.
|
|
469
|
-
deepresearch_flow-0.6.
|
|
470
|
-
deepresearch_flow-0.6.
|
|
471
|
-
deepresearch_flow-0.6.
|
|
466
|
+
deepresearch_flow-0.6.1.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
|
|
467
|
+
deepresearch_flow-0.6.1.dist-info/METADATA,sha256=y_CHy1YJ-3P31W43Q_fd8dEkznj7LKLRrCF6F-sGHaQ,26696
|
|
468
|
+
deepresearch_flow-0.6.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
469
|
+
deepresearch_flow-0.6.1.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
|
|
470
|
+
deepresearch_flow-0.6.1.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
|
|
471
|
+
deepresearch_flow-0.6.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|