deepresearch-flow 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,7 +30,12 @@ except ImportError:
30
30
 
31
31
 
32
32
def load_json(path: Path) -> list[dict[str, Any]]:
    """Read paper records from a JSON file.

    Accepts either a bare JSON array of papers or a wrapper object of the
    form ``{"template_tag": ..., "papers": [...]}`` and returns the list
    of paper dicts in both cases.

    Raises:
        click.ClickException: if the parsed JSON is neither shape.
    """
    parsed = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        papers = parsed.get("papers")
        if isinstance(papers, list):
            return papers
    raise click.ClickException("Input JSON must be a list or {template_tag, papers}")
34
39
 
35
40
 
36
41
  def write_json(path: Path, data: Any) -> None:
@@ -378,9 +383,25 @@ def register_db_commands(db_group: click.Group) -> None:
378
383
  month_counts: dict[str, int] = {}
379
384
  author_counts: dict[str, int] = {}
380
385
  tag_counts: dict[str, int] = {}
386
+ keyword_counts: dict[str, int] = {}
381
387
  journal_counts: dict[str, int] = {}
382
388
  conference_counts: dict[str, int] = {}
383
389
  other_venue_counts: dict[str, int] = {}
390
def normalize_keywords(value: Any) -> list[str]:
    """Coerce a raw ``keywords`` field into a flat list of clean tokens.

    ``None`` yields ``[]``; a string is split on ``;`` and ``,``; a list
    is used as-is; any other value is treated as a single item. Each item
    is stringified, stripped, and lowercased; empty tokens are dropped.
    """
    if value is None:
        return []
    if isinstance(value, str):
        raw_items: list[Any] = re.split(r"[;,]", value)
    elif isinstance(value, list):
        raw_items = value
    else:
        raw_items = [value]
    cleaned = (str(raw).strip().lower() for raw in raw_items)
    return [token for token in cleaned if token]
384
405
  for paper in papers:
385
406
  bibtex_fields = {}
386
407
  bibtex_type = None
@@ -406,6 +427,8 @@ def register_db_commands(db_group: click.Group) -> None:
406
427
  author_counts[author] = author_counts.get(author, 0) + 1
407
428
  for tag in paper.get("ai_generated_tags") or []:
408
429
  tag_counts[tag] = tag_counts.get(tag, 0) + 1
430
+ for keyword in normalize_keywords(paper.get("keywords")):
431
+ keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1
409
432
 
410
433
  venue = None
411
434
  if bibtex_type in {"article"}:
@@ -541,6 +564,16 @@ def register_db_commands(db_group: click.Group) -> None:
541
564
  tag_table.add_row(tag, str(count), f"{percentage:.1f}%")
542
565
  console.print(tag_table)
543
566
 
567
+ if keyword_counts:
568
+ keyword_table = Table(title=f"Top {top_n} Keywords")
569
+ keyword_table.add_column("Keyword", style="cyan")
570
+ keyword_table.add_column("Count", style="green", justify="right")
571
+ keyword_table.add_column("Percentage", style="yellow", justify="right")
572
+ for keyword, count in sorted(keyword_counts.items(), key=lambda item: item[1], reverse=True)[:top_n]:
573
+ percentage = (count / total * 100) if total else 0
574
+ keyword_table.add_row(keyword, str(count), f"{percentage:.1f}%")
575
+ console.print(keyword_table)
576
+
544
577
  @db_group.command("serve")
545
578
  @click.option("-i", "--input", "input_paths", multiple=True, required=True, help="Input JSON file path")
546
579
  @click.option("-b", "--bibtex", "bibtex_path", default=None, help="Optional BibTeX file path")
def load_existing(path: Path) -> list[dict[str, Any]]:
    """Best-effort load of previously written results from *path*.

    Returns the paper list from either a bare JSON array or a
    ``{"template_tag": ..., "papers": [...]}`` wrapper. Any problem —
    missing file, invalid JSON, or an unexpected top-level shape —
    yields an empty list instead of raising, so callers can always
    append to the result.
    """
    if not path.exists():
        return []
    try:
        parsed = json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return []
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        papers = parsed.get("papers")
        if isinstance(papers, list):
            return papers
    return []
291
296
 
292
297
 
293
298
  def load_errors(path: Path) -> list[dict[str, Any]]:
@@ -462,6 +467,7 @@ async def extract_documents(
462
467
  ) -> None:
463
468
  start_time = time.monotonic()
464
469
  markdown_files = discover_markdown(inputs, glob_pattern, recursive=True)
470
+ template_tag = prompt_template if not custom_prompt else "custom"
465
471
 
466
472
  if retry_failed:
467
473
  error_entries = load_errors(errors_path)
@@ -798,7 +804,8 @@ async def extract_documents(
798
804
  if path not in seen:
799
805
  final_results.append(entry)
800
806
 
801
- write_json(output_path, final_results)
807
+ output_payload = {"template_tag": template_tag, "papers": final_results}
808
+ write_json(output_path, output_payload)
802
809
 
803
810
  error_payload = [
804
811
  {
@@ -823,7 +830,7 @@ async def extract_documents(
823
830
  continue
824
831
  base_name = split_output_name(Path(source_path))
825
832
  file_name = unique_split_name(base_name, used_names, source_path)
826
- write_json(target_dir / f"{file_name}.json", entry)
833
+ write_json(target_dir / f"{file_name}.json", {"template_tag": template_tag, "papers": [entry]})
827
834
 
828
835
  if render_md:
829
836
  try: