moducomp 0.7.18__py3-none-any.whl → 0.7.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
moducomp/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  moducomp: metabolic module completeness and complementarity for microbiomes.
3
3
  """
4
4
 
5
- __version__ = "0.7.18"
5
+ __version__ = "0.7.23"
6
6
  __author__ = "Juan C. Villada"
7
7
  __email__ = "jvillada@lbl.gov"
8
8
  __title__ = "moducomp"
moducomp/moducomp.py CHANGED
@@ -418,18 +418,208 @@ def _set_configured_eggnog_dir(path: Path, logger: Optional[logging.Logger] = No
418
418
  _save_config(data, logger)
419
419
 
420
420
 
421
- def _find_eggnog_downloader() -> Optional[str]:
422
- for name in ("download_eggnog_data.py", "download_eggnog_data"):
423
- path = shutil.which(name)
421
def _get_eggnog_db_version(logger: Optional[logging.Logger] = None) -> str:
    """Return the database version string published by eggNOG-mapper.

    The value is read from ``eggnogmapper.version.__DB_VERSION__``.

    Args:
        logger: Optional logger that receives an error message before exiting.

    Returns:
        The non-empty ``__DB_VERSION__`` value.

    Raises:
        typer.Exit: With code 1 when the import fails for any reason or when
            the reported version is empty/falsy.
    """
    try:
        from eggnogmapper.version import __DB_VERSION__ as db_version
    except Exception as exc:
        # Any import-time failure is reported the same way: setup cannot go on.
        if logger:
            logger.error("eggNOG-mapper is required for setup: %s", exc)
        raise typer.Exit(1)

    if db_version:
        return db_version

    if logger:
        logger.error("eggNOG-mapper did not report a database version.")
    raise typer.Exit(1)
433
+
434
+
435
def _select_download_tool() -> Tuple[str, str]:
    """Pick the first available command-line downloader.

    Candidates are probed with ``shutil.which`` in the order ``aria2c``,
    ``wget``, ``curl``.

    Returns:
        A ``(tool_name, resolved_path)`` pair for the first tool found.

    Raises:
        FileNotFoundError: When none of the candidates is on ``PATH``.
    """
    candidates = ("aria2c", "wget", "curl")
    located = ((name, shutil.which(name)) for name in candidates)
    # The generator is lazy, so probing stops at the first hit.
    for name, resolved in located:
        if resolved:
            return name, resolved
    raise FileNotFoundError("No download tool found (aria2c, wget, or curl).")
441
+
442
+
443
def _run_download_cmd(
    cmd: List[str],
    logger: Optional[logging.Logger] = None,
    verbose: bool = True,
    description: str = "Command",
) -> None:
    """Run an external command and turn any failure into a CLI exit.

    Args:
        cmd: Argument vector passed to ``subprocess.run`` (no shell involved).
        logger: Optional logger; receives the shell-quoted command line at
            INFO level and a message at ERROR level on failure.
        verbose: When true the child inherits the console streams; otherwise
            stdout and stderr are both discarded.
        description: Label used as the prefix of the log messages.

    Raises:
        typer.Exit: With the child's return code when it exits non-zero, or
            with code 1 when the executable cannot be started at all (for
            example because it is not installed).
    """
    if logger:
        logger.info("%s: %s", description, " ".join(shlex.quote(arg) for arg in cmd))

    # Quiet mode: stdout goes to DEVNULL and stderr is folded into stdout.
    stream_kwargs = (
        {} if verbose else {"stdout": subprocess.DEVNULL, "stderr": subprocess.STDOUT}
    )
    try:
        subprocess.run(cmd, check=True, **stream_kwargs)
    except subprocess.CalledProcessError as exc:
        if logger:
            # Use the caller's label so extraction failures are not reported
            # as download failures.
            logger.error("%s failed with exit code %s.", description, exc.returncode)
        raise typer.Exit(exc.returncode) from exc
    except OSError as exc:
        # Missing or non-executable program: report it like any other failure
        # instead of letting a raw traceback reach the user.
        if logger:
            logger.error("%s could not be started: %s", description, exc)
        raise typer.Exit(1) from exc
460
+
461
+
462
def _download_file(
    url: str,
    dest: Path,
    logger: Optional[logging.Logger] = None,
    verbose: bool = True,
) -> None:
    """Download ``url`` to ``dest`` with whichever downloader is installed.

    The tool is chosen by ``_select_download_tool`` and the resulting command
    is executed through ``_run_download_cmd``. The parent directory of
    ``dest`` is created if needed.

    Args:
        url: Source URL.
        dest: Destination file path.
        logger: Optional logger for error and command-line messages.
        verbose: When false, the tool's own quiet flag(s) are added and the
            command is run with ``verbose=False``.

    Raises:
        typer.Exit: With code 1 when no download tool is available; failures
            of the command itself are raised by ``_run_download_cmd``.
    """
    try:
        tool, tool_path = _select_download_tool()
    except FileNotFoundError as exc:
        if logger:
            logger.error("%s", exc)
        raise typer.Exit(1)
    dest.parent.mkdir(parents=True, exist_ok=True)

    # Each branch defines the tool-specific options plus the flag(s) that
    # silence the tool; the quiet flags go right after the executable.
    if tool == "aria2c":
        quiet_flags = ["--quiet"]
        options = [
            "--console-log-level=warn",
            "--dir",
            str(dest.parent),
            "--out",
            dest.name,
            "--allow-overwrite=true",
            "--auto-file-renaming=false",
            "--continue=true",
            "--file-allocation=none",
            "--max-tries=0",
            "--retry-wait=30",
            "--timeout=60",
            "--summary-interval=60",
            "-x",
            "8",
            "-s",
            "8",
            "-k",
            "1M",
        ]
    elif tool == "wget":
        quiet_flags = ["--quiet"]
        options = [
            "--continue",
            "--tries=0",
            "--retry-connrefused",
            "--waitretry=30",
            "--read-timeout=30",
            "--timeout=30",
            "--progress=dot:giga",
            "-O",
            str(dest),
        ]
    else:
        # NOTE(review): curl is invoked without --fail; presumably an HTTP
        # error page would then be saved to dest with exit code 0 — confirm.
        quiet_flags = ["--silent", "--show-error"]
        options = [
            "--location",
            "--continue-at",
            "-",
            "--retry",
            "9999",
            "--retry-delay",
            "30",
            "--retry-all-errors",
            "--connect-timeout",
            "30",
            "--max-time",
            "0",
            "-o",
            str(dest),
        ]

    silencers = [] if verbose else quiet_flags
    cmd = [tool_path, *silencers, *options, url]

    _run_download_cmd(cmd, logger=logger, verbose=verbose, description="Download command")
542
+
543
+
544
def _extract_archive(
    archive_path: Path,
    target_dir: Path,
    mode: str,
    logger: Optional[logging.Logger] = None,
    verbose: bool = True,
) -> None:
    """Unpack a downloaded archive using the external ``gunzip`` or ``tar``.

    Args:
        archive_path: Archive file to unpack.
        target_dir: Directory passed to ``tar -C``; not used in gunzip mode.
        mode: ``"gunzip"`` runs ``gunzip -f`` on the archive; ``"tar"`` runs
            ``tar -zxf`` into ``target_dir`` and then deletes the archive.
        logger: Optional logger for warnings and errors.
        verbose: Forwarded to ``_run_download_cmd``.

    Raises:
        typer.Exit: With code 1 for an unrecognised ``mode``; command failures
            are raised by ``_run_download_cmd``.
    """
    if mode == "gunzip":
        _run_download_cmd(
            ["gunzip", "-f", str(archive_path)],
            logger=logger,
            verbose=verbose,
            description="Decompression command",
        )
    elif mode == "tar":
        _run_download_cmd(
            ["tar", "-zxf", str(archive_path), "-C", str(target_dir)],
            logger=logger,
            verbose=verbose,
            description="Extraction command",
        )
        # Cleaning up the tarball is best-effort; a leftover is only a warning.
        try:
            archive_path.unlink()
        except OSError:
            if logger:
                logger.warning("Could not remove archive %s after extraction.", archive_path)
    else:
        if logger:
            logger.error("Unknown archive extraction mode: %s", mode)
        raise typer.Exit(1)
569
+
570
+
571
def _download_eggnog_core_data(
    target_dir: Path,
    force: bool,
    yes: bool,
    logger: Optional[logging.Logger] = None,
    verbose: bool = True,
) -> None:
    """Download and unpack the eggNOG-mapper core data files into ``target_dir``.

    Three archives are processed in order: ``eggnog.db.gz``,
    ``eggnog.taxa.tar.gz`` and ``eggnog_proteins.dmnd.gz``. The download URL
    is derived from the version returned by ``_get_eggnog_db_version``.

    Args:
        target_dir: Directory that holds the archives and the unpacked files.
        force: When true, an existing archive and unpacked file are deleted
            first so the file is fetched again.
        yes: When false, the user is asked to confirm each download.
        logger: Optional logger for progress, warnings and errors.
        verbose: Forwarded to the download and extraction helpers.

    Raises:
        typer.Exit: With code 1 when an expected file is missing after
            extraction; helper failures propagate their own ``typer.Exit``.
    """
    release = _get_eggnog_db_version(logger)
    base_url = f"http://eggnog5.embl.de/download/emapperdb-{release}"
    # (archive file name, file expected after unpacking, extraction mode)
    core_files = (
        ("eggnog.db.gz", "eggnog.db", "gunzip"),
        ("eggnog.taxa.tar.gz", "eggnog.taxa.db", "tar"),
        ("eggnog_proteins.dmnd.gz", "eggnog_proteins.dmnd", "gunzip"),
    )

    for archive_name, unpacked_name, mode in core_files:
        archive_path = target_dir / archive_name
        unpacked_path = target_dir / unpacked_name

        if force:
            for stale in (archive_path, unpacked_path):
                if not stale.exists():
                    continue
                try:
                    stale.unlink()
                except OSError:
                    if logger:
                        logger.warning("Failed to remove %s before re-download.", stale)

        # Nothing to do for this entry when its unpacked file is already there.
        if unpacked_path.exists():
            if logger:
                logger.info("eggNOG data file already present: %s", unpacked_path)
            continue

        if not yes and not typer.confirm(f"Download {archive_name} to {target_dir}?"):
            if logger:
                logger.warning("Skipped download of %s by user choice.", archive_name)
            continue

        url = f"{base_url}/{archive_name}"
        # An archive already on disk is reused as-is and goes straight to extraction.
        if not archive_path.exists():
            if logger:
                logger.info("Downloading %s from %s", archive_name, url)
            _download_file(url, archive_path, logger=logger, verbose=verbose)

        _extract_archive(archive_path, target_dir, mode, logger=logger, verbose=verbose)

        if not unpacked_path.exists():
            if logger:
                logger.error("Expected eggNOG file missing after extraction: %s", unpacked_path)
            raise typer.Exit(1)
433
623
 
434
624
  def run_subprocess_with_logging(
435
625
  cmd: List[str],
@@ -3847,7 +4037,7 @@ def setup(
3847
4037
  help="Stream downloader output to the console.",
3848
4038
  ),
3849
4039
  ) -> None:
3850
- """Download eggNOG-mapper data (via download_eggnog_data.py) and persist the location for future runs."""
4040
+ """Download eggNOG-mapper core data and persist the location for future runs."""
3851
4041
  target_dir = Path(eggnog_data_dir).expanduser().resolve() if eggnog_data_dir else default_eggnog_data_dir()
3852
4042
  target_dir = target_dir.expanduser().resolve()
3853
4043
 
@@ -3863,29 +4053,8 @@ def setup(
3863
4053
  if _has_eggnog_core_files(target_dir) and not force:
3864
4054
  logger.info("eggNOG data already present; skipping download.")
3865
4055
  else:
3866
- downloader = _find_eggnog_downloader()
3867
- if downloader is None:
3868
- message = (
3869
- "download_eggnog_data.py not found in PATH. "
3870
- "Ensure eggnog-mapper is installed."
3871
- )
3872
- emit_error(message, logger)
3873
- raise typer.Exit(1)
3874
-
3875
- cmd = [downloader, "--data_dir", str(target_dir)]
3876
- if yes:
3877
- cmd.append("-y")
3878
- if force:
3879
- cmd.append("-f")
3880
- if not verbose:
3881
- cmd.append("-q")
3882
-
3883
- logger.info("Running eggNOG-mapper downloader: %s", " ".join(shlex.quote(arg) for arg in cmd))
3884
- try:
3885
- subprocess.run(cmd, check=True)
3886
- except subprocess.CalledProcessError as exc:
3887
- emit_error(f"eggNOG-mapper downloader failed with exit code {exc.returncode}", logger)
3888
- raise typer.Exit(exc.returncode)
4056
+ logger.info("Downloading eggNOG-mapper core databases.")
4057
+ _download_eggnog_core_data(target_dir, force, yes, logger=logger, verbose=verbose)
3889
4058
 
3890
4059
  if not _has_eggnog_core_files(target_dir):
3891
4060
  emit_error(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moducomp
3
- Version: 0.7.18
3
+ Version: 0.7.23
4
4
  Summary: moducomp: metabolic module completeness and complementarity for microbiomes.
5
5
  Keywords: bioinformatics,microbiome,metabolic,kegg,genomics
6
6
  Author-email: "Juan C. Villada" <jvillada@lbl.gov>
@@ -60,7 +60,7 @@ pixi global install \
60
60
 
61
61
  ## Setup data (required)
62
62
 
63
- `moducomp` needs the eggNOG-mapper database to run. Use `moducomp setup` to download the data with eggNOG-mapper's official downloader and record the location for future runs. For upstream details, see the eggNOG-mapper setup guide: [eggNOG-mapper database setup](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.13#user-content-Setup).
63
+ `moducomp` needs the eggNOG-mapper database to run. Use `moducomp setup` to download the data using ModuComp's built-in downloader (with resume/retry) and record the location for future runs. For upstream details, see the eggNOG-mapper setup guide: [eggNOG-mapper database setup](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.13#user-content-Setup).
64
64
 
65
65
  ```bash
66
66
  moducomp setup
@@ -386,3 +386,9 @@ Genome identifiers are stored as `taxon_oid`. In pipeline mode, ModuComp expects
386
386
  ## Citation
387
387
  Villada, JC. & Schulz, F. (2025). Assessment of metabolic module completeness of genomes and metabolic complementarity in microbiomes with `moducomp`. `moducomp` (v0.5.1) Zenodo. https://doi.org/10.5281/zenodo.16116092
388
388
 
389
+ ## Tool Citations
390
+ If you use ModuComp in a study, please also cite the wrapped tools and data sources:
391
+
392
+ - eggNOG-mapper: Cantalapiedra CP et al. (2021). “eggNOG-mapper v2: Functional Annotation, Orthology Assignments, and Domain Prediction at the Metagenomic Scale.” Molecular Biology and Evolution. DOI: 10.1093/molbev/msab293. GitHub: https://github.com/eggnogdb/eggnog-mapper
393
+ - KEGG Pathways Completeness Tool (KPCT): GitHub: https://github.com/EBI-Metagenomics/kegg-pathways-completeness-tool. KEGG source: Kanehisa M, Goto S. (2000). “KEGG: Kyoto Encyclopedia of Genes and Genomes.” Nucleic Acids Research. DOI: 10.1093/nar/28.1.27
394
+
@@ -1,11 +1,11 @@
1
- moducomp/__init__.py,sha256=QckhvOCk6fBvyH6EpjdAN3ZM6ruukir0bhosib1U8Xc,659
1
+ moducomp/__init__.py,sha256=nJN89FGBpZiExcHKGNdOz3gtzXtd8VK5u6Nkbrfd1bQ,659
2
2
  moducomp/__main__.py,sha256=1O2pv6IGjUgqnbqsiMLtVqjxWQpRtZUjp8LDljZ1bsI,185
3
- moducomp/moducomp.py,sha256=hzvbqpBfk0AXYz6EKpwhAd_rNXkhTEgdpPK8nn4qxhM,179059
3
+ moducomp/moducomp.py,sha256=dL3BurI7dahfuoJVPrIbbu-pYz0lpsR_PTmQ1lcUnXM,184074
4
4
  moducomp/data/test_genomes/IMG2562617132.faa,sha256=gZPh-08pMRdAWJRr3__TbnU1F68CdkDb3gxtpaCLTTc,356863
5
5
  moducomp/data/test_genomes/IMG2568526683.faa,sha256=PxFJwe-68UGw7il1hGlNhZt4-2WzzxXxGE1GTskDnow,343109
6
6
  moducomp/data/test_genomes/IMG2740892217.faa,sha256=WsId4sIPxENbqF6tYFouAgDCy6T0SXNY6TywxBNe-3E,548954
7
- moducomp-0.7.18.dist-info/entry_points.txt,sha256=RSz22pcRnRKHRpABJKRjClInK25iiHNpbTB1Kj5D3m4,50
8
- moducomp-0.7.18.dist-info/licenses/LICENSE.txt,sha256=pt0cfIq9Wop21KDZYyQgP0M1YWYvKG0PomA5cUDC4TI,1536
9
- moducomp-0.7.18.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
10
- moducomp-0.7.18.dist-info/METADATA,sha256=6YLcTn2BiP6VNBKpk5vo1iigK4O9pW-KVhlprjWNHlY,21642
11
- moducomp-0.7.18.dist-info/RECORD,,
7
+ moducomp-0.7.23.dist-info/entry_points.txt,sha256=RSz22pcRnRKHRpABJKRjClInK25iiHNpbTB1Kj5D3m4,50
8
+ moducomp-0.7.23.dist-info/licenses/LICENSE.txt,sha256=pt0cfIq9Wop21KDZYyQgP0M1YWYvKG0PomA5cUDC4TI,1536
9
+ moducomp-0.7.23.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
10
+ moducomp-0.7.23.dist-info/METADATA,sha256=YiH9_ZC3ZRLg7xGD2upFh1svy4gSAEqb1VDR5fmdeDc,22304
11
+ moducomp-0.7.23.dist-info/RECORD,,