moducomp 0.7.18__tar.gz → 0.7.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ output*
2
2
  test_parallel*
3
3
  test_ko_matrix.csv
4
4
  __pycache__
5
+ logs/
5
6
 
6
7
 
7
8
  # Byte-compiled / optimized / DLL files
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moducomp
3
- Version: 0.7.18
3
+ Version: 0.7.23
4
4
  Summary: moducomp: metabolic module completeness and complementarity for microbiomes.
5
5
  Keywords: bioinformatics,microbiome,metabolic,kegg,genomics
6
6
  Author-email: "Juan C. Villada" <jvillada@lbl.gov>
@@ -60,7 +60,7 @@ pixi global install \
60
60
 
61
61
  ## Setup data (required)
62
62
 
63
- `moducomp` needs the eggNOG-mapper database to run. Use `moducomp setup` to download the data with eggNOG-mapper's official downloader and record the location for future runs. For upstream details, see the eggNOG-mapper setup guide: [eggNOG-mapper database setup](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.13#user-content-Setup).
63
+ `moducomp` needs the eggNOG-mapper database to run. Use `moducomp setup` to download the data using ModuComp's built-in downloader (with resume/retry) and record the location for future runs. For upstream details, see the eggNOG-mapper setup guide: [eggNOG-mapper database setup](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.13#user-content-Setup).
64
64
 
65
65
  ```bash
66
66
  moducomp setup
@@ -386,3 +386,9 @@ Genome identifiers are stored as `taxon_oid`. In pipeline mode, ModuComp expects
386
386
  ## Citation
387
387
  Villada, JC. & Schulz, F. (2025). Assessment of metabolic module completeness of genomes and metabolic complementarity in microbiomes with `moducomp` . `moducomp` (v0.5.1) Zenodo. https://doi.org/10.5281/zenodo.16116092
388
388
 
389
+ ## Tool Citations
390
+ If you use ModuComp in a study, please also cite the wrapped tools and data sources:
391
+
392
+ - eggNOG-mapper: Cantalapiedra CP et al. (2021). “eggNOG-mapper v2: Functional Annotation, Orthology Assignments, and Domain Prediction at the Metagenomic Scale.” Molecular Biology and Evolution. DOI: 10.1093/molbev/msab293. GitHub: https://github.com/eggnogdb/eggnog-mapper
393
+ - KEGG Pathways Completeness Tool (KPCT): GitHub: https://github.com/EBI-Metagenomics/kegg-pathways-completeness-tool. KEGG source: Kanehisa M, Goto S. (2000). “KEGG: Kyoto Encyclopedia of Genes and Genomes.” Nucleic Acids Research. DOI: 10.1093/nar/28.1.27
394
+
@@ -35,7 +35,7 @@ pixi global install \
35
35
 
36
36
  ## Setup data (required)
37
37
 
38
- `moducomp` needs the eggNOG-mapper database to run. Use `moducomp setup` to download the data with eggNOG-mapper's official downloader and record the location for future runs. For upstream details, see the eggNOG-mapper setup guide: [eggNOG-mapper database setup](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.13#user-content-Setup).
38
+ `moducomp` needs the eggNOG-mapper database to run. Use `moducomp setup` to download the data using ModuComp's built-in downloader (with resume/retry) and record the location for future runs. For upstream details, see the eggNOG-mapper setup guide: [eggNOG-mapper database setup](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.13#user-content-Setup).
39
39
 
40
40
  ```bash
41
41
  moducomp setup
@@ -360,3 +360,9 @@ Genome identifiers are stored as `taxon_oid`. In pipeline mode, ModuComp expects
360
360
 
361
361
  ## Citation
362
362
  Villada, JC. & Schulz, F. (2025). Assessment of metabolic module completeness of genomes and metabolic complementarity in microbiomes with `moducomp` . `moducomp` (v0.5.1) Zenodo. https://doi.org/10.5281/zenodo.16116092
363
+
364
+ ## Tool Citations
365
+ If you use ModuComp in a study, please also cite the wrapped tools and data sources:
366
+
367
+ - eggNOG-mapper: Cantalapiedra CP et al. (2021). “eggNOG-mapper v2: Functional Annotation, Orthology Assignments, and Domain Prediction at the Metagenomic Scale.” Molecular Biology and Evolution. DOI: 10.1093/molbev/msab293. GitHub: https://github.com/eggnogdb/eggnog-mapper
368
+ - KEGG Pathways Completeness Tool (KPCT): GitHub: https://github.com/EBI-Metagenomics/kegg-pathways-completeness-tool. KEGG source: Kanehisa M, Goto S. (2000). “KEGG: Kyoto Encyclopedia of Genes and Genomes.” Nucleic Acids Research. DOI: 10.1093/nar/28.1.27
@@ -2,7 +2,7 @@
2
2
  moducomp: metabolic module completeness and complementarity for microbiomes.
3
3
  """
4
4
 
5
- __version__ = "0.7.18"
5
+ __version__ = "0.7.23"
6
6
  __author__ = "Juan C. Villada"
7
7
  __email__ = "jvillada@lbl.gov"
8
8
  __title__ = "moducomp"
@@ -418,18 +418,208 @@ def _set_configured_eggnog_dir(path: Path, logger: Optional[logging.Logger] = No
418
418
  _save_config(data, logger)
419
419
 
420
420
 
421
- def _find_eggnog_downloader() -> Optional[str]:
422
- for name in ("download_eggnog_data.py", "download_eggnog_data"):
423
- path = shutil.which(name)
421
def _get_eggnog_db_version(logger: Optional[logging.Logger] = None) -> str:
    """Return the database version declared by the installed eggNOG-mapper.

    The value is read from ``eggnogmapper.version.__DB_VERSION__``.

    Args:
        logger: Optional logger that receives the error message on failure.

    Returns:
        The database version reported by eggNOG-mapper.

    Raises:
        typer.Exit: With exit code 1 when importing the version fails or the
            reported version is empty.
    """
    try:
        from eggnogmapper.version import __DB_VERSION__ as db_version
    except Exception as exc:
        # Deliberately broad: whatever the import raises, setup cannot go on.
        if logger:
            logger.error("eggNOG-mapper is required for setup: %s", exc)
        # Chain the cause so the original import failure stays inspectable.
        raise typer.Exit(1) from exc

    if not db_version:
        if logger:
            logger.error("eggNOG-mapper did not report a database version.")
        raise typer.Exit(1)
    return db_version
433
+
434
+
435
+ def _select_download_tool() -> Tuple[str, str]:
436
+ for tool in ("aria2c", "wget", "curl"):
437
+ path = shutil.which(tool)
424
438
  if path:
425
- try:
426
- wrapper_text = Path(path).read_text(encoding="utf-8", errors="ignore")
427
- except OSError:
428
- wrapper_text = ""
429
- if "moducomp.moducomp" in wrapper_text:
439
+ return tool, path
440
+ raise FileNotFoundError("No download tool found (aria2c, wget, or curl).")
441
+
442
+
443
def _run_download_cmd(
    cmd: List[str],
    logger: Optional[logging.Logger] = None,
    verbose: bool = True,
    description: str = "Command",
) -> None:
    """Run an external command and turn any failure into ``typer.Exit``.

    Args:
        cmd: Argument vector passed to ``subprocess.run`` (no shell involved).
        logger: Optional logger; receives the shell-quoted command line and
            any failure message.
        verbose: When true the child inherits the console streams; when false
            stdout and stderr are both sent to ``subprocess.DEVNULL``.
        description: Label used as the prefix of the log messages.

    Raises:
        typer.Exit: With the child's return code when it exits non-zero, or
            with code 1 when the command cannot be started at all.
    """
    if logger:
        logger.info("%s: %s", description, " ".join(shlex.quote(arg) for arg in cmd))

    stream_kwargs = {} if verbose else {"stdout": subprocess.DEVNULL, "stderr": subprocess.STDOUT}
    try:
        subprocess.run(cmd, check=True, **stream_kwargs)
    except subprocess.CalledProcessError as exc:
        if logger:
            # Use the caller's label: this helper also runs non-download commands.
            logger.error("%s failed with exit code %s.", description, exc.returncode)
        raise typer.Exit(exc.returncode) from exc
    except OSError as exc:
        # Raised when the executable is missing or cannot be executed; report
        # it like any other failure instead of letting a traceback escape.
        if logger:
            logger.error("%s could not be started: %s", description, exc)
        raise typer.Exit(1) from exc
460
+
461
+
462
def _download_file(
    url: str,
    dest: Path,
    logger: Optional[logging.Logger] = None,
    verbose: bool = True,
) -> None:
    """Fetch ``url`` into ``dest`` using an external command-line downloader.

    The downloader is whatever ``_select_download_tool`` returns; a dedicated
    argument list exists for ``aria2c`` and ``wget``, and any other tool name
    gets the ``curl`` argument list. Each list carries the tool's own
    continue/retry/timeout options. The parent directory of ``dest`` is
    created if needed, and the command is executed via ``_run_download_cmd``.

    Args:
        url: Address of the file to download.
        dest: Path the downloaded file is written to.
        logger: Optional logger for error and command messages.
        verbose: When false, the tool's quiet flag(s) are placed right after
            the executable and output is suppressed by ``_run_download_cmd``.

    Raises:
        typer.Exit: With code 1 when no download tool is found; failures of
            the download command surface through ``_run_download_cmd``.
    """
    try:
        tool, tool_path = _select_download_tool()
    except FileNotFoundError as exc:
        if logger:
            logger.error("%s", exc)
        raise typer.Exit(1)
    dest.parent.mkdir(parents=True, exist_ok=True)

    quiet = not verbose
    if tool == "aria2c":
        quiet_flags = ["--quiet"] if quiet else []
        options = [
            "--console-log-level=warn",
            "--dir", str(dest.parent),
            "--out", dest.name,
            "--allow-overwrite=true",
            "--auto-file-renaming=false",
            "--continue=true",
            "--file-allocation=none",
            "--max-tries=0",
            "--retry-wait=30",
            "--timeout=60",
            "--summary-interval=60",
            "-x", "8",
            "-s", "8",
            "-k", "1M",
        ]
    elif tool == "wget":
        quiet_flags = ["--quiet"] if quiet else []
        options = [
            "--continue",
            "--tries=0",
            "--retry-connrefused",
            "--waitretry=30",
            "--read-timeout=30",
            "--timeout=30",
            "--progress=dot:giga",
            "-O", str(dest),
        ]
    else:
        quiet_flags = ["--silent", "--show-error"] if quiet else []
        options = [
            "--location",
            "--continue-at", "-",
            "--retry", "9999",
            "--retry-delay", "30",
            "--retry-all-errors",
            "--connect-timeout", "30",
            "--max-time", "0",
            "-o", str(dest),
        ]

    # Same token order as before: executable, quiet flags, options, URL last.
    cmd = [tool_path, *quiet_flags, *options, url]
    _run_download_cmd(cmd, logger=logger, verbose=verbose, description="Download command")
542
+
543
+
544
def _extract_archive(
    archive_path: Path,
    target_dir: Path,
    mode: str,
    logger: Optional[logging.Logger] = None,
    verbose: bool = True,
) -> None:
    """Unpack a downloaded archive with ``gunzip`` or ``tar``.

    Args:
        archive_path: Archive file to unpack.
        target_dir: Directory passed to ``tar -C``; not used in "gunzip" mode.
        mode: ``"gunzip"`` runs ``gunzip -f`` on the archive; ``"tar"`` runs
            ``tar -zxf`` into ``target_dir`` and then removes the archive.
        logger: Optional logger for command, warning and error messages.
        verbose: Forwarded to ``_run_download_cmd``.

    Raises:
        typer.Exit: With code 1 for an unrecognised ``mode``; command failures
            surface through ``_run_download_cmd``.
    """
    if mode not in ("gunzip", "tar"):
        if logger:
            logger.error("Unknown archive extraction mode: %s", mode)
        raise typer.Exit(1)

    if mode == "gunzip":
        _run_download_cmd(
            ["gunzip", "-f", str(archive_path)],
            logger=logger,
            verbose=verbose,
            description="Decompression command",
        )
        return

    _run_download_cmd(
        ["tar", "-zxf", str(archive_path), "-C", str(target_dir)],
        logger=logger,
        verbose=verbose,
        description="Extraction command",
    )
    # Removing the tarball is best effort; a failure only produces a warning.
    try:
        archive_path.unlink()
    except OSError:
        if logger:
            logger.warning("Could not remove archive %s after extraction.", archive_path)
569
+
570
+
571
+ def _download_eggnog_core_data(
572
+ target_dir: Path,
573
+ force: bool,
574
+ yes: bool,
575
+ logger: Optional[logging.Logger] = None,
576
+ verbose: bool = True,
577
+ ) -> None:
578
+ db_version = _get_eggnog_db_version(logger)
579
+ base_url = f"http://eggnog5.embl.de/download/emapperdb-{db_version}"
580
+ downloads = [
581
+ ("eggnog.db.gz", "eggnog.db", "gunzip"),
582
+ ("eggnog.taxa.tar.gz", "eggnog.taxa.db", "tar"),
583
+ ("eggnog_proteins.dmnd.gz", "eggnog_proteins.dmnd", "gunzip"),
584
+ ]
585
+
586
+ for archive_name, output_name, mode in downloads:
587
+ archive_path = target_dir / archive_name
588
+ output_path = target_dir / output_name
589
+
590
+ if force:
591
+ for path in (archive_path, output_path):
592
+ if path.exists():
593
+ try:
594
+ path.unlink()
595
+ except OSError:
596
+ if logger:
597
+ logger.warning("Failed to remove %s before re-download.", path)
598
+
599
+ if output_path.exists():
600
+ if logger:
601
+ logger.info("eggNOG data file already present: %s", output_path)
602
+ continue
603
+
604
+ if not yes:
605
+ prompt = f"Download {archive_name} to {target_dir}?"
606
+ if not typer.confirm(prompt):
607
+ if logger:
608
+ logger.warning("Skipped download of %s by user choice.", archive_name)
430
609
  continue
431
- return path
432
- return None
610
+
611
+ url = f"{base_url}/{archive_name}"
612
+ if not archive_path.exists():
613
+ if logger:
614
+ logger.info("Downloading %s from %s", archive_name, url)
615
+ _download_file(url, archive_path, logger=logger, verbose=verbose)
616
+
617
+ _extract_archive(archive_path, target_dir, mode, logger=logger, verbose=verbose)
618
+
619
+ if not output_path.exists():
620
+ if logger:
621
+ logger.error("Expected eggNOG file missing after extraction: %s", output_path)
622
+ raise typer.Exit(1)
433
623
 
434
624
  def run_subprocess_with_logging(
435
625
  cmd: List[str],
@@ -3847,7 +4037,7 @@ def setup(
3847
4037
  help="Stream downloader output to the console.",
3848
4038
  ),
3849
4039
  ) -> None:
3850
- """Download eggNOG-mapper data (via download_eggnog_data.py) and persist the location for future runs."""
4040
+ """Download eggNOG-mapper core data and persist the location for future runs."""
3851
4041
  target_dir = Path(eggnog_data_dir).expanduser().resolve() if eggnog_data_dir else default_eggnog_data_dir()
3852
4042
  target_dir = target_dir.expanduser().resolve()
3853
4043
 
@@ -3863,29 +4053,8 @@ def setup(
3863
4053
  if _has_eggnog_core_files(target_dir) and not force:
3864
4054
  logger.info("eggNOG data already present; skipping download.")
3865
4055
  else:
3866
- downloader = _find_eggnog_downloader()
3867
- if downloader is None:
3868
- message = (
3869
- "download_eggnog_data.py not found in PATH. "
3870
- "Ensure eggnog-mapper is installed."
3871
- )
3872
- emit_error(message, logger)
3873
- raise typer.Exit(1)
3874
-
3875
- cmd = [downloader, "--data_dir", str(target_dir)]
3876
- if yes:
3877
- cmd.append("-y")
3878
- if force:
3879
- cmd.append("-f")
3880
- if not verbose:
3881
- cmd.append("-q")
3882
-
3883
- logger.info("Running eggNOG-mapper downloader: %s", " ".join(shlex.quote(arg) for arg in cmd))
3884
- try:
3885
- subprocess.run(cmd, check=True)
3886
- except subprocess.CalledProcessError as exc:
3887
- emit_error(f"eggNOG-mapper downloader failed with exit code {exc.returncode}", logger)
3888
- raise typer.Exit(exc.returncode)
4056
+ logger.info("Downloading eggNOG-mapper core databases.")
4057
+ _download_eggnog_core_data(target_dir, force, yes, logger=logger, verbose=verbose)
3889
4058
 
3890
4059
  if not _has_eggnog_core_files(target_dir):
3891
4060
  emit_error(
@@ -1340,8 +1340,8 @@ packages:
1340
1340
  timestamp: 1737229717596
1341
1341
  - pypi: ./
1342
1342
  name: moducomp
1343
- version: 0.7.14
1344
- sha256: 026a6159ce9247e5ce3136eee254748b471c239b82283298602c7c42837348be
1343
+ version: 0.7.19
1344
+ sha256: b5cc6b0c3691a3d299d910db6987c31bc75eb0efb70a7e1ef69d825354b1e820
1345
1345
  requires_dist:
1346
1346
  - typer>=0.9.1,<0.10.0
1347
1347
  - pandas>=1.5,<2.3
@@ -51,7 +51,7 @@ eggnog-mapper = ">=2.1.9,<2.2.0"
51
51
  kegg-pathways-completeness = "*"
52
52
 
53
53
  [tool.pixi.tasks]
54
- moducomp = "python -m moducomp --help"
54
+ moducomp = "python -m moducomp"
55
55
  test = "python -m moducomp test --ncpus 2 --calculate-complementarity 2"
56
56
  setup = "python -m moducomp setup"
57
57
  pipeline = "python -m moducomp pipeline --help"
@@ -1,5 +1,5 @@
1
1
  context:
2
- version: 0.7.18
2
+ version: 0.7.23
3
3
 
4
4
  package:
5
5
  name: moducomp
@@ -7,7 +7,7 @@ package:
7
7
 
8
8
  source:
9
9
  - url: https://pypi.org/packages/source/m/moducomp/moducomp-${{ version }}.tar.gz
10
- sha256: c2780812e19805007dbb85edb7fe5ca6e2d7f971c13114e5eb9ab9780cc48a28
10
+ sha256: b160286cbd1193fc521bb5bf7bc295d68c9e9df29dd3918cf2154dd4b87710c6
11
11
 
12
12
  build:
13
13
  script:
File without changes