moducomp 0.7.9__py3-none-any.whl → 0.7.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
moducomp/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  moducomp: metabolic module completeness and complementarity for microbiomes.
3
3
  """
4
4
 
5
- __version__ = "0.7.9"
5
+ __version__ = "0.7.10"
6
6
  __author__ = "Juan C. Villada"
7
7
  __email__ = "jvillada@lbl.gov"
8
8
  __title__ = "moducomp"
moducomp/moducomp.py CHANGED
@@ -3122,21 +3122,55 @@ def run_kpct_with_fallback(
3122
3122
  app = typer.Typer()
3123
3123
 
3124
3124
  @app.command()
3125
- def pipeline(genomedir: str,
3126
- savedir: str,
3127
- ncpus: int=16,
3128
- adapt_headers: bool=False,
3129
- del_tmp: bool=True,
3130
- calculate_complementarity: int=0,
3125
+ def pipeline(
3126
+ genomedir: str = typer.Argument(
3127
+ ...,
3128
+ help="Directory containing genome FAA files.",
3129
+ ),
3130
+ savedir: str = typer.Argument(
3131
+ ...,
3132
+ help="Directory to write all outputs.",
3133
+ ),
3134
+ ncpus: int = typer.Option(
3135
+ 16,
3136
+ "--ncpus",
3137
+ "-n",
3138
+ min=1,
3139
+ help="Number of CPU cores to use for eggNOG-mapper and KPCT.",
3140
+ ),
3141
+ adapt_headers: bool = typer.Option(
3142
+ False,
3143
+ "--adapt-headers/--no-adapt-headers",
3144
+ help="Adapt FASTA headers to genome|protein_N before running eggNOG-mapper.",
3145
+ ),
3146
+ del_tmp: bool = typer.Option(
3147
+ True,
3148
+ "--del-tmp/--keep-tmp",
3149
+ help="Delete temporary files after completion.",
3150
+ ),
3151
+ calculate_complementarity: int = typer.Option(
3152
+ 0,
3153
+ "--calculate-complementarity",
3154
+ "-c",
3155
+ help="Complementarity size to compute (0 disables).",
3156
+ ),
3131
3157
  lowmem: bool = typer.Option(
3132
3158
  False,
3133
3159
  "--lowmem/--fullmem",
3134
3160
  "--low-mem/--full-mem",
3135
- help="Run emapper with reduced memory footprint, omitting --dbmem flag.",
3161
+ help="Run eggNOG-mapper with reduced memory footprint by omitting --dbmem.",
3162
+ ),
3163
+ verbose: bool = typer.Option(
3164
+ False,
3165
+ "--verbose/--quiet",
3166
+ help="Enable verbose output with detailed progress information.",
3136
3167
  ),
3137
- verbose: bool = typer.Option(False, "--verbose", help="Enable verbose output with detailed progress information."),
3138
3168
  log_level: str = typer.Option("INFO", "--log-level", "-l", help="Logging level (DEBUG, INFO, WARNING, ERROR)."),
3139
- eggnog_data_dir: Optional[str] = typer.Option(None, "--eggnog-data-dir", help="Path to eggNOG-mapper data directory (sets EGGNOG_DATA_DIR)."),
3169
+ eggnog_data_dir: Optional[str] = typer.Option(
3170
+ None,
3171
+ "--eggnog-data-dir",
3172
+ help="Path to eggNOG-mapper data directory (sets EGGNOG_DATA_DIR).",
3173
+ ),
3140
3174
  ) -> None:
3141
3175
  """
3142
3176
  Run the ModuComp pipeline on a directory of genome files.
@@ -3428,7 +3462,7 @@ def test(
3428
3462
  ),
3429
3463
  adapt_headers: bool = typer.Option(
3430
3464
  False,
3431
- "--adapt-headers",
3465
+ "--adapt-headers/--no-adapt-headers",
3432
3466
  help="Adapt FASTA headers before running the test pipeline.",
3433
3467
  ),
3434
3468
  del_tmp: bool = typer.Option(
@@ -3665,13 +3699,42 @@ def download_eggnog_data_cli() -> None:
3665
3699
 
3666
3700
  @app.command()
3667
3701
  def analyze_ko_matrix(
3668
- kos_matrix: str,
3669
- savedir: str,
3670
- calculate_complementarity: int=0,
3671
- kpct_outprefix: str="output_give_completeness",
3672
- del_tmp: bool=True,
3673
- ncpus: int=16,
3674
- verbose: bool = typer.Option(False, "--verbose", help="Enable verbose output with detailed progress information."),
3702
+ kos_matrix: str = typer.Argument(
3703
+ ...,
3704
+ help="Path to KO matrix CSV/TSV file.",
3705
+ ),
3706
+ savedir: str = typer.Argument(
3707
+ ...,
3708
+ help="Directory to write outputs.",
3709
+ ),
3710
+ calculate_complementarity: int = typer.Option(
3711
+ 0,
3712
+ "--calculate-complementarity",
3713
+ "-c",
3714
+ help="Complementarity size to compute (0 disables).",
3715
+ ),
3716
+ kpct_outprefix: str = typer.Option(
3717
+ "output_give_completeness",
3718
+ "--kpct-outprefix",
3719
+ help="Prefix for KPCT output files.",
3720
+ ),
3721
+ del_tmp: bool = typer.Option(
3722
+ True,
3723
+ "--del-tmp/--keep-tmp",
3724
+ help="Delete temporary files after completion.",
3725
+ ),
3726
+ ncpus: int = typer.Option(
3727
+ 16,
3728
+ "--ncpus",
3729
+ "-n",
3730
+ min=1,
3731
+ help="CPU cores for KPCT parallel processing.",
3732
+ ),
3733
+ verbose: bool = typer.Option(
3734
+ False,
3735
+ "--verbose/--quiet",
3736
+ help="Enable verbose output with detailed progress information.",
3737
+ ),
3675
3738
  log_level: str = typer.Option("INFO", "--log-level", "-l", help="Logging level (DEBUG, INFO, WARNING, ERROR)."),
3676
3739
  ) -> None:
3677
3740
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moducomp
3
- Version: 0.7.9
3
+ Version: 0.7.10
4
4
  Summary: moducomp: metabolic module completeness and complementarity for microbiomes.
5
5
  Keywords: bioinformatics,microbiome,metabolic,kegg,genomics
6
6
  Author-email: "Juan C. Villada" <jvillada@lbl.gov>
@@ -37,6 +37,7 @@ Project-URL: Repository, https://github.com/NeLLi-team/moducomp
37
37
  - Generation of complementarity reports highlighting modules completed through genome partnerships.
38
38
  - Tracks and reports the actual proteins that are responsible for the completion of the module in the combination of N genomes.
39
39
  - **Automatic resource monitoring** with timestamped logs tracking CPU usage, memory consumption, and runtime for reproducibility.
40
+ - **Consistent logging to stdout/stderr** with a per-command resource summary emitted at the end of each run.
40
41
 
41
42
  ## Installation (Recommended)
42
43
 
@@ -107,6 +108,82 @@ You should see the command line help without errors.
107
108
 
108
109
  `moducomp` provides two main commands: `pipeline` and `analyze-ko-matrix`. You can run these commands using Pixi tasks defined in `pyproject.toml` or directly within the Pixi environment.
109
110
 
111
+ ### Pipeline overview
112
+
113
+ The diagram below shows the main stages executed by ModuComp.
114
+
115
+ ```mermaid
116
+ graph TD
117
+ A([Start run]) --> B[Initialize logging and resource monitoring]
118
+ B --> C{Input type}
119
+ C -->|pipeline| D[Validate genome directory]
120
+ C -->|analyze-ko-matrix| H[Load existing KO matrix]
121
+ D --> E[Prepare genomes: adapt headers or copy to tmp]
122
+ E --> F[Merge genomes into single FAA]
123
+ F --> G["Run eggNOG-mapper (if needed)"]
124
+ G --> H["Create KO matrix (`kos_matrix.csv`)"]
125
+ H --> I[Convert KO matrix to KPCT input]
126
+ I --> J[Run KPCT (parallel with fallback)]
127
+ J --> K[Create module completeness matrix]
128
+ K --> L{Complementarity requested?}
129
+ L -->|Yes| M["Generate complementarity report(s)"]
130
+ L -->|No| N[Skip]
131
+ M --> O[Write outputs + logs]
132
+ N --> O
133
+ O --> P[Optional cleanup of `tmp/`]
134
+ P --> Q([Pipeline complete])
135
+ ```
136
+
137
+ ### CLI options and defaults
138
+
139
+ This section lists all CLI options implemented today, along with their default values.
140
+
141
+ #### `pipeline` command (positional args: `genomedir`, `savedir`)
142
+
143
+ | Option | Default | Description |
144
+ | --- | --- | --- |
145
+ | `--ncpus`, `-n` | `16` | Number of CPU cores to use for eggNOG-mapper and KPCT. |
146
+ | `--calculate-complementarity`, `-c` | `0` | Complementarity size to compute (0 disables). |
147
+ | `--adapt-headers/--no-adapt-headers` | `false` | Adapt FASTA headers to `genome\|protein_N`. |
148
+ | `--del-tmp/--keep-tmp` | `true` | Delete temporary files after completion. |
149
+ | `--lowmem/--fullmem` (`--low-mem/--full-mem`) | `fullmem` | Run eggNOG-mapper without `--dbmem` to reduce RAM. |
150
+ | `--verbose/--quiet` | `false` | Enable verbose progress output. |
151
+ | `--log-level`, `-l` | `INFO` | Logging level: `DEBUG`, `INFO`, `WARNING`, `ERROR`. |
152
+ | `--eggnog-data-dir` | `EGGNOG_DATA_DIR` | Path to eggNOG-mapper data (sets `EGGNOG_DATA_DIR`). |
153
+
154
+ #### `test` command (bundled test genomes)
155
+
156
+ | Option | Default | Description |
157
+ | --- | --- | --- |
158
+ | `--output-dir`, `-o` | `output_test_moducomp_<DATETIME>` | Output directory for test run. |
159
+ | `--ncpus`, `-n` | `2` | CPU cores for the test run. |
160
+ | `--calculate-complementarity`, `-c` | `2` | Complementarity size to compute (0 disables). |
161
+ | `--adapt-headers/--no-adapt-headers` | `false` | Adapt FASTA headers before the test. |
162
+ | `--del-tmp/--keep-tmp` | `true` | Delete temporary files after the test completes. |
163
+ | `--lowmem/--fullmem` (`--low-mem/--full-mem`) | `lowmem` | Low-memory mode is the default for tests. |
164
+ | `--verbose/--quiet` | `verbose` | Verbose output is the default for tests. |
165
+ | `--log-level`, `-l` | `INFO` | Logging level: `DEBUG`, `INFO`, `WARNING`, `ERROR`. |
166
+ | `--eggnog-data-dir` | `EGGNOG_DATA_DIR` | Path to eggNOG-mapper data (sets `EGGNOG_DATA_DIR`). |
167
+
168
+ #### `analyze-ko-matrix` command (positional args: `kos_matrix`, `savedir`)
169
+
170
+ | Option | Default | Description |
171
+ | --- | --- | --- |
172
+ | `--calculate-complementarity`, `-c` | `0` | Complementarity size to compute (0 disables). |
173
+ | `--kpct-outprefix` | `output_give_completeness` | Prefix for KPCT output files. |
174
+ | `--del-tmp/--keep-tmp` | `true` | Delete temporary files after completion. |
175
+ | `--ncpus`, `-n` | `16` | CPU cores for KPCT parallel processing. |
176
+ | `--verbose/--quiet` | `false` | Enable verbose progress output. |
177
+ | `--log-level`, `-l` | `INFO` | Logging level: `DEBUG`, `INFO`, `WARNING`, `ERROR`. |
178
+
179
+ #### `download-eggnog-data` command
180
+
181
+ | Option | Default | Description |
182
+ | --- | --- | --- |
183
+ | `--eggnog-data-dir` | `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog` | Destination for eggNOG-mapper data (sets `EGGNOG_DATA_DIR`). |
184
+ | `--log-level`, `-l` | `INFO` | Logging level: `DEBUG`, `INFO`, `WARNING`, `ERROR`. |
185
+ | `--verbose/--quiet` | `verbose` | Stream downloader output to the console. |
186
+
110
187
  ### Performance and parallel processing
111
188
 
112
189
  `moducomp` includes **parallel processing capabilities** for the KPCT (KEGG Pathways Completeness Tool) analysis, which can significantly improve performance for large datasets:
@@ -166,7 +243,7 @@ moducomp pipeline \
166
243
  # Optional flags:
167
244
  # --lowmem/--fullmem # Optional: Use low-mem if you have less than 64GB of RAM (default is full mem)
168
245
  # --adapt-headers # If your FASTA headers need modification
169
- # --del-tmp # To delete temporary files
246
+ # --del-tmp/--keep-tmp # Delete or keep temporary files
170
247
  # --eggnog-data-dir /path # If EGGNOG_DATA_DIR is not set
171
248
  # --verbose # Enable verbose output with detailed progress information
172
249
  ```
@@ -185,7 +262,7 @@ moducomp analyze-ko-matrix \
185
262
  --calculate-complementarity <N> # 0 to disable, 2 for 2-member, 3 for 3-member complementarity.
186
263
 
187
264
  # Optional flags:
188
- # --del-tmp false
265
+ # --keep-tmp # Keep temporary files
189
266
  # --verbose # Enable verbose output with detailed progress information
190
267
  ```
191
268
 
@@ -229,7 +306,7 @@ moducomp pipeline ./genomes ./output_lowmem --ncpus 8 --lowmem --calculate-compl
229
306
  - **`module_completeness.tsv`**: Module completeness scores for individual genomes and combinations
230
307
  - **`module_completeness_complementarity_Nmember.tsv`**: Complementarity reports (if requested)
231
308
  - **`logs/resource_usage_YYYYMMDD_HHMMSS.log`**: Resource monitoring log with CPU, memory, and runtime metrics for reproducibility
232
- - **`logs/moducomp.log`**: Detailed pipeline execution log
309
+ - **`logs/moducomp.log`**: Detailed pipeline execution log with a per-command resource summary at the end of the run
233
310
 
234
311
  ## Citation
235
312
  Villada, JC. & Schulz, F. (2025). Assessment of metabolic module completeness of genomes and metabolic complementarity in microbiomes with `moducomp`. `moducomp` (v0.5.1) Zenodo. https://doi.org/10.5281/zenodo.16116092
@@ -0,0 +1,11 @@
1
+ moducomp/__init__.py,sha256=KXXHUQxz2yNFGf1_6cHMtl1fr2gbXjF6UEIzns9QBTM,659
2
+ moducomp/__main__.py,sha256=1O2pv6IGjUgqnbqsiMLtVqjxWQpRtZUjp8LDljZ1bsI,185
3
+ moducomp/moducomp.py,sha256=R4_mXvfpe_ojfDKibduMvgkTC1QDn4sFUt9TFc9xVUw,142734
4
+ moducomp/data/test_genomes/IMG2562617132.faa,sha256=gZPh-08pMRdAWJRr3__TbnU1F68CdkDb3gxtpaCLTTc,356863
5
+ moducomp/data/test_genomes/IMG2568526683.faa,sha256=PxFJwe-68UGw7il1hGlNhZt4-2WzzxXxGE1GTskDnow,343109
6
+ moducomp/data/test_genomes/IMG2740892217.faa,sha256=WsId4sIPxENbqF6tYFouAgDCy6T0SXNY6TywxBNe-3E,548954
7
+ moducomp-0.7.10.dist-info/entry_points.txt,sha256=dwt0_w7Ex9p1vhfp2fl4WXJLBh50u9fXTRNlAOJkAd4,114
8
+ moducomp-0.7.10.dist-info/licenses/LICENSE.txt,sha256=pt0cfIq9Wop21KDZYyQgP0M1YWYvKG0PomA5cUDC4TI,1536
9
+ moducomp-0.7.10.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
10
+ moducomp-0.7.10.dist-info/METADATA,sha256=_WnWpR9pSOpKcgNOolcf1uZaUYLgg0udk1YrYxqu0A4,14726
11
+ moducomp-0.7.10.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- moducomp/__init__.py,sha256=jGWxMLVvJ2vExtn7UZ1_AN3yz2bw_A_wXw30h5LMLqw,658
2
- moducomp/__main__.py,sha256=1O2pv6IGjUgqnbqsiMLtVqjxWQpRtZUjp8LDljZ1bsI,185
3
- moducomp/moducomp.py,sha256=0q51Ob8EqNxJkArCq3uzMR2draku87SQiuok12FxMjQ,140864
4
- moducomp/data/test_genomes/IMG2562617132.faa,sha256=gZPh-08pMRdAWJRr3__TbnU1F68CdkDb3gxtpaCLTTc,356863
5
- moducomp/data/test_genomes/IMG2568526683.faa,sha256=PxFJwe-68UGw7il1hGlNhZt4-2WzzxXxGE1GTskDnow,343109
6
- moducomp/data/test_genomes/IMG2740892217.faa,sha256=WsId4sIPxENbqF6tYFouAgDCy6T0SXNY6TywxBNe-3E,548954
7
- moducomp-0.7.9.dist-info/entry_points.txt,sha256=dwt0_w7Ex9p1vhfp2fl4WXJLBh50u9fXTRNlAOJkAd4,114
8
- moducomp-0.7.9.dist-info/licenses/LICENSE.txt,sha256=pt0cfIq9Wop21KDZYyQgP0M1YWYvKG0PomA5cUDC4TI,1536
9
- moducomp-0.7.9.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
10
- moducomp-0.7.9.dist-info/METADATA,sha256=bR9nZwDPvsdniD7du2WaYZiRiDfA4lKB6uHX4wu3gII,10774
11
- moducomp-0.7.9.dist-info/RECORD,,