moducomp 0.7.14__tar.gz → 0.7.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {moducomp-0.7.14 → moducomp-0.7.16}/PKG-INFO +9 -6
- {moducomp-0.7.14 → moducomp-0.7.16}/README.md +8 -5
- {moducomp-0.7.14 → moducomp-0.7.16}/moducomp/__init__.py +1 -1
- {moducomp-0.7.14 → moducomp-0.7.16}/moducomp/moducomp.py +25 -17
- {moducomp-0.7.14 → moducomp-0.7.16}/pixi.lock +2 -2
- {moducomp-0.7.14 → moducomp-0.7.16}/recipe.yaml +2 -2
- {moducomp-0.7.14 → moducomp-0.7.16}/.gitignore +0 -0
- {moducomp-0.7.14 → moducomp-0.7.16}/LICENSE.txt +0 -0
- {moducomp-0.7.14 → moducomp-0.7.16}/moducomp/__main__.py +0 -0
- {moducomp-0.7.14 → moducomp-0.7.16}/moducomp/data/test_genomes/IMG2562617132.faa +0 -0
- {moducomp-0.7.14 → moducomp-0.7.16}/moducomp/data/test_genomes/IMG2568526683.faa +0 -0
- {moducomp-0.7.14 → moducomp-0.7.16}/moducomp/data/test_genomes/IMG2740892217.faa +0 -0
- {moducomp-0.7.14 → moducomp-0.7.16}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: moducomp
|
|
3
|
-
Version: 0.7.14
|
|
3
|
+
Version: 0.7.16
|
|
4
4
|
Summary: moducomp: metabolic module completeness and complementarity for microbiomes.
|
|
5
5
|
Keywords: bioinformatics,microbiome,metabolic,kegg,genomics
|
|
6
6
|
Author-email: "Juan C. Villada" <jvillada@lbl.gov>
|
|
@@ -153,7 +153,7 @@ This section lists all CLI options implemented today, along with their default v
|
|
|
153
153
|
| `--calculate-complementarity`, `-c` | `0` | Complementarity size to compute (0 disables). |
|
|
154
154
|
| `--adapt-headers/--no-adapt-headers` | `false` | Adapt FASTA headers to `genome|protein_N`. |
|
|
155
155
|
| `--del-tmp/--keep-tmp` | `true` | Delete temporary files after completion. |
|
|
156
|
-
| `--lowmem/--fullmem` (`--low-mem/--full-mem`) | `
|
|
156
|
+
| `--lowmem/--fullmem` (`--low-mem/--full-mem`) | `lowmem` | Run eggNOG-mapper without `--dbmem` to reduce RAM. |
|
|
157
157
|
| `--verbose/--quiet` | `false` | Enable verbose progress output. |
|
|
158
158
|
| `--validate/--no-validate` | `validate` | Run post-run validation checks. |
|
|
159
159
|
| `--validate-report/--no-validate-report` | `validate-report` | Write `validation_report.json` in the output directory. |
|
|
@@ -259,7 +259,7 @@ moducomp validate /path/to/output --strict
|
|
|
259
259
|
|
|
260
260
|
### ⚠️ Important note 2
|
|
261
261
|
|
|
262
|
-
`moducomp` is specifically designed for large scale analysis of microbiomes with hundreds of members, and works on Linux systems with at least **64GB of RAM**.
|
|
262
|
+
`moducomp` is specifically designed for large scale analysis of microbiomes with hundreds of members, and works on Linux systems with at least **64GB of RAM**. For robustness, **low-memory mode is now the default** for `pipeline` and `test`. If you have ample RAM and want full-memory mode, add `--fullmem` (`--full-mem`).
|
|
263
263
|
|
|
264
264
|
### Notes on bundled test data
|
|
265
265
|
|
|
@@ -296,7 +296,7 @@ moducomp pipeline \
|
|
|
296
296
|
--ncpus <number_of_cpus_to_use> \
|
|
297
297
|
--calculate-complementarity <N> # 0 to disable, 2 for 2-member, 3 for 3-member complementarity.
|
|
298
298
|
# Optional flags:
|
|
299
|
-
# --
|
|
299
|
+
# --fullmem # Optional: Use full-mem if you have ample RAM (default is low-mem)
|
|
300
300
|
# --adapt-headers # If your FASTA headers need modification
|
|
301
301
|
# --del-tmp/--keep-tmp # Delete or keep temporary files
|
|
302
302
|
# --eggnog-data-dir /path # If EGGNOG_DATA_DIR is not set
|
|
@@ -349,8 +349,11 @@ moducomp pipeline ./large_genome_collection ./output_large --ncpus 32 --calculat
|
|
|
349
349
|
# For moderate datasets with verbose output
|
|
350
350
|
moducomp analyze-ko-matrix ./ko_matrix.csv ./output_moderate --ncpus 16 --calculate-complementarity 2 --verbose
|
|
351
351
|
|
|
352
|
-
# For systems with limited memory
|
|
353
|
-
moducomp pipeline ./genomes ./output_lowmem --ncpus 8 --
|
|
352
|
+
# For systems with limited memory (default behavior)
|
|
353
|
+
moducomp pipeline ./genomes ./output_lowmem --ncpus 8 --calculate-complementarity 2
|
|
354
|
+
|
|
355
|
+
# For systems with ample RAM
|
|
356
|
+
moducomp pipeline ./genomes ./output_fullmem --ncpus 8 --fullmem --calculate-complementarity 2
|
|
354
357
|
```
|
|
355
358
|
|
|
356
359
|
## Expected outputs
|
|
@@ -128,7 +128,7 @@ This section lists all CLI options implemented today, along with their default v
|
|
|
128
128
|
| `--calculate-complementarity`, `-c` | `0` | Complementarity size to compute (0 disables). |
|
|
129
129
|
| `--adapt-headers/--no-adapt-headers` | `false` | Adapt FASTA headers to `genome|protein_N`. |
|
|
130
130
|
| `--del-tmp/--keep-tmp` | `true` | Delete temporary files after completion. |
|
|
131
|
-
| `--lowmem/--fullmem` (`--low-mem/--full-mem`) | `
|
|
131
|
+
| `--lowmem/--fullmem` (`--low-mem/--full-mem`) | `lowmem` | Run eggNOG-mapper without `--dbmem` to reduce RAM. |
|
|
132
132
|
| `--verbose/--quiet` | `false` | Enable verbose progress output. |
|
|
133
133
|
| `--validate/--no-validate` | `validate` | Run post-run validation checks. |
|
|
134
134
|
| `--validate-report/--no-validate-report` | `validate-report` | Write `validation_report.json` in the output directory. |
|
|
@@ -234,7 +234,7 @@ moducomp validate /path/to/output --strict
|
|
|
234
234
|
|
|
235
235
|
### ⚠️ Important note 2
|
|
236
236
|
|
|
237
|
-
`moducomp` is specifically designed for large scale analysis of microbiomes with hundreds of members, and works on Linux systems with at least **64GB of RAM**.
|
|
237
|
+
`moducomp` is specifically designed for large scale analysis of microbiomes with hundreds of members, and works on Linux systems with at least **64GB of RAM**. For robustness, **low-memory mode is now the default** for `pipeline` and `test`. If you have ample RAM and want full-memory mode, add `--fullmem` (`--full-mem`).
|
|
238
238
|
|
|
239
239
|
### Notes on bundled test data
|
|
240
240
|
|
|
@@ -271,7 +271,7 @@ moducomp pipeline \
|
|
|
271
271
|
--ncpus <number_of_cpus_to_use> \
|
|
272
272
|
--calculate-complementarity <N> # 0 to disable, 2 for 2-member, 3 for 3-member complementarity.
|
|
273
273
|
# Optional flags:
|
|
274
|
-
# --
|
|
274
|
+
# --fullmem # Optional: Use full-mem if you have ample RAM (default is low-mem)
|
|
275
275
|
# --adapt-headers # If your FASTA headers need modification
|
|
276
276
|
# --del-tmp/--keep-tmp # Delete or keep temporary files
|
|
277
277
|
# --eggnog-data-dir /path # If EGGNOG_DATA_DIR is not set
|
|
@@ -324,8 +324,11 @@ moducomp pipeline ./large_genome_collection ./output_large --ncpus 32 --calculat
|
|
|
324
324
|
# For moderate datasets with verbose output
|
|
325
325
|
moducomp analyze-ko-matrix ./ko_matrix.csv ./output_moderate --ncpus 16 --calculate-complementarity 2 --verbose
|
|
326
326
|
|
|
327
|
-
# For systems with limited memory
|
|
328
|
-
moducomp pipeline ./genomes ./output_lowmem --ncpus 8 --
|
|
327
|
+
# For systems with limited memory (default behavior)
|
|
328
|
+
moducomp pipeline ./genomes ./output_lowmem --ncpus 8 --calculate-complementarity 2
|
|
329
|
+
|
|
330
|
+
# For systems with ample RAM
|
|
331
|
+
moducomp pipeline ./genomes ./output_fullmem --ncpus 8 --fullmem --calculate-complementarity 2
|
|
329
332
|
```
|
|
330
333
|
|
|
331
334
|
## Expected outputs
|
|
@@ -3272,7 +3272,7 @@ def pipeline(
|
|
|
3272
3272
|
help="Complementarity size to compute (0 disables).",
|
|
3273
3273
|
),
|
|
3274
3274
|
lowmem: bool = typer.Option(
|
|
3275
|
-
|
|
3275
|
+
True,
|
|
3276
3276
|
"--lowmem/--fullmem",
|
|
3277
3277
|
"--low-mem/--full-mem",
|
|
3278
3278
|
help="Run eggNOG-mapper with reduced memory footprint by omitting --dbmem.",
|
|
@@ -3449,6 +3449,7 @@ def _run_pipeline_core(
|
|
|
3449
3449
|
tmp_emapper_output_dir = f"{get_tmp_dir(savedir)}/emapper_output"
|
|
3450
3450
|
tmp_emapper_file = f"{tmp_emapper_output_dir}/emapper_out.emapper.annotations"
|
|
3451
3451
|
ko_matrix_path = f"{savedir}/kos_matrix.csv"
|
|
3452
|
+
kpct_outprefix = "output_give_completeness"
|
|
3452
3453
|
|
|
3453
3454
|
# Process annotations and create KO matrix
|
|
3454
3455
|
if os.path.exists(ko_matrix_path):
|
|
@@ -3517,7 +3518,6 @@ def _run_pipeline_core(
|
|
|
3517
3518
|
)
|
|
3518
3519
|
else:
|
|
3519
3520
|
# Set up KPCT processing
|
|
3520
|
-
kpct_outprefix = "output_give_completeness"
|
|
3521
3521
|
kpct_input_file = os.path.join(savedir, "ko_file_for_kpct.txt")
|
|
3522
3522
|
|
|
3523
3523
|
# Check if KPCT output already exists
|
|
@@ -3602,7 +3602,7 @@ def _run_pipeline_core(
|
|
|
3602
3602
|
try:
|
|
3603
3603
|
validate(
|
|
3604
3604
|
savedir=savedir,
|
|
3605
|
-
mode="
|
|
3605
|
+
mode="pipeline",
|
|
3606
3606
|
calculate_complementarity=calculate_complementarity,
|
|
3607
3607
|
kpct_outprefix=kpct_outprefix,
|
|
3608
3608
|
strict=validate_strict,
|
|
@@ -4151,24 +4151,32 @@ def analyze_ko_matrix(
|
|
|
4151
4151
|
# Generate final resource usage summary
|
|
4152
4152
|
log_final_resource_summary(resource_log_file, start_time, logger, verbose)
|
|
4153
4153
|
|
|
4154
|
-
# Display pipeline completion summary
|
|
4155
|
-
display_pipeline_completion_summary(start_time, savedir, logger, verbose)
|
|
4156
|
-
|
|
4157
4154
|
if run_validation:
|
|
4158
4155
|
logger.info("Running post-run validation checks.")
|
|
4159
4156
|
report_path = None
|
|
4160
4157
|
if validation_report:
|
|
4161
4158
|
report_path = os.path.join(savedir, "validation_report.json")
|
|
4162
|
-
|
|
4163
|
-
|
|
4164
|
-
|
|
4165
|
-
|
|
4166
|
-
|
|
4167
|
-
|
|
4168
|
-
|
|
4169
|
-
|
|
4170
|
-
|
|
4171
|
-
|
|
4159
|
+
try:
|
|
4160
|
+
validate(
|
|
4161
|
+
savedir=savedir,
|
|
4162
|
+
mode="ko-matrix",
|
|
4163
|
+
calculate_complementarity=calculate_complementarity,
|
|
4164
|
+
kpct_outprefix=kpct_outprefix,
|
|
4165
|
+
strict=validate_strict,
|
|
4166
|
+
report=report_path,
|
|
4167
|
+
verbose=verbose,
|
|
4168
|
+
log_level=log_level,
|
|
4169
|
+
)
|
|
4170
|
+
except typer.Exit as exc:
|
|
4171
|
+
if logger:
|
|
4172
|
+
logger.error("Validation failed with exit code %s.", exc.exit_code)
|
|
4173
|
+
logger.error("Outputs written to: %s", savedir)
|
|
4174
|
+
if report_path:
|
|
4175
|
+
logger.error("Validation report: %s", report_path)
|
|
4176
|
+
raise
|
|
4177
|
+
|
|
4178
|
+
# Display pipeline completion summary
|
|
4179
|
+
display_pipeline_completion_summary(start_time, savedir, logger, verbose)
|
|
4172
4180
|
|
|
4173
4181
|
except Exception as e:
|
|
4174
4182
|
if logger:
|
|
@@ -4753,7 +4761,7 @@ def validate(
|
|
|
4753
4761
|
)
|
|
4754
4762
|
|
|
4755
4763
|
# Complementarity checks
|
|
4756
|
-
comp_pattern = re.compile(r"module_completeness_complementarity_(
|
|
4764
|
+
comp_pattern = re.compile(r"module_completeness_complementarity_(\d+)member\.tsv$")
|
|
4757
4765
|
comp_files: Dict[int, Path] = {}
|
|
4758
4766
|
for file_path in Path(savedir).glob("module_completeness_complementarity_*member.tsv"):
|
|
4759
4767
|
match = comp_pattern.match(file_path.name)
|
|
@@ -1340,8 +1340,8 @@ packages:
|
|
|
1340
1340
|
timestamp: 1737229717596
|
|
1341
1341
|
- pypi: ./
|
|
1342
1342
|
name: moducomp
|
|
1343
|
-
version: 0.7.
|
|
1344
|
-
sha256:
|
|
1343
|
+
version: 0.7.14
|
|
1344
|
+
sha256: 026a6159ce9247e5ce3136eee254748b471c239b82283298602c7c42837348be
|
|
1345
1345
|
requires_dist:
|
|
1346
1346
|
- typer>=0.9.1,<0.10.0
|
|
1347
1347
|
- pandas>=1.5,<2.3
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
context:
|
|
2
|
-
version: 0.7.
|
|
2
|
+
version: 0.7.16
|
|
3
3
|
|
|
4
4
|
package:
|
|
5
5
|
name: moducomp
|
|
@@ -7,7 +7,7 @@ package:
|
|
|
7
7
|
|
|
8
8
|
source:
|
|
9
9
|
- url: https://pypi.org/packages/source/m/moducomp/moducomp-${{ version }}.tar.gz
|
|
10
|
-
sha256:
|
|
10
|
+
sha256: 917f8ebcba65b5607985fa5fdd7fc0823cd3edb401cc34e2efa0d6f9e650a62b
|
|
11
11
|
|
|
12
12
|
build:
|
|
13
13
|
script:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|