moducomp 0.7.13__py3-none-any.whl → 0.7.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moducomp/__init__.py +1 -1
- moducomp/moducomp.py +42 -26
- {moducomp-0.7.13.dist-info → moducomp-0.7.16.dist-info}/METADATA +17 -8
- {moducomp-0.7.13.dist-info → moducomp-0.7.16.dist-info}/RECORD +7 -7
- {moducomp-0.7.13.dist-info → moducomp-0.7.16.dist-info}/WHEEL +0 -0
- {moducomp-0.7.13.dist-info → moducomp-0.7.16.dist-info}/entry_points.txt +0 -0
- {moducomp-0.7.13.dist-info → moducomp-0.7.16.dist-info}/licenses/LICENSE.txt +0 -0
moducomp/__init__.py
CHANGED
moducomp/moducomp.py
CHANGED
|
@@ -145,15 +145,23 @@ def require_eggnog_data_dir(eggnog_data_dir: Optional[str], logger: Optional[log
|
|
|
145
145
|
if eggnog_data_dir:
|
|
146
146
|
os.environ["EGGNOG_DATA_DIR"] = eggnog_data_dir
|
|
147
147
|
|
|
148
|
-
env_value = os.environ.get("EGGNOG_DATA_DIR", "")
|
|
149
|
-
if not env_value
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
"
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
148
|
+
env_value = os.environ.get("EGGNOG_DATA_DIR", "").strip()
|
|
149
|
+
if not env_value:
|
|
150
|
+
default_dir = default_eggnog_data_dir()
|
|
151
|
+
if default_dir.exists() and default_dir.is_dir() and any(default_dir.iterdir()):
|
|
152
|
+
os.environ["EGGNOG_DATA_DIR"] = str(default_dir)
|
|
153
|
+
env_value = str(default_dir)
|
|
154
|
+
if logger:
|
|
155
|
+
logger.info("EGGNOG_DATA_DIR not set; using default %s", env_value)
|
|
156
|
+
else:
|
|
157
|
+
message = (
|
|
158
|
+
"EGGNOG_DATA_DIR is required to run eggNOG-mapper. "
|
|
159
|
+
"Set the EGGNOG_DATA_DIR environment variable or pass --eggnog-data-dir. "
|
|
160
|
+
f"Default location is {default_dir}. "
|
|
161
|
+
"Download the data with: download_eggnog_data.py or moducomp download-eggnog-data"
|
|
162
|
+
)
|
|
163
|
+
emit_error(message, logger)
|
|
164
|
+
raise typer.Exit(1)
|
|
157
165
|
|
|
158
166
|
data_dir = Path(env_value).expanduser().resolve()
|
|
159
167
|
if not data_dir.exists() or not data_dir.is_dir():
|
|
@@ -3264,7 +3272,7 @@ def pipeline(
|
|
|
3264
3272
|
help="Complementarity size to compute (0 disables).",
|
|
3265
3273
|
),
|
|
3266
3274
|
lowmem: bool = typer.Option(
|
|
3267
|
-
|
|
3275
|
+
True,
|
|
3268
3276
|
"--lowmem/--fullmem",
|
|
3269
3277
|
"--low-mem/--full-mem",
|
|
3270
3278
|
help="Run eggNOG-mapper with reduced memory footprint by omitting --dbmem.",
|
|
@@ -3441,6 +3449,7 @@ def _run_pipeline_core(
|
|
|
3441
3449
|
tmp_emapper_output_dir = f"{get_tmp_dir(savedir)}/emapper_output"
|
|
3442
3450
|
tmp_emapper_file = f"{tmp_emapper_output_dir}/emapper_out.emapper.annotations"
|
|
3443
3451
|
ko_matrix_path = f"{savedir}/kos_matrix.csv"
|
|
3452
|
+
kpct_outprefix = "output_give_completeness"
|
|
3444
3453
|
|
|
3445
3454
|
# Process annotations and create KO matrix
|
|
3446
3455
|
if os.path.exists(ko_matrix_path):
|
|
@@ -3509,7 +3518,6 @@ def _run_pipeline_core(
|
|
|
3509
3518
|
)
|
|
3510
3519
|
else:
|
|
3511
3520
|
# Set up KPCT processing
|
|
3512
|
-
kpct_outprefix = "output_give_completeness"
|
|
3513
3521
|
kpct_input_file = os.path.join(savedir, "ko_file_for_kpct.txt")
|
|
3514
3522
|
|
|
3515
3523
|
# Check if KPCT output already exists
|
|
@@ -3594,7 +3602,7 @@ def _run_pipeline_core(
|
|
|
3594
3602
|
try:
|
|
3595
3603
|
validate(
|
|
3596
3604
|
savedir=savedir,
|
|
3597
|
-
mode="
|
|
3605
|
+
mode="pipeline",
|
|
3598
3606
|
calculate_complementarity=calculate_complementarity,
|
|
3599
3607
|
kpct_outprefix=kpct_outprefix,
|
|
3600
3608
|
strict=validate_strict,
|
|
@@ -4143,24 +4151,32 @@ def analyze_ko_matrix(
|
|
|
4143
4151
|
# Generate final resource usage summary
|
|
4144
4152
|
log_final_resource_summary(resource_log_file, start_time, logger, verbose)
|
|
4145
4153
|
|
|
4146
|
-
# Display pipeline completion summary
|
|
4147
|
-
display_pipeline_completion_summary(start_time, savedir, logger, verbose)
|
|
4148
|
-
|
|
4149
4154
|
if run_validation:
|
|
4150
4155
|
logger.info("Running post-run validation checks.")
|
|
4151
4156
|
report_path = None
|
|
4152
4157
|
if validation_report:
|
|
4153
4158
|
report_path = os.path.join(savedir, "validation_report.json")
|
|
4154
|
-
|
|
4155
|
-
|
|
4156
|
-
|
|
4157
|
-
|
|
4158
|
-
|
|
4159
|
-
|
|
4160
|
-
|
|
4161
|
-
|
|
4162
|
-
|
|
4163
|
-
|
|
4159
|
+
try:
|
|
4160
|
+
validate(
|
|
4161
|
+
savedir=savedir,
|
|
4162
|
+
mode="ko-matrix",
|
|
4163
|
+
calculate_complementarity=calculate_complementarity,
|
|
4164
|
+
kpct_outprefix=kpct_outprefix,
|
|
4165
|
+
strict=validate_strict,
|
|
4166
|
+
report=report_path,
|
|
4167
|
+
verbose=verbose,
|
|
4168
|
+
log_level=log_level,
|
|
4169
|
+
)
|
|
4170
|
+
except typer.Exit as exc:
|
|
4171
|
+
if logger:
|
|
4172
|
+
logger.error("Validation failed with exit code %s.", exc.exit_code)
|
|
4173
|
+
logger.error("Outputs written to: %s", savedir)
|
|
4174
|
+
if report_path:
|
|
4175
|
+
logger.error("Validation report: %s", report_path)
|
|
4176
|
+
raise
|
|
4177
|
+
|
|
4178
|
+
# Display pipeline completion summary
|
|
4179
|
+
display_pipeline_completion_summary(start_time, savedir, logger, verbose)
|
|
4164
4180
|
|
|
4165
4181
|
except Exception as e:
|
|
4166
4182
|
if logger:
|
|
@@ -4745,7 +4761,7 @@ def validate(
|
|
|
4745
4761
|
)
|
|
4746
4762
|
|
|
4747
4763
|
# Complementarity checks
|
|
4748
|
-
comp_pattern = re.compile(r"module_completeness_complementarity_(
|
|
4764
|
+
comp_pattern = re.compile(r"module_completeness_complementarity_(\d+)member\.tsv$")
|
|
4749
4765
|
comp_files: Dict[int, Path] = {}
|
|
4750
4766
|
for file_path in Path(savedir).glob("module_completeness_complementarity_*member.tsv"):
|
|
4751
4767
|
match = comp_pattern.match(file_path.name)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: moducomp
|
|
3
|
-
Version: 0.7.13
|
|
3
|
+
Version: 0.7.16
|
|
4
4
|
Summary: moducomp: metabolic module completeness and complementarity for microbiomes.
|
|
5
5
|
Keywords: bioinformatics,microbiome,metabolic,kegg,genomics
|
|
6
6
|
Author-email: "Juan C. Villada" <jvillada@lbl.gov>
|
|
@@ -62,6 +62,14 @@ pixi global install \
|
|
|
62
62
|
|
|
63
63
|
`moducomp` needs the eggNOG-mapper database to run. The primary (recommended) way to download it is using the `download_eggnog_data.py` wrapper, which mirrors the upstream downloader behavior. For upstream details, see the eggNOG-mapper setup guide: [eggNOG-mapper database setup](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.13#user-content-Setup).
|
|
64
64
|
|
|
65
|
+
```bash
|
|
66
|
+
download_eggnog_data.py
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
By default, the data are stored in `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog`, and `moducomp` will auto-detect that location without needing `EGGNOG_DATA_DIR`.
|
|
70
|
+
|
|
71
|
+
To use a custom location:
|
|
72
|
+
|
|
65
73
|
```bash
|
|
66
74
|
export EGGNOG_DATA_DIR="/path/to/eggnog-data"
|
|
67
75
|
download_eggnog_data.py --eggnog-data-dir "$EGGNOG_DATA_DIR"
|
|
@@ -69,8 +77,6 @@ download_eggnog_data.py --eggnog-data-dir "$EGGNOG_DATA_DIR"
|
|
|
69
77
|
# moducomp download-eggnog-data --eggnog-data-dir "$EGGNOG_DATA_DIR"
|
|
70
78
|
```
|
|
71
79
|
|
|
72
|
-
If `EGGNOG_DATA_DIR` is not set, the downloader defaults to `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog`.
|
|
73
|
-
|
|
74
80
|
### Quick test
|
|
75
81
|
|
|
76
82
|
Small test data sets ship with `moducomp`. After installation you can confirm the pipeline by running:
|
|
@@ -147,7 +153,7 @@ This section lists all CLI options implemented today, along with their default v
|
|
|
147
153
|
| `--calculate-complementarity`, `-c` | `0` | Complementarity size to compute (0 disables). |
|
|
148
154
|
| `--adapt-headers/--no-adapt-headers` | `false` | Adapt FASTA headers to `genome|protein_N`. |
|
|
149
155
|
| `--del-tmp/--keep-tmp` | `true` | Delete temporary files after completion. |
|
|
150
|
-
| `--lowmem/--fullmem` (`--low-mem/--full-mem`) | `
|
|
156
|
+
| `--lowmem/--fullmem` (`--low-mem/--full-mem`) | `lowmem` | Run eggNOG-mapper without `--dbmem` to reduce RAM. |
|
|
151
157
|
| `--verbose/--quiet` | `false` | Enable verbose progress output. |
|
|
152
158
|
| `--validate/--no-validate` | `validate` | Run post-run validation checks. |
|
|
153
159
|
| `--validate-report/--no-validate-report` | `validate-report` | Write `validation_report.json` in the output directory. |
|
|
@@ -253,7 +259,7 @@ moducomp validate /path/to/output --strict
|
|
|
253
259
|
|
|
254
260
|
### ⚠️ Important note 2
|
|
255
261
|
|
|
256
|
-
`moducomp` is specifically designed for large scale analysis of microbiomes with hundreds of members, and works on Linux systems with at least **64GB of RAM**.
|
|
262
|
+
`moducomp` is specifically designed for large scale analysis of microbiomes with hundreds of members, and works on Linux systems with at least **64GB of RAM**. For robustness, **low-memory mode is now the default** for `pipeline` and `test`. If you have ample RAM and want full-memory mode, add `--fullmem` (`--full-mem`).
|
|
257
263
|
|
|
258
264
|
### Notes on bundled test data
|
|
259
265
|
|
|
@@ -290,7 +296,7 @@ moducomp pipeline \
|
|
|
290
296
|
--ncpus <number_of_cpus_to_use> \
|
|
291
297
|
--calculate-complementarity <N> # 0 to disable, 2 for 2-member, 3 for 3-member complementarity.
|
|
292
298
|
# Optional flags:
|
|
293
|
-
# --
|
|
299
|
+
# --fullmem # Optional: Use full-mem if you have ample RAM (default is low-mem)
|
|
294
300
|
# --adapt-headers # If your FASTA headers need modification
|
|
295
301
|
# --del-tmp/--keep-tmp # Delete or keep temporary files
|
|
296
302
|
# --eggnog-data-dir /path # If EGGNOG_DATA_DIR is not set
|
|
@@ -343,8 +349,11 @@ moducomp pipeline ./large_genome_collection ./output_large --ncpus 32 --calculat
|
|
|
343
349
|
# For moderate datasets with verbose output
|
|
344
350
|
moducomp analyze-ko-matrix ./ko_matrix.csv ./output_moderate --ncpus 16 --calculate-complementarity 2 --verbose
|
|
345
351
|
|
|
346
|
-
# For systems with limited memory
|
|
347
|
-
moducomp pipeline ./genomes ./output_lowmem --ncpus 8 --
|
|
352
|
+
# For systems with limited memory (default behavior)
|
|
353
|
+
moducomp pipeline ./genomes ./output_lowmem --ncpus 8 --calculate-complementarity 2
|
|
354
|
+
|
|
355
|
+
# For systems with ample RAM
|
|
356
|
+
moducomp pipeline ./genomes ./output_fullmem --ncpus 8 --fullmem --calculate-complementarity 2
|
|
348
357
|
```
|
|
349
358
|
|
|
350
359
|
## Expected outputs
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
moducomp/__init__.py,sha256=
|
|
1
|
+
moducomp/__init__.py,sha256=cuhC_H9napqaM3ggu_0MSW9Aq99fUGDL8jKS2UL9FfE,659
|
|
2
2
|
moducomp/__main__.py,sha256=1O2pv6IGjUgqnbqsiMLtVqjxWQpRtZUjp8LDljZ1bsI,185
|
|
3
|
-
moducomp/moducomp.py,sha256=
|
|
3
|
+
moducomp/moducomp.py,sha256=z3-PBUK15nbthjbfMRU8R4Hkw2NMEbrNd2aZ8S2S0Uk,179334
|
|
4
4
|
moducomp/data/test_genomes/IMG2562617132.faa,sha256=gZPh-08pMRdAWJRr3__TbnU1F68CdkDb3gxtpaCLTTc,356863
|
|
5
5
|
moducomp/data/test_genomes/IMG2568526683.faa,sha256=PxFJwe-68UGw7il1hGlNhZt4-2WzzxXxGE1GTskDnow,343109
|
|
6
6
|
moducomp/data/test_genomes/IMG2740892217.faa,sha256=WsId4sIPxENbqF6tYFouAgDCy6T0SXNY6TywxBNe-3E,548954
|
|
7
|
-
moducomp-0.7.
|
|
8
|
-
moducomp-0.7.
|
|
9
|
-
moducomp-0.7.
|
|
10
|
-
moducomp-0.7.
|
|
11
|
-
moducomp-0.7.
|
|
7
|
+
moducomp-0.7.16.dist-info/entry_points.txt,sha256=oIUiacp53fiOCWDXpc2rpq5hS4wDdpasx24EXit3daE,181
|
|
8
|
+
moducomp-0.7.16.dist-info/licenses/LICENSE.txt,sha256=pt0cfIq9Wop21KDZYyQgP0M1YWYvKG0PomA5cUDC4TI,1536
|
|
9
|
+
moducomp-0.7.16.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
|
|
10
|
+
moducomp-0.7.16.dist-info/METADATA,sha256=nB3DHs6ZjL8rlxcjqAUnnf5LbhdAW7hfqU1usNmfBs8,21662
|
|
11
|
+
moducomp-0.7.16.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|