moducomp 0.7.13__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
moducomp/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  moducomp: metabolic module completeness and complementarity for microbiomes.
3
3
  """
4
4
 
5
- __version__ = "0.7.13"
5
+ __version__ = "0.7.16"
6
6
  __author__ = "Juan C. Villada"
7
7
  __email__ = "jvillada@lbl.gov"
8
8
  __title__ = "moducomp"
moducomp/moducomp.py CHANGED
@@ -145,15 +145,23 @@ def require_eggnog_data_dir(eggnog_data_dir: Optional[str], logger: Optional[log
145
145
  if eggnog_data_dir:
146
146
  os.environ["EGGNOG_DATA_DIR"] = eggnog_data_dir
147
147
 
148
- env_value = os.environ.get("EGGNOG_DATA_DIR", "")
149
- if not env_value.strip():
150
- message = (
151
- "EGGNOG_DATA_DIR is required to run eggNOG-mapper. "
152
- "Set the EGGNOG_DATA_DIR environment variable or pass --eggnog-data-dir. "
153
- "Download the data with: download_eggnog_data.py or moducomp download-eggnog-data"
154
- )
155
- emit_error(message, logger)
156
- raise typer.Exit(1)
148
+ env_value = os.environ.get("EGGNOG_DATA_DIR", "").strip()
149
+ if not env_value:
150
+ default_dir = default_eggnog_data_dir()
151
+ if default_dir.exists() and default_dir.is_dir() and any(default_dir.iterdir()):
152
+ os.environ["EGGNOG_DATA_DIR"] = str(default_dir)
153
+ env_value = str(default_dir)
154
+ if logger:
155
+ logger.info("EGGNOG_DATA_DIR not set; using default %s", env_value)
156
+ else:
157
+ message = (
158
+ "EGGNOG_DATA_DIR is required to run eggNOG-mapper. "
159
+ "Set the EGGNOG_DATA_DIR environment variable or pass --eggnog-data-dir. "
160
+ f"Default location is {default_dir}. "
161
+ "Download the data with: download_eggnog_data.py or moducomp download-eggnog-data"
162
+ )
163
+ emit_error(message, logger)
164
+ raise typer.Exit(1)
157
165
 
158
166
  data_dir = Path(env_value).expanduser().resolve()
159
167
  if not data_dir.exists() or not data_dir.is_dir():
@@ -3264,7 +3272,7 @@ def pipeline(
3264
3272
  help="Complementarity size to compute (0 disables).",
3265
3273
  ),
3266
3274
  lowmem: bool = typer.Option(
3267
- False,
3275
+ True,
3268
3276
  "--lowmem/--fullmem",
3269
3277
  "--low-mem/--full-mem",
3270
3278
  help="Run eggNOG-mapper with reduced memory footprint by omitting --dbmem.",
@@ -3441,6 +3449,7 @@ def _run_pipeline_core(
3441
3449
  tmp_emapper_output_dir = f"{get_tmp_dir(savedir)}/emapper_output"
3442
3450
  tmp_emapper_file = f"{tmp_emapper_output_dir}/emapper_out.emapper.annotations"
3443
3451
  ko_matrix_path = f"{savedir}/kos_matrix.csv"
3452
+ kpct_outprefix = "output_give_completeness"
3444
3453
 
3445
3454
  # Process annotations and create KO matrix
3446
3455
  if os.path.exists(ko_matrix_path):
@@ -3509,7 +3518,6 @@ def _run_pipeline_core(
3509
3518
  )
3510
3519
  else:
3511
3520
  # Set up KPCT processing
3512
- kpct_outprefix = "output_give_completeness"
3513
3521
  kpct_input_file = os.path.join(savedir, "ko_file_for_kpct.txt")
3514
3522
 
3515
3523
  # Check if KPCT output already exists
@@ -3594,7 +3602,7 @@ def _run_pipeline_core(
3594
3602
  try:
3595
3603
  validate(
3596
3604
  savedir=savedir,
3597
- mode="ko-matrix",
3605
+ mode="pipeline",
3598
3606
  calculate_complementarity=calculate_complementarity,
3599
3607
  kpct_outprefix=kpct_outprefix,
3600
3608
  strict=validate_strict,
@@ -4143,24 +4151,32 @@ def analyze_ko_matrix(
4143
4151
  # Generate final resource usage summary
4144
4152
  log_final_resource_summary(resource_log_file, start_time, logger, verbose)
4145
4153
 
4146
- # Display pipeline completion summary
4147
- display_pipeline_completion_summary(start_time, savedir, logger, verbose)
4148
-
4149
4154
  if run_validation:
4150
4155
  logger.info("Running post-run validation checks.")
4151
4156
  report_path = None
4152
4157
  if validation_report:
4153
4158
  report_path = os.path.join(savedir, "validation_report.json")
4154
- validate(
4155
- savedir=savedir,
4156
- mode="ko-matrix",
4157
- calculate_complementarity=calculate_complementarity,
4158
- kpct_outprefix=kpct_outprefix,
4159
- strict=validate_strict,
4160
- report=report_path,
4161
- verbose=verbose,
4162
- log_level=log_level,
4163
- )
4159
+ try:
4160
+ validate(
4161
+ savedir=savedir,
4162
+ mode="ko-matrix",
4163
+ calculate_complementarity=calculate_complementarity,
4164
+ kpct_outprefix=kpct_outprefix,
4165
+ strict=validate_strict,
4166
+ report=report_path,
4167
+ verbose=verbose,
4168
+ log_level=log_level,
4169
+ )
4170
+ except typer.Exit as exc:
4171
+ if logger:
4172
+ logger.error("Validation failed with exit code %s.", exc.exit_code)
4173
+ logger.error("Outputs written to: %s", savedir)
4174
+ if report_path:
4175
+ logger.error("Validation report: %s", report_path)
4176
+ raise
4177
+
4178
+ # Display pipeline completion summary
4179
+ display_pipeline_completion_summary(start_time, savedir, logger, verbose)
4164
4180
 
4165
4181
  except Exception as e:
4166
4182
  if logger:
@@ -4745,7 +4761,7 @@ def validate(
4745
4761
  )
4746
4762
 
4747
4763
  # Complementarity checks
4748
- comp_pattern = re.compile(r"module_completeness_complementarity_(\\d+)member\\.tsv$")
4764
+ comp_pattern = re.compile(r"module_completeness_complementarity_(\d+)member\.tsv$")
4749
4765
  comp_files: Dict[int, Path] = {}
4750
4766
  for file_path in Path(savedir).glob("module_completeness_complementarity_*member.tsv"):
4751
4767
  match = comp_pattern.match(file_path.name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moducomp
3
- Version: 0.7.13
3
+ Version: 0.7.16
4
4
  Summary: moducomp: metabolic module completeness and complementarity for microbiomes.
5
5
  Keywords: bioinformatics,microbiome,metabolic,kegg,genomics
6
6
  Author-email: "Juan C. Villada" <jvillada@lbl.gov>
@@ -62,6 +62,14 @@ pixi global install \
62
62
 
63
63
  `moducomp` needs the eggNOG-mapper database to run. The primary (recommended) way to download it is using the `download_eggnog_data.py` wrapper, which mirrors the upstream downloader behavior. For upstream details, see the eggNOG-mapper setup guide: [eggNOG-mapper database setup](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.13#user-content-Setup).
64
64
 
65
+ ```bash
66
+ download_eggnog_data.py
67
+ ```
68
+
69
+ By default, the data are stored in `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog`. When `EGGNOG_DATA_DIR` is not set, `moducomp` automatically uses that location, provided the directory exists and is not empty.
70
+
71
+ To use a custom location:
72
+
65
73
  ```bash
66
74
  export EGGNOG_DATA_DIR="/path/to/eggnog-data"
67
75
  download_eggnog_data.py --eggnog-data-dir "$EGGNOG_DATA_DIR"
@@ -69,8 +77,6 @@ download_eggnog_data.py --eggnog-data-dir "$EGGNOG_DATA_DIR"
69
77
  # moducomp download-eggnog-data --eggnog-data-dir "$EGGNOG_DATA_DIR"
70
78
  ```
71
79
 
72
- If `EGGNOG_DATA_DIR` is not set, the downloader defaults to `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog`.
73
-
74
80
  ### Quick test
75
81
 
76
82
  Small test data sets ship with `moducomp`. After installation you can confirm the pipeline by running:
@@ -147,7 +153,7 @@ This section lists all CLI options implemented today, along with their default v
147
153
  | `--calculate-complementarity`, `-c` | `0` | Complementarity size to compute (0 disables). |
148
154
  | `--adapt-headers/--no-adapt-headers` | `false` | Adapt FASTA headers to `genome|protein_N`. |
149
155
  | `--del-tmp/--keep-tmp` | `true` | Delete temporary files after completion. |
150
- | `--lowmem/--fullmem` (`--low-mem/--full-mem`) | `fullmem` | Run eggNOG-mapper without `--dbmem` to reduce RAM. |
156
+ | `--lowmem/--fullmem` (`--low-mem/--full-mem`) | `lowmem` | Run eggNOG-mapper without `--dbmem` to reduce RAM usage. |
151
157
  | `--verbose/--quiet` | `false` | Enable verbose progress output. |
152
158
  | `--validate/--no-validate` | `validate` | Run post-run validation checks. |
153
159
  | `--validate-report/--no-validate-report` | `validate-report` | Write `validation_report.json` in the output directory. |
@@ -253,7 +259,7 @@ moducomp validate /path/to/output --strict
253
259
 
254
260
  ### ⚠️ Important note 2
255
261
 
256
- `moducomp` is specifically designed for large scale analysis of microbiomes with hundreds of members, and works on Linux systems with at least **64GB of RAM**. Nevertheless, it can be run on **smaller systems with less RAM, using the flag `--lowmem` (`--low-mem`) when running the `pipeline` command**. The `test` command uses low-memory mode by default and can be switched to full memory with `--fullmem` (`--full-mem`).
262
+ `moducomp` is specifically designed for large-scale analysis of microbiomes with hundreds of members, and works on Linux systems. Full-memory mode needs at least **64 GB of RAM**; for robustness, **low-memory mode is now the default** for `pipeline` and `test`, so systems with less RAM work without extra flags. If you have ample RAM and want full-memory mode, add `--fullmem` (`--full-mem`).
257
263
 
258
264
  ### Notes on bundled test data
259
265
 
@@ -290,7 +296,7 @@ moducomp pipeline \
290
296
  --ncpus <number_of_cpus_to_use> \
291
297
  --calculate-complementarity <N> # 0 to disable, 2 for 2-member, 3 for 3-member complementarity.
292
298
  # Optional flags:
293
- # --lowmem/--fullmem # Optional: Use low-mem if you have less than 64GB of RAM (default is full mem)
299
+ # --fullmem # Optional: Use full-mem if you have ample RAM (default is low-mem)
294
300
  # --adapt-headers # If your FASTA headers need modification
295
301
  # --del-tmp/--keep-tmp # Delete or keep temporary files
296
302
  # --eggnog-data-dir /path # If EGGNOG_DATA_DIR is not set
@@ -343,8 +349,11 @@ moducomp pipeline ./large_genome_collection ./output_large --ncpus 32 --calculat
343
349
  # For moderate datasets with verbose output
344
350
  moducomp analyze-ko-matrix ./ko_matrix.csv ./output_moderate --ncpus 16 --calculate-complementarity 2 --verbose
345
351
 
346
- # For systems with limited memory
347
- moducomp pipeline ./genomes ./output_lowmem --ncpus 8 --lowmem --calculate-complementarity 2
352
+ # For systems with limited memory (default behavior)
353
+ moducomp pipeline ./genomes ./output_lowmem --ncpus 8 --calculate-complementarity 2
354
+
355
+ # For systems with ample RAM
356
+ moducomp pipeline ./genomes ./output_fullmem --ncpus 8 --fullmem --calculate-complementarity 2
348
357
  ```
349
358
 
350
359
  ## Expected outputs
@@ -1,11 +1,11 @@
1
- moducomp/__init__.py,sha256=4JZhb_CRk2p0sYjkmp2gH__rAKsrR1CdZshT9XGbXEg,659
1
+ moducomp/__init__.py,sha256=cuhC_H9napqaM3ggu_0MSW9Aq99fUGDL8jKS2UL9FfE,659
2
2
  moducomp/__main__.py,sha256=1O2pv6IGjUgqnbqsiMLtVqjxWQpRtZUjp8LDljZ1bsI,185
3
- moducomp/moducomp.py,sha256=ntC6RHC1yb5Pa3WFFEASfkvG6z_fSc0q12PDbjGxsLQ,178488
3
+ moducomp/moducomp.py,sha256=z3-PBUK15nbthjbfMRU8R4Hkw2NMEbrNd2aZ8S2S0Uk,179334
4
4
  moducomp/data/test_genomes/IMG2562617132.faa,sha256=gZPh-08pMRdAWJRr3__TbnU1F68CdkDb3gxtpaCLTTc,356863
5
5
  moducomp/data/test_genomes/IMG2568526683.faa,sha256=PxFJwe-68UGw7il1hGlNhZt4-2WzzxXxGE1GTskDnow,343109
6
6
  moducomp/data/test_genomes/IMG2740892217.faa,sha256=WsId4sIPxENbqF6tYFouAgDCy6T0SXNY6TywxBNe-3E,548954
7
- moducomp-0.7.13.dist-info/entry_points.txt,sha256=oIUiacp53fiOCWDXpc2rpq5hS4wDdpasx24EXit3daE,181
8
- moducomp-0.7.13.dist-info/licenses/LICENSE.txt,sha256=pt0cfIq9Wop21KDZYyQgP0M1YWYvKG0PomA5cUDC4TI,1536
9
- moducomp-0.7.13.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
10
- moducomp-0.7.13.dist-info/METADATA,sha256=Aj9sjpYv4eFd1dbRJsyot2NKp_-RMi3x1K2SlXy9wYg,21520
11
- moducomp-0.7.13.dist-info/RECORD,,
7
+ moducomp-0.7.16.dist-info/entry_points.txt,sha256=oIUiacp53fiOCWDXpc2rpq5hS4wDdpasx24EXit3daE,181
8
+ moducomp-0.7.16.dist-info/licenses/LICENSE.txt,sha256=pt0cfIq9Wop21KDZYyQgP0M1YWYvKG0PomA5cUDC4TI,1536
9
+ moducomp-0.7.16.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
10
+ moducomp-0.7.16.dist-info/METADATA,sha256=nB3DHs6ZjL8rlxcjqAUnnf5LbhdAW7hfqU1usNmfBs8,21662
11
+ moducomp-0.7.16.dist-info/RECORD,,