moducomp 0.7.17__tar.gz → 0.7.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {moducomp-0.7.17 → moducomp-0.7.18}/PKG-INFO +15 -21
- {moducomp-0.7.17 → moducomp-0.7.18}/README.md +14 -20
- {moducomp-0.7.17 → moducomp-0.7.18}/moducomp/__init__.py +1 -1
- {moducomp-0.7.17 → moducomp-0.7.18}/moducomp/moducomp.py +155 -168
- {moducomp-0.7.17 → moducomp-0.7.18}/pyproject.toml +1 -6
- {moducomp-0.7.17 → moducomp-0.7.18}/recipe.yaml +2 -5
- moducomp-0.7.17/scripts/download_eggnog_data.py +0 -8
- {moducomp-0.7.17 → moducomp-0.7.18}/.gitignore +0 -0
- {moducomp-0.7.17 → moducomp-0.7.18}/LICENSE.txt +0 -0
- {moducomp-0.7.17 → moducomp-0.7.18}/moducomp/__main__.py +0 -0
- {moducomp-0.7.17 → moducomp-0.7.18}/moducomp/data/test_genomes/IMG2562617132.faa +0 -0
- {moducomp-0.7.17 → moducomp-0.7.18}/moducomp/data/test_genomes/IMG2568526683.faa +0 -0
- {moducomp-0.7.17 → moducomp-0.7.18}/moducomp/data/test_genomes/IMG2740892217.faa +0 -0
- {moducomp-0.7.17 → moducomp-0.7.18}/pixi.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: moducomp
|
|
3
|
-
Version: 0.7.17
|
|
3
|
+
Version: 0.7.18
|
|
4
4
|
Summary: moducomp: metabolic module completeness and complementarity for microbiomes.
|
|
5
5
|
Keywords: bioinformatics,microbiome,metabolic,kegg,genomics
|
|
6
6
|
Author-email: "Juan C. Villada" <jvillada@lbl.gov>
|
|
@@ -60,29 +60,28 @@ pixi global install \
|
|
|
60
60
|
|
|
61
61
|
## Setup data (required)
|
|
62
62
|
|
|
63
|
-
`moducomp` needs the eggNOG-mapper database to run.
|
|
63
|
+
`moducomp` needs the eggNOG-mapper database to run. Use `moducomp setup` to download the data with eggNOG-mapper's official downloader and record the location for future runs. For upstream details, see the eggNOG-mapper setup guide: [eggNOG-mapper database setup](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.13#user-content-Setup).
|
|
64
64
|
|
|
65
65
|
```bash
|
|
66
|
-
|
|
66
|
+
moducomp setup
|
|
67
67
|
```
|
|
68
68
|
|
|
69
|
-
By default, the data are stored in `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog`, and `moducomp` will auto-detect that location without needing `EGGNOG_DATA_DIR`.
|
|
69
|
+
By default, the data are stored in `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog`, and `moducomp` will auto-detect that location on future runs without needing `EGGNOG_DATA_DIR`.
|
|
70
70
|
|
|
71
|
-
To use a custom location:
|
|
71
|
+
To use a custom location (or point to an existing download):
|
|
72
72
|
|
|
73
73
|
```bash
|
|
74
|
-
|
|
75
|
-
download_eggnog_data.py --eggnog-data-dir "$EGGNOG_DATA_DIR"
|
|
76
|
-
# equivalent:
|
|
77
|
-
# moducomp download-eggnog-data --eggnog-data-dir "$EGGNOG_DATA_DIR"
|
|
74
|
+
moducomp setup --eggnog-data-dir /path/to/eggnog-data
|
|
78
75
|
```
|
|
79
76
|
|
|
77
|
+
Add `--force` to re-download the data if the directory already exists. Use `--prompt` if you want to confirm each download interactively.
|
|
78
|
+
|
|
80
79
|
### Quick test
|
|
81
80
|
|
|
82
81
|
Small test data sets ship with `moducomp`. After installation you can confirm the pipeline by running:
|
|
83
82
|
|
|
84
83
|
```bash
|
|
85
|
-
moducomp test --ncpus 16 --calculate-complementarity 2
|
|
84
|
+
moducomp test --ncpus 16 --calculate-complementarity 2
|
|
86
85
|
```
|
|
87
86
|
|
|
88
87
|
The test command runs in low-memory mode by default. If you have plenty of RAM and want full-memory mode, add `--fullmem` (or `--full-mem`).
|
|
@@ -204,11 +203,13 @@ This section lists all CLI options implemented today, along with their default v
|
|
|
204
203
|
| `--verbose/--quiet` | `false` | Enable verbose progress output. |
|
|
205
204
|
| `--log-level`, `-l` | `INFO` | Logging level: `DEBUG`, `INFO`, `WARNING`, `ERROR`. |
|
|
206
205
|
|
|
207
|
-
#### `download-eggnog-data` command
|
|
206
|
+
#### `setup` command
|
|
208
207
|
|
|
209
208
|
| Option | Default | Description |
|
|
210
209
|
| --- | --- | --- |
|
|
211
210
|
| `--eggnog-data-dir` | `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog` | Destination for eggNOG-mapper data (sets `EGGNOG_DATA_DIR`). |
|
|
211
|
+
| `--yes/--prompt`, `-y` | `yes` | Automatically accept core database downloads (use `--prompt` to confirm each). |
|
|
212
|
+
| `--force`, `-f` | `false` | Re-download data even if files already exist. |
|
|
212
213
|
| `--log-level`, `-l` | `INFO` | Logging level: `DEBUG`, `INFO`, `WARNING`, `ERROR`. |
|
|
213
214
|
| `--verbose/--quiet` | `verbose` | Stream downloader output to the console. |
|
|
214
215
|
|
|
@@ -266,19 +267,12 @@ moducomp validate /path/to/output --strict
|
|
|
266
267
|
You can override the bundled data location with `MODUCOMP_DATA_DIR`.
|
|
267
268
|
When working from source, the bundled test genomes live at `moducomp/data/test_genomes`.
|
|
268
269
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
Pixi task (supports passing a custom location):
|
|
272
|
-
|
|
273
|
-
```bash
|
|
274
|
-
export EGGNOG_DATA_DIR=/path/to/eggnog-data
|
|
275
|
-
pixi run download-eggnog-data --eggnog-data-dir /path/to/eggnog-data
|
|
276
|
-
```
|
|
270
|
+
Use `moducomp setup` to download and register the eggNOG data directory. The location is stored in `${XDG_CONFIG_HOME:-~/.config}/moducomp/config.json` so future runs can find the data automatically.
|
|
277
271
|
|
|
278
|
-
|
|
272
|
+
From source (Pixi):
|
|
279
273
|
|
|
280
274
|
```bash
|
|
281
|
-
|
|
275
|
+
pixi run python -m moducomp setup --eggnog-data-dir /path/to/eggnog-data
|
|
282
276
|
```
|
|
283
277
|
|
|
284
278
|
### Running with your samples
|
|
@@ -35,29 +35,28 @@ pixi global install \
|
|
|
35
35
|
|
|
36
36
|
## Setup data (required)
|
|
37
37
|
|
|
38
|
-
`moducomp` needs the eggNOG-mapper database to run.
|
|
38
|
+
`moducomp` needs the eggNOG-mapper database to run. Use `moducomp setup` to download the data with eggNOG-mapper's official downloader and record the location for future runs. For upstream details, see the eggNOG-mapper setup guide: [eggNOG-mapper database setup](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.13#user-content-Setup).
|
|
39
39
|
|
|
40
40
|
```bash
|
|
41
|
-
|
|
41
|
+
moducomp setup
|
|
42
42
|
```
|
|
43
43
|
|
|
44
|
-
By default, the data are stored in `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog`, and `moducomp` will auto-detect that location without needing `EGGNOG_DATA_DIR`.
|
|
44
|
+
By default, the data are stored in `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog`, and `moducomp` will auto-detect that location on future runs without needing `EGGNOG_DATA_DIR`.
|
|
45
45
|
|
|
46
|
-
To use a custom location:
|
|
46
|
+
To use a custom location (or point to an existing download):
|
|
47
47
|
|
|
48
48
|
```bash
|
|
49
|
-
|
|
50
|
-
download_eggnog_data.py --eggnog-data-dir "$EGGNOG_DATA_DIR"
|
|
51
|
-
# equivalent:
|
|
52
|
-
# moducomp download-eggnog-data --eggnog-data-dir "$EGGNOG_DATA_DIR"
|
|
49
|
+
moducomp setup --eggnog-data-dir /path/to/eggnog-data
|
|
53
50
|
```
|
|
54
51
|
|
|
52
|
+
Add `--force` to re-download the data if the directory already exists. Use `--prompt` if you want to confirm each download interactively.
|
|
53
|
+
|
|
55
54
|
### Quick test
|
|
56
55
|
|
|
57
56
|
Small test data sets ship with `moducomp`. After installation you can confirm the pipeline by running:
|
|
58
57
|
|
|
59
58
|
```bash
|
|
60
|
-
moducomp test --ncpus 16 --calculate-complementarity 2
|
|
59
|
+
moducomp test --ncpus 16 --calculate-complementarity 2
|
|
61
60
|
```
|
|
62
61
|
|
|
63
62
|
The test command runs in low-memory mode by default. If you have plenty of RAM and want full-memory mode, add `--fullmem` (or `--full-mem`).
|
|
@@ -179,11 +178,13 @@ This section lists all CLI options implemented today, along with their default v
|
|
|
179
178
|
| `--verbose/--quiet` | `false` | Enable verbose progress output. |
|
|
180
179
|
| `--log-level`, `-l` | `INFO` | Logging level: `DEBUG`, `INFO`, `WARNING`, `ERROR`. |
|
|
181
180
|
|
|
182
|
-
#### `download-eggnog-data` command
|
|
181
|
+
#### `setup` command
|
|
183
182
|
|
|
184
183
|
| Option | Default | Description |
|
|
185
184
|
| --- | --- | --- |
|
|
186
185
|
| `--eggnog-data-dir` | `${XDG_DATA_HOME:-~/.local/share}/moducomp/eggnog` | Destination for eggNOG-mapper data (sets `EGGNOG_DATA_DIR`). |
|
|
186
|
+
| `--yes/--prompt`, `-y` | `yes` | Automatically accept core database downloads (use `--prompt` to confirm each). |
|
|
187
|
+
| `--force`, `-f` | `false` | Re-download data even if files already exist. |
|
|
187
188
|
| `--log-level`, `-l` | `INFO` | Logging level: `DEBUG`, `INFO`, `WARNING`, `ERROR`. |
|
|
188
189
|
| `--verbose/--quiet` | `verbose` | Stream downloader output to the console. |
|
|
189
190
|
|
|
@@ -241,19 +242,12 @@ moducomp validate /path/to/output --strict
|
|
|
241
242
|
You can override the bundled data location with `MODUCOMP_DATA_DIR`.
|
|
242
243
|
When working from source, the bundled test genomes live at `moducomp/data/test_genomes`.
|
|
243
244
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
Pixi task (supports passing a custom location):
|
|
247
|
-
|
|
248
|
-
```bash
|
|
249
|
-
export EGGNOG_DATA_DIR=/path/to/eggnog-data
|
|
250
|
-
pixi run download-eggnog-data --eggnog-data-dir /path/to/eggnog-data
|
|
251
|
-
```
|
|
245
|
+
Use `moducomp setup` to download and register the eggNOG data directory. The location is stored in `${XDG_CONFIG_HOME:-~/.config}/moducomp/config.json` so future runs can find the data automatically.
|
|
252
246
|
|
|
253
|
-
|
|
247
|
+
From source (Pixi):
|
|
254
248
|
|
|
255
249
|
```bash
|
|
256
|
-
|
|
250
|
+
pixi run python -m moducomp setup --eggnog-data-dir /path/to/eggnog-data
|
|
257
251
|
```
|
|
258
252
|
|
|
259
253
|
### Running with your samples
|
|
@@ -147,41 +147,48 @@ def require_eggnog_data_dir(eggnog_data_dir: Optional[str], logger: Optional[log
|
|
|
147
147
|
|
|
148
148
|
env_value = os.environ.get("EGGNOG_DATA_DIR", "").strip()
|
|
149
149
|
if not env_value:
|
|
150
|
-
|
|
151
|
-
if
|
|
152
|
-
os.environ["EGGNOG_DATA_DIR"] = str(
|
|
153
|
-
env_value = str(
|
|
150
|
+
configured = _get_configured_eggnog_dir(logger)
|
|
151
|
+
if configured:
|
|
152
|
+
os.environ["EGGNOG_DATA_DIR"] = str(configured)
|
|
153
|
+
env_value = str(configured)
|
|
154
154
|
if logger:
|
|
155
|
-
logger.info("EGGNOG_DATA_DIR not set; using
|
|
155
|
+
logger.info("EGGNOG_DATA_DIR not set; using configured %s", env_value)
|
|
156
156
|
else:
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
"
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
157
|
+
default_dir = default_eggnog_data_dir()
|
|
158
|
+
if default_dir.exists() and default_dir.is_dir() and _has_eggnog_core_files(default_dir):
|
|
159
|
+
os.environ["EGGNOG_DATA_DIR"] = str(default_dir)
|
|
160
|
+
env_value = str(default_dir)
|
|
161
|
+
if logger:
|
|
162
|
+
logger.info("EGGNOG_DATA_DIR not set; using default %s", env_value)
|
|
163
|
+
else:
|
|
164
|
+
message = (
|
|
165
|
+
"EGGNOG_DATA_DIR is required to run eggNOG-mapper. "
|
|
166
|
+
"Run `moducomp setup` or pass --eggnog-data-dir. "
|
|
167
|
+
f"Default location is {default_dir}."
|
|
168
|
+
)
|
|
169
|
+
emit_error(message, logger)
|
|
170
|
+
raise typer.Exit(1)
|
|
165
171
|
|
|
166
172
|
data_dir = Path(env_value).expanduser().resolve()
|
|
167
173
|
if not data_dir.exists() or not data_dir.is_dir():
|
|
168
174
|
message = (
|
|
169
175
|
f"EGGNOG_DATA_DIR is not a valid directory: {data_dir}. "
|
|
170
|
-
"
|
|
176
|
+
"Run `moducomp setup` to download the data."
|
|
171
177
|
)
|
|
172
178
|
emit_error(message, logger)
|
|
173
179
|
raise typer.Exit(1)
|
|
174
180
|
|
|
175
|
-
if not
|
|
181
|
+
if not _has_eggnog_core_files(data_dir):
|
|
176
182
|
message = (
|
|
177
|
-
f"EGGNOG_DATA_DIR
|
|
178
|
-
"
|
|
183
|
+
f"EGGNOG_DATA_DIR is missing required eggNOG files: {data_dir}. "
|
|
184
|
+
"Run `moducomp setup` to download the data."
|
|
179
185
|
)
|
|
180
186
|
emit_error(message, logger)
|
|
181
187
|
raise typer.Exit(1)
|
|
182
188
|
|
|
183
189
|
if logger:
|
|
184
190
|
logger.info("Using EGGNOG_DATA_DIR: %s", data_dir)
|
|
191
|
+
_set_configured_eggnog_dir(data_dir, logger)
|
|
185
192
|
return data_dir
|
|
186
193
|
def conditional_output(message: str, color: str = "white", verbose: bool = True) -> None:
|
|
187
194
|
"""
|
|
@@ -346,6 +353,84 @@ def default_eggnog_data_dir() -> Path:
|
|
|
346
353
|
base = Path(xdg_home).expanduser() if xdg_home else Path.home() / ".local" / "share"
|
|
347
354
|
return base / "moducomp" / "eggnog"
|
|
348
355
|
|
|
356
|
+
|
|
357
|
+
def _config_dir() -> Path:
|
|
358
|
+
xdg_home = os.environ.get("XDG_CONFIG_HOME")
|
|
359
|
+
base = Path(xdg_home).expanduser() if xdg_home else Path.home() / ".config"
|
|
360
|
+
return base / "moducomp"
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def _config_path() -> Path:
|
|
364
|
+
return _config_dir() / "config.json"
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _load_config(logger: Optional[logging.Logger] = None) -> Dict[str, Any]:
|
|
368
|
+
path = _config_path()
|
|
369
|
+
if not path.exists():
|
|
370
|
+
return {}
|
|
371
|
+
try:
|
|
372
|
+
with path.open("r", encoding="utf-8") as handle:
|
|
373
|
+
data = json.load(handle)
|
|
374
|
+
if isinstance(data, dict):
|
|
375
|
+
return data
|
|
376
|
+
except Exception as exc:
|
|
377
|
+
if logger:
|
|
378
|
+
logger.warning("Failed to read config %s: %s", path, exc)
|
|
379
|
+
return {}
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _save_config(data: Dict[str, Any], logger: Optional[logging.Logger] = None) -> None:
|
|
383
|
+
path = _config_path()
|
|
384
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
385
|
+
try:
|
|
386
|
+
with path.open("w", encoding="utf-8") as handle:
|
|
387
|
+
json.dump(data, handle, indent=2, sort_keys=True)
|
|
388
|
+
except Exception as exc:
|
|
389
|
+
if logger:
|
|
390
|
+
logger.warning("Failed to write config %s: %s", path, exc)
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def _has_eggnog_core_files(data_dir: Path) -> bool:
|
|
394
|
+
required = ("eggnog.db", "eggnog.taxa.db", "eggnog_proteins.dmnd")
|
|
395
|
+
return all((data_dir / name).exists() for name in required)
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _get_configured_eggnog_dir(logger: Optional[logging.Logger] = None) -> Optional[Path]:
|
|
399
|
+
data = _load_config(logger)
|
|
400
|
+
value = data.get("eggnog_data_dir")
|
|
401
|
+
if not value:
|
|
402
|
+
return None
|
|
403
|
+
path = Path(value).expanduser().resolve()
|
|
404
|
+
if not path.exists() or not path.is_dir():
|
|
405
|
+
if logger:
|
|
406
|
+
logger.warning("Configured eggNOG data dir is invalid: %s", path)
|
|
407
|
+
return None
|
|
408
|
+
if not _has_eggnog_core_files(path):
|
|
409
|
+
if logger:
|
|
410
|
+
logger.warning("Configured eggNOG data dir is missing required files: %s", path)
|
|
411
|
+
return None
|
|
412
|
+
return path
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _set_configured_eggnog_dir(path: Path, logger: Optional[logging.Logger] = None) -> None:
|
|
416
|
+
data = _load_config(logger)
|
|
417
|
+
data["eggnog_data_dir"] = str(path)
|
|
418
|
+
_save_config(data, logger)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def _find_eggnog_downloader() -> Optional[str]:
|
|
422
|
+
for name in ("download_eggnog_data.py", "download_eggnog_data"):
|
|
423
|
+
path = shutil.which(name)
|
|
424
|
+
if path:
|
|
425
|
+
try:
|
|
426
|
+
wrapper_text = Path(path).read_text(encoding="utf-8", errors="ignore")
|
|
427
|
+
except OSError:
|
|
428
|
+
wrapper_text = ""
|
|
429
|
+
if "moducomp.moducomp" in wrapper_text:
|
|
430
|
+
continue
|
|
431
|
+
return path
|
|
432
|
+
return None
|
|
433
|
+
|
|
349
434
|
def run_subprocess_with_logging(
|
|
350
435
|
cmd: List[str],
|
|
351
436
|
logger: Optional[logging.Logger] = None,
|
|
@@ -3732,11 +3817,23 @@ def test(
|
|
|
3732
3817
|
|
|
3733
3818
|
|
|
3734
3819
|
@app.command()
|
|
3735
|
-
def download_eggnog_data(
|
|
3820
|
+
def setup(
|
|
3736
3821
|
eggnog_data_dir: Optional[str] = typer.Option(
|
|
3737
3822
|
None,
|
|
3738
3823
|
"--eggnog-data-dir",
|
|
3739
|
-
help="Destination directory for eggNOG-mapper data (sets EGGNOG_DATA_DIR).",
|
|
3824
|
+
help="Destination directory for eggNOG-mapper data (sets EGGNOG_DATA_DIR). Can point to an existing download.",
|
|
3825
|
+
),
|
|
3826
|
+
yes: bool = typer.Option(
|
|
3827
|
+
True,
|
|
3828
|
+
"--yes/--prompt",
|
|
3829
|
+
"-y",
|
|
3830
|
+
help="Automatically accept core database downloads (use --prompt to confirm each).",
|
|
3831
|
+
),
|
|
3832
|
+
force: bool = typer.Option(
|
|
3833
|
+
False,
|
|
3834
|
+
"--force",
|
|
3835
|
+
"-f",
|
|
3836
|
+
help="Re-download data even if files already exist.",
|
|
3740
3837
|
),
|
|
3741
3838
|
log_level: str = typer.Option(
|
|
3742
3839
|
"INFO",
|
|
@@ -3750,166 +3847,56 @@ def download_eggnog_data(
|
|
|
3750
3847
|
help="Stream downloader output to the console.",
|
|
3751
3848
|
),
|
|
3752
3849
|
) -> None:
|
|
3753
|
-
"""Download eggNOG-mapper data
|
|
3754
|
-
if eggnog_data_dir
|
|
3755
|
-
|
|
3850
|
+
"""Download eggNOG-mapper data (via download_eggnog_data.py) and persist the location for future runs."""
|
|
3851
|
+
target_dir = Path(eggnog_data_dir).expanduser().resolve() if eggnog_data_dir else default_eggnog_data_dir()
|
|
3852
|
+
target_dir = target_dir.expanduser().resolve()
|
|
3756
3853
|
|
|
3757
3854
|
log_dir = Path.cwd() / "logs"
|
|
3758
3855
|
logger = configure_logging(log_level, log_dir)
|
|
3759
|
-
logger.info("Starting
|
|
3856
|
+
logger.info("Starting moducomp setup.")
|
|
3760
3857
|
logger.info("CLI command: %s", " ".join(shlex.quote(arg) for arg in sys.argv))
|
|
3761
3858
|
|
|
3762
|
-
|
|
3763
|
-
|
|
3764
|
-
|
|
3765
|
-
os.environ["EGGNOG_DATA_DIR"] = str(default_dir)
|
|
3766
|
-
env_value = str(default_dir)
|
|
3767
|
-
logger.info("EGGNOG_DATA_DIR not set; using default %s", env_value)
|
|
3859
|
+
target_dir.mkdir(parents=True, exist_ok=True)
|
|
3860
|
+
os.environ["EGGNOG_DATA_DIR"] = str(target_dir)
|
|
3861
|
+
logger.info("Using EGGNOG_DATA_DIR: %s", target_dir)
|
|
3768
3862
|
|
|
3769
|
-
|
|
3770
|
-
|
|
3771
|
-
|
|
3863
|
+
if _has_eggnog_core_files(target_dir) and not force:
|
|
3864
|
+
logger.info("eggNOG data already present; skipping download.")
|
|
3865
|
+
else:
|
|
3866
|
+
downloader = _find_eggnog_downloader()
|
|
3867
|
+
if downloader is None:
|
|
3868
|
+
message = (
|
|
3869
|
+
"download_eggnog_data.py not found in PATH. "
|
|
3870
|
+
"Ensure eggnog-mapper is installed."
|
|
3871
|
+
)
|
|
3872
|
+
emit_error(message, logger)
|
|
3873
|
+
raise typer.Exit(1)
|
|
3772
3874
|
|
|
3773
|
-
|
|
3774
|
-
|
|
3775
|
-
|
|
3776
|
-
if
|
|
3777
|
-
|
|
3875
|
+
cmd = [downloader, "--data_dir", str(target_dir)]
|
|
3876
|
+
if yes:
|
|
3877
|
+
cmd.append("-y")
|
|
3878
|
+
if force:
|
|
3879
|
+
cmd.append("-f")
|
|
3880
|
+
if not verbose:
|
|
3881
|
+
cmd.append("-q")
|
|
3882
|
+
|
|
3883
|
+
logger.info("Running eggNOG-mapper downloader: %s", " ".join(shlex.quote(arg) for arg in cmd))
|
|
3778
3884
|
try:
|
|
3779
|
-
|
|
3780
|
-
except
|
|
3781
|
-
|
|
3782
|
-
|
|
3783
|
-
|
|
3784
|
-
|
|
3785
|
-
|
|
3786
|
-
|
|
3787
|
-
|
|
3788
|
-
|
|
3789
|
-
"Ensure eggnog-mapper is installed."
|
|
3885
|
+
subprocess.run(cmd, check=True)
|
|
3886
|
+
except subprocess.CalledProcessError as exc:
|
|
3887
|
+
emit_error(f"eggNOG-mapper downloader failed with exit code {exc.returncode}", logger)
|
|
3888
|
+
raise typer.Exit(exc.returncode)
|
|
3889
|
+
|
|
3890
|
+
if not _has_eggnog_core_files(target_dir):
|
|
3891
|
+
emit_error(
|
|
3892
|
+
f"eggNOG data download incomplete in {target_dir}. "
|
|
3893
|
+
"Re-run `moducomp setup` and ensure the core databases are downloaded.",
|
|
3894
|
+
logger,
|
|
3790
3895
|
)
|
|
3791
|
-
emit_error(message, logger)
|
|
3792
3896
|
raise typer.Exit(1)
|
|
3793
3897
|
|
|
3794
|
-
|
|
3795
|
-
|
|
3796
|
-
logger.info("Downloading eggNOG data: %s", downloader)
|
|
3797
|
-
|
|
3798
|
-
start_time = time.time()
|
|
3799
|
-
last_progress_time = start_time
|
|
3800
|
-
last_size = get_dir_size(data_dir)
|
|
3801
|
-
last_files = count_files(data_dir)
|
|
3802
|
-
progress_interval = 60
|
|
3803
|
-
|
|
3804
|
-
process = subprocess.Popen(
|
|
3805
|
-
cmd,
|
|
3806
|
-
stdout=subprocess.PIPE,
|
|
3807
|
-
stderr=subprocess.PIPE,
|
|
3808
|
-
text=True,
|
|
3809
|
-
bufsize=1,
|
|
3810
|
-
universal_newlines=True,
|
|
3811
|
-
)
|
|
3812
|
-
|
|
3813
|
-
stdout_queue: "queue.Queue[Tuple[str, str]]" = queue.Queue()
|
|
3814
|
-
stderr_queue: "queue.Queue[Tuple[str, str]]" = queue.Queue()
|
|
3815
|
-
|
|
3816
|
-
def stream_reader(stream, q, stream_type):
|
|
3817
|
-
try:
|
|
3818
|
-
for line in iter(stream.readline, ""):
|
|
3819
|
-
q.put((stream_type, line.rstrip("\n\r")))
|
|
3820
|
-
finally:
|
|
3821
|
-
try:
|
|
3822
|
-
stream.close()
|
|
3823
|
-
except Exception:
|
|
3824
|
-
pass
|
|
3825
|
-
|
|
3826
|
-
stdout_thread = threading.Thread(
|
|
3827
|
-
target=stream_reader,
|
|
3828
|
-
args=(process.stdout, stdout_queue, "stdout"),
|
|
3829
|
-
daemon=True,
|
|
3830
|
-
)
|
|
3831
|
-
stderr_thread = threading.Thread(
|
|
3832
|
-
target=stream_reader,
|
|
3833
|
-
args=(process.stderr, stderr_queue, "stderr"),
|
|
3834
|
-
daemon=True,
|
|
3835
|
-
)
|
|
3836
|
-
stdout_thread.start()
|
|
3837
|
-
stderr_thread.start()
|
|
3838
|
-
|
|
3839
|
-
output_level = logging.INFO if verbose else logging.DEBUG
|
|
3840
|
-
error_level = logging.WARNING if verbose else logging.DEBUG
|
|
3841
|
-
|
|
3842
|
-
while process.poll() is None or not stdout_queue.empty() or not stderr_queue.empty():
|
|
3843
|
-
now = time.time()
|
|
3844
|
-
|
|
3845
|
-
# Drain stdout
|
|
3846
|
-
try:
|
|
3847
|
-
while True:
|
|
3848
|
-
stream_type, line = stdout_queue.get_nowait()
|
|
3849
|
-
if line:
|
|
3850
|
-
if logger:
|
|
3851
|
-
_log_lines(logger, line, output_level)
|
|
3852
|
-
except queue.Empty:
|
|
3853
|
-
pass
|
|
3854
|
-
|
|
3855
|
-
# Drain stderr
|
|
3856
|
-
try:
|
|
3857
|
-
while True:
|
|
3858
|
-
stream_type, line = stderr_queue.get_nowait()
|
|
3859
|
-
if line:
|
|
3860
|
-
if logger:
|
|
3861
|
-
_log_lines(logger, line, error_level)
|
|
3862
|
-
except queue.Empty:
|
|
3863
|
-
pass
|
|
3864
|
-
|
|
3865
|
-
if now - last_progress_time >= progress_interval:
|
|
3866
|
-
try:
|
|
3867
|
-
current_size = get_dir_size(data_dir)
|
|
3868
|
-
current_files = count_files(data_dir)
|
|
3869
|
-
delta = current_size - last_size
|
|
3870
|
-
elapsed = now - last_progress_time
|
|
3871
|
-
speed = delta / elapsed if elapsed > 0 else 0.0
|
|
3872
|
-
file_delta = current_files - last_files
|
|
3873
|
-
msg = (
|
|
3874
|
-
f"Download progress: {format_bytes(current_size)} total "
|
|
3875
|
-
f"(+{format_bytes(delta)} in {int(elapsed)}s, "
|
|
3876
|
-
f"{format_bytes(speed)}/s, +{file_delta} files)"
|
|
3877
|
-
)
|
|
3878
|
-
logger.info(msg)
|
|
3879
|
-
last_size = current_size
|
|
3880
|
-
last_files = current_files
|
|
3881
|
-
last_progress_time = now
|
|
3882
|
-
except Exception as exc:
|
|
3883
|
-
logger.warning("Progress check failed: %s", exc)
|
|
3884
|
-
last_progress_time = now
|
|
3885
|
-
|
|
3886
|
-
time.sleep(0.2)
|
|
3887
|
-
|
|
3888
|
-
stdout_thread.join(timeout=1.0)
|
|
3889
|
-
stderr_thread.join(timeout=1.0)
|
|
3890
|
-
|
|
3891
|
-
returncode = process.returncode
|
|
3892
|
-
total_size = get_dir_size(data_dir)
|
|
3893
|
-
total_files = count_files(data_dir)
|
|
3894
|
-
total_elapsed = time.time() - start_time
|
|
3895
|
-
summary = (
|
|
3896
|
-
f"Download finished: {format_bytes(total_size)} in {int(total_elapsed)}s "
|
|
3897
|
-
f"across {total_files} files"
|
|
3898
|
-
)
|
|
3899
|
-
logger.info(summary)
|
|
3900
|
-
|
|
3901
|
-
if returncode != 0:
|
|
3902
|
-
raise typer.Exit(returncode)
|
|
3903
|
-
|
|
3904
|
-
|
|
3905
|
-
def download_eggnog_data_cli() -> None:
|
|
3906
|
-
"""Entry point for download-eggnog-data/download_eggnog_data.py scripts."""
|
|
3907
|
-
prog = Path(sys.argv[0]).name if sys.argv else "download-eggnog-data"
|
|
3908
|
-
app(
|
|
3909
|
-
prog_name=prog,
|
|
3910
|
-
args=["download-eggnog-data", *sys.argv[1:]],
|
|
3911
|
-
)
|
|
3912
|
-
|
|
3898
|
+
_set_configured_eggnog_dir(target_dir, logger)
|
|
3899
|
+
logger.info("Saved eggNOG data location for future runs: %s", target_dir)
|
|
3913
3900
|
|
|
3914
3901
|
|
|
3915
3902
|
@app.command()
|
|
@@ -28,8 +28,6 @@ dynamic = ["version", "description"]
|
|
|
28
28
|
|
|
29
29
|
[project.scripts]
|
|
30
30
|
moducomp = "moducomp.moducomp:app"
|
|
31
|
-
download-eggnog-data = "moducomp.moducomp:download_eggnog_data_cli"
|
|
32
|
-
"download_eggnog_data.py" = "moducomp.moducomp:download_eggnog_data_cli"
|
|
33
31
|
|
|
34
32
|
[project.urls]
|
|
35
33
|
Homepage = "https://github.com/NeLLi-team/moducomp"
|
|
@@ -55,9 +53,6 @@ kegg-pathways-completeness = "*"
|
|
|
55
53
|
[tool.pixi.tasks]
|
|
56
54
|
moducomp = "python -m moducomp --help"
|
|
57
55
|
test = "python -m moducomp test --ncpus 2 --calculate-complementarity 2"
|
|
58
|
-
|
|
56
|
+
setup = "python -m moducomp setup"
|
|
59
57
|
pipeline = "python -m moducomp pipeline --help"
|
|
60
58
|
analyze_ko_matrix = "python -m moducomp analyze-ko-matrix --help"
|
|
61
|
-
|
|
62
|
-
[tool.flit.sdist]
|
|
63
|
-
include = ["scripts/download_eggnog_data.py"]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
context:
|
|
2
|
-
version: 0.7.17
|
|
2
|
+
version: 0.7.18
|
|
3
3
|
|
|
4
4
|
package:
|
|
5
5
|
name: moducomp
|
|
@@ -7,17 +7,14 @@ package:
|
|
|
7
7
|
|
|
8
8
|
source:
|
|
9
9
|
- url: https://pypi.org/packages/source/m/moducomp/moducomp-${{ version }}.tar.gz
|
|
10
|
-
sha256:
|
|
10
|
+
sha256: c2780812e19805007dbb85edb7fe5ca6e2d7f971c13114e5eb9ab9780cc48a28
|
|
11
11
|
|
|
12
12
|
build:
|
|
13
13
|
script:
|
|
14
14
|
- ${{ PYTHON }} -m pip install .
|
|
15
|
-
- install -m 0755 scripts/download_eggnog_data.py ${{ PREFIX }}/bin/download_eggnog_data.py
|
|
16
15
|
python:
|
|
17
16
|
entry_points:
|
|
18
17
|
- moducomp = moducomp.moducomp:app
|
|
19
|
-
- download-eggnog-data = moducomp.moducomp:download_eggnog_data_cli
|
|
20
|
-
- download_eggnog_data.py = moducomp.moducomp:download_eggnog_data_cli
|
|
21
18
|
noarch: python
|
|
22
19
|
|
|
23
20
|
requirements:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|