bactopia 2.1.5__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bactopia-2.1.5 → bactopia-2.2.0}/PKG-INFO +8 -7
- {bactopia-2.1.5 → bactopia-2.2.0}/README.md +5 -5
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/common.py +1 -1
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/search.py +83 -14
- bactopia-2.2.0/bactopia/databases/ena.py +118 -0
- bactopia-2.2.0/bactopia/databases/sra.py +103 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/lint/docs.py +1 -1
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/qc.py +10 -8
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/bactopia/llms.txt.j2 +1 -1
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/nextflow/nextflow.config.j2 +3 -3
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/module/tests/nextflow.config.j2 +1 -1
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/subworkflow/tests/nextflow.config.j2 +2 -2
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/workflow/nextflow.config.j2 +4 -4
- {bactopia-2.1.5 → bactopia-2.2.0}/pyproject.toml +3 -2
- bactopia-2.1.5/bactopia/databases/ena.py +0 -86
- {bactopia-2.1.5 → bactopia-2.2.0}/LICENSE +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/atb.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/atb/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/atb/atb_downloader.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/atb/atb_formatter.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/catalog.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/citations.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/datasets.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/docs.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/download.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/helpers/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/helpers/merge_schemas.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/lint.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pipeline/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pipeline/bracken_to_excel.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pipeline/check_assembly_accession.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pipeline/check_fastqs.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pipeline/cleanup_coverage.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pipeline/kraken_bracken_summary.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pipeline/mask_consensus.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pipeline/scrubber_summary.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pipeline/teton_prepare.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/prepare.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/prune.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pubmlst/build.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/pubmlst/setup.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/review.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/scaffold.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/status.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/summary.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/sysinfo.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/testing.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/update.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/cli/workflows.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/conda.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/databases/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/databases/ncbi.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/databases/pubmlst/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/databases/pubmlst/constants.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/databases/pubmlst/utils.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/lint/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/lint/citations.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/lint/models.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/lint/rules/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/lint/rules/module_rules.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/lint/rules/subworkflow_rules.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/lint/rules/workflow_rules.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/lint/runner.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/nf.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/outputs.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parse.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/amrfinderplus.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/annotator.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/ariba.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/assembler.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/blast.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/citations.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/coverage.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/error.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/gather.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/generic.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/kraken.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/mapping.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/mlst.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/nextflow.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/parsables.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/sketcher.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/variants.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/versions.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/parsers/workflows.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/reports/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/reports/templates/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/scaffold.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/summary.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/__init__.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/logos.py +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/nextflow/params.config.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/nextflow/process.config.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/module/main.nf.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/module/module.config.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/module/schema.json.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/module/tests/main.nf.test.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/module/tests/nf-test.config.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/subworkflow/main.nf.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/subworkflow/tests/main.nf.test.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/subworkflow/tests/nf-test.config.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/subworkflow/tests/nftignore.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/workflow/main.nf.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/workflow/tests/main.nf.test.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/workflow/tests/nf-test.config.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/workflow/tests/nftignore.j2 +0 -0
- {bactopia-2.1.5 → bactopia-2.2.0}/bactopia/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bactopia
|
|
3
|
-
Version: 2.1.5
|
|
3
|
+
Version: 2.2.0
|
|
4
4
|
Summary: A Python package for working with Bactopia
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -20,20 +20,21 @@ Requires-Dist: jinja2 (>=3.1.6)
|
|
|
20
20
|
Requires-Dist: openpyxl (>=3.1.0)
|
|
21
21
|
Requires-Dist: pandas (>=2.2.0)
|
|
22
22
|
Requires-Dist: psutil (>=5.9.0)
|
|
23
|
+
Requires-Dist: pysradb (>=2.2.0)
|
|
23
24
|
Requires-Dist: pyyaml (>=6.0)
|
|
24
25
|
Requires-Dist: rauth (>=0.7.3)
|
|
25
26
|
Requires-Dist: requests (>=2.28.2)
|
|
26
27
|
Requires-Dist: rich (>=13.3.1)
|
|
27
28
|
Requires-Dist: rich-click (>=1.6.1)
|
|
28
29
|
Requires-Dist: tqdm (>=4.66.5)
|
|
29
|
-
Project-URL: Homepage, https://bactopia.
|
|
30
|
+
Project-URL: Homepage, https://bactopia.io/
|
|
30
31
|
Project-URL: Repository, https://github.com/bactopia/bactopia-py
|
|
31
32
|
Description-Content-Type: text/markdown
|
|
32
33
|
|
|
33
34
|

|
|
34
35
|
|
|
35
36
|
# bactopia-py
|
|
36
|
-
A Python package for working with [Bactopia](https://bactopia.
|
|
37
|
+
A Python package for working with [Bactopia](https://bactopia.io/)
|
|
37
38
|
|
|
38
39
|
## Bactopia Subcommands
|
|
39
40
|
|
|
@@ -312,7 +313,7 @@ of nearly 2,000,000 bacterial genomes. Using available FASTQ files from the Euro
|
|
|
312
313
|
Archive (ENA) and Sequence Read Archive (SRA), the genomes were assembled using [Shovill] and made
|
|
313
314
|
publicly available from the [Iqbal Lab](https://github.com/iqbal-lab-org/AllTheBacteria).
|
|
314
315
|
|
|
315
|
-
To make it easy to utilize [Bactopia Tools](https://bactopia.
|
|
316
|
+
To make it easy to utilize [Bactopia Tools](https://bactopia.io/bactopia-tools/) with
|
|
316
317
|
assemblies from AllTheBacteria, `bactopia-atb-formatter` was created. This tool will create a
|
|
317
318
|
directory structure that resembles output from an actual Bactopia run.
|
|
318
319
|
|
|
@@ -350,7 +351,7 @@ directory structure that resembles output from an actual Bactopia run.
|
|
|
350
351
|
To demonstrate the usage of `bactopia-atb-formatter`, we will use assemblies for
|
|
351
352
|
_Legionella pneumophila_. The following steps will download the assemblies, build the
|
|
352
353
|
Bactopia directory structure, and then run [legsta](https://github.com/tseemann/legsta)
|
|
353
|
-
via the [Bactopia Tool](https://bactopia.
|
|
354
|
+
via the [Bactopia Tool](https://bactopia.io/bactopia-tools/legsta/).
|
|
354
355
|
|
|
355
356
|
#### Download the Assemblies
|
|
356
357
|
|
|
@@ -394,7 +395,7 @@ created for 5,393 assemblies and is ready for use with Bactopia Tools.
|
|
|
394
395
|
|
|
395
396
|
As mentioned above, we will use [legsta](https://github.com/tseemann/legsta) to analyze each
|
|
396
397
|
of the _Legionella pneumophila_ assemblies. To do this, we will use the
|
|
397
|
-
[legsta Bactopia Tool](https://bactopia.
|
|
398
|
+
[legsta Bactopia Tool](https://bactopia.io/bactopia-tools/legsta/).
|
|
398
399
|
|
|
399
400
|
```{bash}
|
|
400
401
|
# Run legsta (please utilize Docker or Singularity only for reproducibility)
|
|
@@ -431,7 +432,7 @@ CPU hours : 5.2
|
|
|
431
432
|
Succeeded : 5'395
|
|
432
433
|
```
|
|
433
434
|
|
|
434
|
-
That's it! Now you can take advantage of any of the [Bactopia Tools](https://bactopia.
|
|
435
|
+
That's it! Now you can take advantage of any of the [Bactopia Tools](https://bactopia.io/bactopia-tools/)
|
|
435
436
|
that utilize assemblies as inputs.
|
|
436
437
|
|
|
437
438
|
# PubMLST DB Builds
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|

|
|
2
2
|
|
|
3
3
|
# bactopia-py
|
|
4
|
-
A Python package for working with [Bactopia](https://bactopia.
|
|
4
|
+
A Python package for working with [Bactopia](https://bactopia.io/)
|
|
5
5
|
|
|
6
6
|
## Bactopia Subcommands
|
|
7
7
|
|
|
@@ -280,7 +280,7 @@ of nearly 2,000,000 bacterial genomes. Using available FASTQ files from the Euro
|
|
|
280
280
|
Archive (ENA) and Sequence Read Archive (SRA), the genomes were assembled using [Shovill] and made
|
|
281
281
|
publicly available from the [Iqbal Lab](https://github.com/iqbal-lab-org/AllTheBacteria).
|
|
282
282
|
|
|
283
|
-
To make it easy to utilize [Bactopia Tools](https://bactopia.
|
|
283
|
+
To make it easy to utilize [Bactopia Tools](https://bactopia.io/bactopia-tools/) with
|
|
284
284
|
assemblies from AllTheBacteria, `bactopia-atb-formatter` was created. This tool will create a
|
|
285
285
|
directory structure that resembles output from an actual Bactopia run.
|
|
286
286
|
|
|
@@ -318,7 +318,7 @@ directory structure that resembles output from an actual Bactopia run.
|
|
|
318
318
|
To demonstrate the usage of `bactopia-atb-formatter`, we will use assemblies for
|
|
319
319
|
_Legionella pneumophila_. The following steps will download the assemblies, build the
|
|
320
320
|
Bactopia directory structure, and then run [legsta](https://github.com/tseemann/legsta)
|
|
321
|
-
via the [Bactopia Tool](https://bactopia.
|
|
321
|
+
via the [Bactopia Tool](https://bactopia.io/bactopia-tools/legsta/).
|
|
322
322
|
|
|
323
323
|
#### Download the Assemblies
|
|
324
324
|
|
|
@@ -362,7 +362,7 @@ created for 5,393 assemblies and is ready for use with Bactopia Tools.
|
|
|
362
362
|
|
|
363
363
|
As mentioned above, we will use [legsta](https://github.com/tseemann/legsta) to analyze each
|
|
364
364
|
of the _Legionella pneumophila_ assemblies. To do this, we will use the
|
|
365
|
-
[legsta Bactopia Tool](https://bactopia.
|
|
365
|
+
[legsta Bactopia Tool](https://bactopia.io/bactopia-tools/legsta/).
|
|
366
366
|
|
|
367
367
|
```{bash}
|
|
368
368
|
# Run legsta (please utilize Docker or Singularity only for reproducibility)
|
|
@@ -399,7 +399,7 @@ CPU hours : 5.2
|
|
|
399
399
|
Succeeded : 5'395
|
|
400
400
|
```
|
|
401
401
|
|
|
402
|
-
That's it! Now you can take advantage of any of the [Bactopia Tools](https://bactopia.
|
|
402
|
+
That's it! Now you can take advantage of any of the [Bactopia Tools](https://bactopia.io/bactopia-tools/)
|
|
403
403
|
that utilize assemblies as inputs.
|
|
404
404
|
|
|
405
405
|
# PubMLST DB Builds
|
|
@@ -26,7 +26,7 @@ def common_options(fn):
|
|
|
26
26
|
def setup_logging(verbose: bool, silent: bool) -> None:
|
|
27
27
|
"""Configure root logger with RichHandler at the appropriate level."""
|
|
28
28
|
logging.basicConfig(
|
|
29
|
-
format="%(
|
|
29
|
+
format="%(message)s",
|
|
30
30
|
datefmt="%Y-%m-%d %H:%M:%S",
|
|
31
31
|
handlers=[
|
|
32
32
|
RichHandler(rich_tracebacks=True, console=rich.console.Console(stderr=True))
|
|
@@ -30,6 +30,8 @@ click.rich_click.OPTION_GROUPS = {
|
|
|
30
30
|
"name": "Query Options",
|
|
31
31
|
"options": [
|
|
32
32
|
"--exact-taxon",
|
|
33
|
+
"--provider",
|
|
34
|
+
"--only-provider",
|
|
33
35
|
"--limit",
|
|
34
36
|
"--accession-limit",
|
|
35
37
|
"--biosample-subset",
|
|
@@ -192,6 +194,9 @@ def parse_query(q, accession_limit, exact_taxon=False):
|
|
|
192
194
|
run_accessions = []
|
|
193
195
|
|
|
194
196
|
for query in queries:
|
|
197
|
+
query = query.strip()
|
|
198
|
+
if not query:
|
|
199
|
+
continue
|
|
195
200
|
try:
|
|
196
201
|
taxon_id = int(query)
|
|
197
202
|
if exact_taxon:
|
|
@@ -244,6 +249,18 @@ def parse_query(q, accession_limit, exact_taxon=False):
|
|
|
244
249
|
help="Taxon ID or Study, BioSample, or Run accession (can also be comma separated or a file of accessions)",
|
|
245
250
|
)
|
|
246
251
|
@click.option("--exact-taxon", is_flag=True, help="Exclude Taxon ID descendants")
|
|
252
|
+
@click.option(
|
|
253
|
+
"--provider",
|
|
254
|
+
default="ena",
|
|
255
|
+
show_default=True,
|
|
256
|
+
type=click.Choice(["ena", "sra"], case_sensitive=False),
|
|
257
|
+
help="Provider to query first, falls back to the other",
|
|
258
|
+
)
|
|
259
|
+
@click.option(
|
|
260
|
+
"--only-provider",
|
|
261
|
+
is_flag=True,
|
|
262
|
+
help="Only query the given --provider, skip fallback",
|
|
263
|
+
)
|
|
247
264
|
@click.option(
|
|
248
265
|
"--outdir", "-o", default="./", show_default=True, help="Directory to write output"
|
|
249
266
|
)
|
|
@@ -316,6 +333,8 @@ def parse_query(q, accession_limit, exact_taxon=False):
|
|
|
316
333
|
def search(
|
|
317
334
|
query,
|
|
318
335
|
exact_taxon,
|
|
336
|
+
provider,
|
|
337
|
+
only_provider,
|
|
319
338
|
outdir,
|
|
320
339
|
prefix,
|
|
321
340
|
limit,
|
|
@@ -379,12 +398,31 @@ def search(
|
|
|
379
398
|
accessions_file = f"{outdir}/{prefix}-accessions.txt".replace("//", "/")
|
|
380
399
|
filtered_file = f"{outdir}/{prefix}-filtered.txt".replace("//", "/")
|
|
381
400
|
summary_file = f"{outdir}/{prefix}-search.txt".replace("//", "/")
|
|
401
|
+
|
|
402
|
+
if not force:
|
|
403
|
+
existing = [
|
|
404
|
+
f
|
|
405
|
+
for f in [metadata_file, accessions_file, filtered_file, summary_file]
|
|
406
|
+
if Path(f).exists()
|
|
407
|
+
]
|
|
408
|
+
if existing:
|
|
409
|
+
logging.error(
|
|
410
|
+
f"Output files already exist: {', '.join(existing)}. "
|
|
411
|
+
"Use --force to overwrite."
|
|
412
|
+
)
|
|
413
|
+
sys.exit(1)
|
|
414
|
+
|
|
382
415
|
genome_sizes = get_ncbi_genome_size() if use_ncbi_genome_size else None
|
|
383
416
|
for query_type, ena_query, sra_query in queries:
|
|
384
417
|
logging.info(f"Submitting query (type - {query_type})")
|
|
385
418
|
is_accession = True if query_type.endswith("accession") else False
|
|
386
|
-
success, query_results = get_run_info(
|
|
387
|
-
sra_query,
|
|
419
|
+
success, query_results, source = get_run_info(
|
|
420
|
+
sra_query,
|
|
421
|
+
ena_query,
|
|
422
|
+
is_accession,
|
|
423
|
+
limit=limit,
|
|
424
|
+
provider=provider,
|
|
425
|
+
only_provider=only_provider,
|
|
388
426
|
)
|
|
389
427
|
results += query_results
|
|
390
428
|
if success:
|
|
@@ -395,8 +433,35 @@ def search(
|
|
|
395
433
|
genome_size=genome_size,
|
|
396
434
|
genome_sizes=genome_sizes,
|
|
397
435
|
)
|
|
436
|
+
|
|
437
|
+
# Fallback: provider returned results but none passed filtering
|
|
438
|
+
if not query_accessions and not only_provider:
|
|
439
|
+
fallback = "sra" if source == "ena" else "ena"
|
|
440
|
+
logging.info(
|
|
441
|
+
f"Accession found on {source.upper()}, but missing "
|
|
442
|
+
f"metadata, checking {fallback.upper()}..."
|
|
443
|
+
)
|
|
444
|
+
fb_success, fb_results, fb_source = get_run_info(
|
|
445
|
+
sra_query,
|
|
446
|
+
ena_query,
|
|
447
|
+
is_accession,
|
|
448
|
+
limit=limit,
|
|
449
|
+
provider=fallback,
|
|
450
|
+
only_provider=True,
|
|
451
|
+
)
|
|
452
|
+
if fb_success:
|
|
453
|
+
results += fb_results
|
|
454
|
+
source = fb_source
|
|
455
|
+
query_accessions, query_filtered = parse_accessions(
|
|
456
|
+
fb_results,
|
|
457
|
+
min_read_length=min_read_length,
|
|
458
|
+
min_base_count=min_base_count,
|
|
459
|
+
genome_size=genome_size,
|
|
460
|
+
genome_sizes=genome_sizes,
|
|
461
|
+
)
|
|
462
|
+
|
|
463
|
+
WARNING_MESSAGE = None
|
|
398
464
|
if len(query_accessions):
|
|
399
|
-
WARNING_MESSAGE = None
|
|
400
465
|
if query_type == "biosample" and biosample_subset > 0:
|
|
401
466
|
if len(query_accessions) > biosample_subset:
|
|
402
467
|
WARNING_MESSAGE = f"WARNING: Selected {biosample_subset} Experiment accession(s) from a total of {len(query_accessions)}"
|
|
@@ -404,20 +469,19 @@ def search(
|
|
|
404
469
|
query_accessions, biosample_subset
|
|
405
470
|
)
|
|
406
471
|
accessions = list(set(accessions + query_accessions))
|
|
407
|
-
filtered["min_base_count"] += query_filtered["min_base_count"]
|
|
408
|
-
filtered["min_read_length"] += query_filtered["min_read_length"]
|
|
409
|
-
filtered["technical"] += query_filtered["technical"]
|
|
410
|
-
for filtered_sample in query_filtered["filtered"]:
|
|
411
|
-
filtered["filtered"][filtered_sample["accession"]] = (
|
|
412
|
-
filtered_sample["reason"]
|
|
413
|
-
)
|
|
414
472
|
else:
|
|
415
473
|
if query_results:
|
|
416
|
-
WARNING_MESSAGE = f"WARNING: {query} did not return any Illumina or Ont results from
|
|
474
|
+
WARNING_MESSAGE = f"WARNING: {query} did not return any Illumina or Ont results from {source.upper()}."
|
|
417
475
|
else:
|
|
418
|
-
WARNING_MESSAGE = (
|
|
419
|
-
|
|
420
|
-
|
|
476
|
+
WARNING_MESSAGE = f"WARNING: {query} did not return any results from {source.upper()}."
|
|
477
|
+
|
|
478
|
+
filtered["min_base_count"] += query_filtered["min_base_count"]
|
|
479
|
+
filtered["min_read_length"] += query_filtered["min_read_length"]
|
|
480
|
+
filtered["technical"] += query_filtered["technical"]
|
|
481
|
+
for filtered_sample in query_filtered["filtered"]:
|
|
482
|
+
filtered["filtered"][filtered_sample["accession"]] = filtered_sample[
|
|
483
|
+
"reason"
|
|
484
|
+
]
|
|
421
485
|
|
|
422
486
|
# Create Summary
|
|
423
487
|
query_string = query
|
|
@@ -435,6 +499,7 @@ def search(
|
|
|
435
499
|
summary.append(
|
|
436
500
|
f"DATE: {datetime.datetime.now().replace(microsecond=0).isoformat()}"
|
|
437
501
|
)
|
|
502
|
+
summary.append(f"PROVIDER: {source.upper()}")
|
|
438
503
|
summary.append(f"LIMIT: {limit}")
|
|
439
504
|
summary.append(f"RESULTS: {len(results)} ({metadata_file})")
|
|
440
505
|
summary.append(
|
|
@@ -462,6 +527,10 @@ def search(
|
|
|
462
527
|
else:
|
|
463
528
|
logging.error(f"ERROR: Unable to retrieve metadata for query ({query})")
|
|
464
529
|
|
|
530
|
+
if not results:
|
|
531
|
+
logging.error("No results found, skipping output files.")
|
|
532
|
+
sys.exit(1)
|
|
533
|
+
|
|
465
534
|
# Output the results
|
|
466
535
|
logging.info(f"Writing results to {metadata_file}")
|
|
467
536
|
with open(metadata_file, "w") as output_fh:
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
|
|
5
|
+
ENA_URL = "https://www.ebi.ac.uk/ena/portal/api/search"
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_ena_metadata(query: str, is_accession: bool, limit: int):
    """Fetch read-run metadata from the ENA Portal API search endpoint.

    https://docs.google.com/document/d/1CwoY84MuZ3SdKYocqssumghBF88PWxUZ/edit#heading=h.ag0eqy2wfin5

    Args:
        query (str): The query to search for.
        is_accession (bool): If the query is an accession or not. Accessions
            are sent as ``includeAccessions``; anything else is wrapped in a
            genomic WGS-style ``query`` filter.
        limit (int): The maximum number of records to return.

    Returns:
        list: ``[True, records]`` when the request returns HTTP 200, where
            ``records`` is a list of dicts keyed by the TSV header columns
            (empty when only a header, or nothing, came back). Otherwise
            ``[False, [status_code, text]]``. If the request could not be
            completed at all, ``status_code`` is ``None`` and ``text`` is the
            exception message.
    """
    payload = {
        "dataPortal": "ena",
        "dccDataOnly": "false",
        "download": "false",
        "result": "read_run",
        "format": "tsv",
        "limit": limit,
        "fields": "all",
    }

    if is_accession:
        payload["includeAccessions"] = query
    else:
        payload["query"] = (
            f'"{query} AND library_source=GENOMIC AND '
            "(library_strategy=OTHER OR library_strategy=WGS OR "
            "library_strategy=WGA) AND (library_selection=MNase OR "
            "library_selection=RANDOM OR library_selection=unspecified OR "
            'library_selection="size fractionation")"'
        )

    headers = {"accept": "*/*", "Content-type": "application/x-www-form-urlencoded"}

    try:
        r = requests.post(ENA_URL, headers=headers, data=payload)
    except requests.RequestException as e:
        # Report transport failures (DNS, refused connection, ...) the same
        # way as a non-200 response so the caller can try another provider
        # instead of crashing.
        return [False, [None, str(e)]]

    if r.status_code != requests.codes.ok:
        return [False, [r.status_code, r.text]]

    records = []
    col_names = None
    for line in r.text.split("\n"):
        if not line:
            continue
        cols = line.split("\t")
        if col_names:
            records.append(dict(zip(col_names, cols)))
        else:
            # The first non-empty line is the TSV header.
            col_names = cols
    return [True, records]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_run_info(
    sra_query: str,
    ena_query: str,
    is_accession: bool,
    limit: int = 1000000,
    provider: str = "ena",
    only_provider: bool = False,
) -> tuple:
    """Retrieve a list of samples available from ENA and/or SRA.

    By default, the provider is queried first and the other is used as fallback. When
    only_provider is True, no fallback is attempted.

    Args:
        sra_query: A formatted query for SRA searches.
        ena_query: A formatted query for ENA searches.
        is_accession: If the query is an accession or not.
        limit: The maximum number of records to return.
        provider: Which provider to query first ("ena" or "sra"), in any
            letter case.
        only_provider: If True, skip fallback to the other provider.

    Returns:
        tuple: (success, data, source) where source is "ena", "sra", or "none".

    Raises:
        KeyError: If provider is neither "ena" nor "sra".
    """
    # Normalise so "ENA"/"SRA" from programmatic callers select the same
    # backend as the lowercase names instead of raising KeyError.
    provider = provider.lower()
    fallback = "sra" if provider == "ena" else "ena"

    def _query_ena():
        logging.debug("Querying ENA for metadata...")
        success, data = get_ena_metadata(ena_query, is_accession, limit=limit)
        if success and data:
            return True, data
        if not success:
            logging.warning(f"ENA query failed (status {data[0]}).")
        else:
            logging.debug("ENA query returned no results.")
        # An empty-but-successful response counts as a miss so the other
        # provider still gets a chance.
        return False, []

    def _query_sra():
        # Imported only when SRA is actually queried, so ENA-only lookups do
        # not load the SRA backend.
        from bactopia.databases.sra import get_sra_metadata

        logging.debug("Querying SRA for metadata...")
        return get_sra_metadata(sra_query, is_accession, limit=limit)

    query_fn = {"ena": _query_ena, "sra": _query_sra}

    success, data = query_fn[provider]()
    if success:
        return True, data, provider

    if only_provider:
        logging.error(f"{provider.upper()} returned no results (--only-provider).")
        return False, [], "none"

    logging.info(f"No results from {provider.upper()}, checking {fallback.upper()}...")
    success, data = query_fn[fallback]()
    if success:
        return True, data, fallback

    logging.error("Both ENA and SRA returned no results.")
    return False, [], "none"
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from pysradb.sraweb import SRAweb
|
|
4
|
+
|
|
5
|
+
# Lowercase substrings of an instrument model name mapped to the platform
# constant they imply. Lookup is first-match in insertion order.
INSTRUMENT_PLATFORM_MAP = {
    "illumina": "ILLUMINA",
    "nextseq": "ILLUMINA",
    "hiseq": "ILLUMINA",
    "miseq": "ILLUMINA",
    "novaseq": "ILLUMINA",
    "miniseq": "ILLUMINA",
    "genome analyzer": "ILLUMINA",
    "minion": "OXFORD_NANOPORE",
    "gridion": "OXFORD_NANOPORE",
    "promethion": "OXFORD_NANOPORE",
    "nanopore": "OXFORD_NANOPORE",
    "pacbio": "PACBIO_SMRT",
    "sequel": "PACBIO_SMRT",
    "revio": "PACBIO_SMRT",
    "ion torrent": "ION_TORRENT",
}

# SRA field name -> ENA field name for the fields renamed during normalization.
SRA_TO_ENA_FIELDS = {
    "run_total_bases": "base_count",
    "run_total_spots": "read_count",
    "organism_taxid": "tax_id",
    "organism_name": "scientific_name",
}


def instrument_to_platform(instrument: str) -> str:
    """Map an SRA instrument model name to the ENA platform constant.

    Matching is a case-insensitive substring search against the keys of
    INSTRUMENT_PLATFORM_MAP; the first key found wins.

    Args:
        instrument: Instrument model name from SRA (e.g. "Illumina MiniSeq").

    Returns:
        str: Platform constant (e.g. "ILLUMINA") or the original value if unknown.
            Non-string values (e.g. None or NaN for a missing cell) are
            returned unchanged.
    """
    if not isinstance(instrument, str):
        # A missing value has no .lower(); pass it through rather than raise.
        return instrument

    lower = instrument.lower()
    for key, platform in INSTRUMENT_PLATFORM_MAP.items():
        if key in lower:
            return platform
    return instrument
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def normalize_sra_fields(records: list[dict]) -> list[dict]:
    """Rename SRA fields to their ENA equivalents so parse_accessions() works unchanged.

    For every record this also fills ``instrument_model_desc`` from
    ``instrument`` when it is absent, and sets a placeholder ``fastq_bytes``
    with one entry per read file ("0;0" for paired layouts, "0" otherwise).

    Args:
        records: List of dicts from pysradb search results.

    Returns:
        list[dict]: Records with critical fields renamed in place.
    """
    for record in records:
        for sra_field, ena_field in SRA_TO_ENA_FIELDS.items():
            if sra_field in record:
                record[ena_field] = record.pop(sra_field)

        if "instrument" in record and "instrument_model_desc" not in record:
            instrument = record["instrument"]
            # A missing cell may be None/NaN rather than text; only map real
            # strings and pass anything else through untouched.
            record["instrument_model_desc"] = (
                instrument_to_platform(instrument)
                if isinstance(instrument, str)
                else instrument
            )

        # The key can be present with a non-string value, so a .get() default
        # alone is not enough; treat anything that is not text as SINGLE.
        layout = record.get("library_layout", "SINGLE")
        layout = layout.upper() if isinstance(layout, str) else "SINGLE"
        record["fastq_bytes"] = "0;0" if layout == "PAIRED" else "0"

    return records
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_sra_metadata(query: str, is_accession: bool, limit: int) -> list:
    """Search SRA through pysradb and return ENA-style records.

    Args:
        query: The query to search for (accession or NCBI query string).
        is_accession: If the query is an accession or not. Not read by this
            function.
        limit: The maximum number of records to return.

    Returns:
        list: [success: bool, data: list[dict]]. ``[False, []]`` is returned
            when the search yields nothing or when any exception is raised
            (the exception is logged, not propagated).
    """
    try:
        df = SRAweb().search_sra(
            query,
            detailed=True,
            sample_attribute=True,
            expand_sample_attributes=True,
        )

        has_rows = df is not None and not df.empty
        if not has_rows:
            logging.debug(f"SRA query returned no results for: {query}")
            return [False, []]

        # Keep only the first `limit` rows when more came back.
        if len(df) > limit:
            logging.debug(f"SRA returned {len(df)} results, truncating to {limit}")
            df = df.head(limit)

        return [True, normalize_sra_fields(df.to_dict(orient="records"))]
    except Exception as e:
        logging.error(f"Error querying SRA: {e}")
        return [False, []]
|
|
@@ -61,7 +61,7 @@ _NEXTFLOW_INFORMATIONAL_RE = re.compile(
|
|
|
61
61
|
re.IGNORECASE,
|
|
62
62
|
)
|
|
63
63
|
|
|
64
|
-
# nextflow.config: nextflowVersion = '>=
|
|
64
|
+
# nextflow.config: nextflowVersion = '>=26.04.0'
|
|
65
65
|
_NEXTFLOW_CONFIG_RE = re.compile(
|
|
66
66
|
r"nextflowVersion\s*=\s*['\"][^\d]*(\d+\.\d+(?:\.\d+)?)"
|
|
67
67
|
)
|
|
@@ -28,18 +28,20 @@ def parse(path: str, name: str) -> dict:
|
|
|
28
28
|
# Single end
|
|
29
29
|
r1 = Path(path)
|
|
30
30
|
else:
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
r2_path = path.replace("-original.json", "_R2-original.json")
|
|
36
|
-
else:
|
|
37
|
-
r1_path = path.replace("-final.json", "_R1-final.json")
|
|
38
|
-
r2_path = path.replace("-final.json", "_R2-final.json")
|
|
31
|
+
suffix = "-original.json" if "original" in path else "-final.json"
|
|
32
|
+
|
|
33
|
+
r1_path = path.replace(suffix, f"_R1{suffix}")
|
|
34
|
+
r2_path = path.replace(suffix, f"_R2{suffix}")
|
|
39
35
|
|
|
40
36
|
if Path(r1_path).exists() and Path(r2_path).exists():
|
|
41
37
|
r1 = Path(r1_path)
|
|
42
38
|
r2 = Path(r2_path)
|
|
39
|
+
else:
|
|
40
|
+
for tag in ("_SE", "_ONT"):
|
|
41
|
+
alt_path = path.replace(suffix, f"{tag}{suffix}")
|
|
42
|
+
if Path(alt_path).exists():
|
|
43
|
+
r1 = Path(alt_path)
|
|
44
|
+
break
|
|
43
45
|
|
|
44
46
|
final_results = {"sample": name}
|
|
45
47
|
if r1:
|
|
@@ -79,6 +79,6 @@ Key module categories:
|
|
|
79
79
|
- [README.md](README.md): Project overview and quick start
|
|
80
80
|
- [CONTRIBUTING.md](CONTRIBUTING.md): Human contributor guide
|
|
81
81
|
- [CHANGELOG.md](CHANGELOG.md): Version history
|
|
82
|
-
- Full docs: https://bactopia.
|
|
82
|
+
- Full docs: https://bactopia.io/
|
|
83
83
|
|
|
84
84
|
<!-- Auto-generated by bactopia-catalog. Do not edit directly; edit the template at bactopia-py/bactopia/templates/bactopia/llms.txt.j2. -->
|
|
@@ -5,7 +5,7 @@ manifest {
|
|
|
5
5
|
homePage = 'https://github.com/bactopia/bactopia'
|
|
6
6
|
description = 'An extensive workflow for processing sequencing of bacterial genomes.'
|
|
7
7
|
mainScript = 'main.nf'
|
|
8
|
-
version = '4.0.
|
|
8
|
+
version = '4.0.1'
|
|
9
9
|
nextflowVersion = '>=26.04.0'
|
|
10
10
|
}
|
|
11
11
|
|
|
@@ -21,7 +21,7 @@ params {
|
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
// Version
|
|
24
|
-
params.bactopia_version = '4.0.
|
|
24
|
+
params.bactopia_version = '4.0.1'
|
|
25
25
|
manifest.version = "${params.bactopia_version}"
|
|
26
26
|
|
|
27
27
|
// Includes
|
|
@@ -93,7 +93,7 @@ dag {
|
|
|
93
93
|
|
|
94
94
|
// Plugins
|
|
95
95
|
plugins {
|
|
96
|
-
id 'nf-bactopia@2.1.
|
|
96
|
+
id 'nf-bactopia@2.1.5'
|
|
97
97
|
}
|
|
98
98
|
|
|
99
99
|
bactopia {
|
{bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/module/tests/nextflow.config.j2
RENAMED
|
@@ -10,7 +10,7 @@ params {
|
|
|
10
10
|
ext = "fna"
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
bactopia_version = '4.0.
|
|
13
|
+
bactopia_version = '4.0.1'
|
|
14
14
|
bactopia_cache = System.getenv("BACTOPIA_CACHEDIR") ?: "${System.getenv('HOME')}/.bactopia"
|
|
15
15
|
condadir = "${params.bactopia_cache}/conda"
|
|
16
16
|
wf = params.workflow.name
|
{bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/subworkflow/tests/nextflow.config.j2
RENAMED
|
@@ -9,7 +9,7 @@ params {
|
|
|
9
9
|
description = "{{ description }}"
|
|
10
10
|
ext = "fna"
|
|
11
11
|
}
|
|
12
|
-
bactopia_version = '4.0.
|
|
12
|
+
bactopia_version = '4.0.1'
|
|
13
13
|
bactopia_cache = System.getenv("BACTOPIA_CACHEDIR") ?: "${System.getenv('HOME')}/.bactopia"
|
|
14
14
|
condadir = "${params.bactopia_cache}/conda"
|
|
15
15
|
wf = params.workflow.name
|
|
@@ -38,5 +38,5 @@ includeConfig "../../../conf/profiles.config"
|
|
|
38
38
|
|
|
39
39
|
// Plugin
|
|
40
40
|
plugins {
|
|
41
|
-
id 'nf-bactopia@2.
|
|
41
|
+
id 'nf-bactopia@2.1.5'
|
|
42
42
|
}
|
|
@@ -5,8 +5,8 @@ manifest {
|
|
|
5
5
|
homePage = 'https://github.com/bactopia/bactopia'
|
|
6
6
|
description = 'An extensive workflow for processing sequencing of bacterial genomes.'
|
|
7
7
|
mainScript = 'main.nf'
|
|
8
|
-
version = '4.0.
|
|
9
|
-
nextflowVersion = '>=
|
|
8
|
+
version = '4.0.1'
|
|
9
|
+
nextflowVersion = '>=26.04.0'
|
|
10
10
|
}
|
|
11
11
|
|
|
12
12
|
params {
|
|
@@ -19,7 +19,7 @@ params {
|
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
// Version
|
|
22
|
-
params.bactopia_version = '4.0.
|
|
22
|
+
params.bactopia_version = '4.0.1'
|
|
23
23
|
manifest.version = "${params.bactopia_version}"
|
|
24
24
|
|
|
25
25
|
// Includes
|
|
@@ -85,7 +85,7 @@ dag {
|
|
|
85
85
|
|
|
86
86
|
// Plugins
|
|
87
87
|
plugins {
|
|
88
|
-
id 'nf-bactopia@2.
|
|
88
|
+
id 'nf-bactopia@2.1.5'
|
|
89
89
|
}
|
|
90
90
|
|
|
91
91
|
bactopia {
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "bactopia"
|
|
3
|
-
version = "2.
|
|
3
|
+
version = "2.2.0"
|
|
4
4
|
description = "A Python package for working with Bactopia"
|
|
5
5
|
authors = [
|
|
6
6
|
"Robert A. Petit III <robbie.petit@gmail.com>",
|
|
7
7
|
]
|
|
8
8
|
license = "MIT"
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
homepage = "https://bactopia.
|
|
10
|
+
homepage = "https://bactopia.io/"
|
|
11
11
|
repository = "https://github.com/bactopia/bactopia-py"
|
|
12
12
|
keywords = ["bioinformatics", "bacteria", "bactopia", "SRA", "ENA"]
|
|
13
13
|
|
|
@@ -57,6 +57,7 @@ pyyaml = ">=6.0"
|
|
|
57
57
|
biopython = ">=1.80"
|
|
58
58
|
openpyxl = ">=3.1.0"
|
|
59
59
|
psutil = ">=5.9.0"
|
|
60
|
+
pysradb = ">=2.2.0"
|
|
60
61
|
|
|
61
62
|
[tool.poetry.group.dev.dependencies]
|
|
62
63
|
ruff = "^0.9"
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import sys
|
|
3
|
-
|
|
4
|
-
import requests
|
|
5
|
-
|
|
6
|
-
ENA_URL = "https://www.ebi.ac.uk/ena/portal/api/search"
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def get_ena_metadata(query: str, is_accession: bool, limit: int):
|
|
10
|
-
"""Fetch metadata from ENA.
|
|
11
|
-
https://docs.google.com/document/d/1CwoY84MuZ3SdKYocqssumghBF88PWxUZ/edit#heading=h.ag0eqy2wfin5
|
|
12
|
-
|
|
13
|
-
Args:
|
|
14
|
-
query (str): The query to search for.
|
|
15
|
-
is_accession (bool): If the query is an accession or not.
|
|
16
|
-
limit (int): The maximum number of records to return.
|
|
17
|
-
|
|
18
|
-
Returns:
|
|
19
|
-
list: Records associated with the accession.
|
|
20
|
-
"""
|
|
21
|
-
data = {
|
|
22
|
-
"dataPortal": "ena",
|
|
23
|
-
"dccDataOnly": "false",
|
|
24
|
-
"download": "false",
|
|
25
|
-
"result": "read_run",
|
|
26
|
-
"format": "tsv",
|
|
27
|
-
"limit": limit,
|
|
28
|
-
"fields": "all",
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
if is_accession:
|
|
32
|
-
data["includeAccessions"] = query
|
|
33
|
-
else:
|
|
34
|
-
data["query"] = (
|
|
35
|
-
f'"{query} AND library_source=GENOMIC AND '
|
|
36
|
-
"(library_strategy=OTHER OR library_strategy=WGS OR "
|
|
37
|
-
"library_strategy=WGA) AND (library_selection=MNase OR "
|
|
38
|
-
"library_selection=RANDOM OR library_selection=unspecified OR "
|
|
39
|
-
'library_selection="size fractionation")"'
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
headers = {"accept": "*/*", "Content-type": "application/x-www-form-urlencoded"}
|
|
43
|
-
|
|
44
|
-
r = requests.post(ENA_URL, headers=headers, data=data)
|
|
45
|
-
if r.status_code == requests.codes.ok:
|
|
46
|
-
data = []
|
|
47
|
-
col_names = None
|
|
48
|
-
for line in r.text.split("\n"):
|
|
49
|
-
cols = line.split("\t")
|
|
50
|
-
if line:
|
|
51
|
-
if col_names:
|
|
52
|
-
data.append(dict(zip(col_names, cols)))
|
|
53
|
-
else:
|
|
54
|
-
col_names = cols
|
|
55
|
-
return [True, data]
|
|
56
|
-
else:
|
|
57
|
-
return [False, [r.status_code, r.text]]
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def get_run_info(
|
|
61
|
-
sra_query: str, ena_query: str, is_accession: bool, limit: int = 1000000
|
|
62
|
-
) -> tuple:
|
|
63
|
-
"""Retrieve a list of samples available from ENA.
|
|
64
|
-
|
|
65
|
-
The first attempt will be against ENA, and if that fails, SRA will be queried. This should
|
|
66
|
-
capture those samples not yet synced between ENA and SRA.
|
|
67
|
-
|
|
68
|
-
Args:
|
|
69
|
-
sra_query (str): A formatted query for SRA searches.
|
|
70
|
-
ena_query (str): A formatted query for ENA searches.
|
|
71
|
-
is_accession (bool): If the query is an accession or not.
|
|
72
|
-
limit (int): The maximum number of records to return.
|
|
73
|
-
|
|
74
|
-
Returns:
|
|
75
|
-
tuple: Records associated with the accession.
|
|
76
|
-
"""
|
|
77
|
-
|
|
78
|
-
logging.debug("Querying ENA for metadata...")
|
|
79
|
-
success, ena_data = get_ena_metadata(ena_query, is_accession, limit=limit)
|
|
80
|
-
if success:
|
|
81
|
-
return success, ena_data
|
|
82
|
-
else:
|
|
83
|
-
logging.error("There was an issue querying ENA, exiting...")
|
|
84
|
-
logging.error(f"STATUS: {ena_data[0]}")
|
|
85
|
-
logging.error(f"TEXT: {ena_data[1]}")
|
|
86
|
-
sys.exit(1)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/module/tests/nf-test.config.j2
RENAMED
|
File without changes
|
|
File without changes
|
{bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/subworkflow/tests/main.nf.test.j2
RENAMED
|
File without changes
|
{bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/subworkflow/tests/nf-test.config.j2
RENAMED
|
File without changes
|
{bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/subworkflow/tests/nftignore.j2
RENAMED
|
File without changes
|
|
File without changes
|
{bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/workflow/tests/main.nf.test.j2
RENAMED
|
File without changes
|
{bactopia-2.1.5 → bactopia-2.2.0}/bactopia/templates/scaffold/workflow/tests/nf-test.config.j2
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|