genelastic 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genelastic/api/extends/example.py +2 -3
- genelastic/api/routes.py +160 -23
- genelastic/api/server.py +30 -22
- genelastic/api/settings.py +3 -2
- genelastic/common/__init__.py +36 -9
- genelastic/common/cli.py +51 -23
- genelastic/common/elastic.py +80 -49
- genelastic/common/exceptions.py +0 -2
- genelastic/common/types.py +20 -15
- genelastic/import_data/__init__.py +23 -5
- genelastic/import_data/analyses.py +17 -20
- genelastic/import_data/analysis.py +69 -65
- genelastic/import_data/bi_process.py +7 -5
- genelastic/import_data/bi_processes.py +8 -8
- genelastic/import_data/cli_gen_data.py +116 -0
- genelastic/import_data/cli_import.py +379 -0
- genelastic/import_data/{info.py → cli_info.py} +104 -75
- genelastic/import_data/cli_integrity.py +384 -0
- genelastic/import_data/cli_validate.py +54 -0
- genelastic/import_data/constants.py +11 -32
- genelastic/import_data/data_file.py +23 -20
- genelastic/import_data/filename_pattern.py +26 -32
- genelastic/import_data/import_bundle.py +56 -47
- genelastic/import_data/import_bundle_factory.py +166 -158
- genelastic/import_data/logger.py +22 -18
- genelastic/import_data/random_bundle.py +402 -0
- genelastic/import_data/tags.py +46 -26
- genelastic/import_data/wet_process.py +8 -4
- genelastic/import_data/wet_processes.py +13 -8
- genelastic/ui/__init__.py +0 -0
- genelastic/ui/server.py +87 -0
- genelastic/ui/settings.py +11 -0
- genelastic-0.7.0.dist-info/METADATA +105 -0
- genelastic-0.7.0.dist-info/RECORD +40 -0
- {genelastic-0.6.1.dist-info → genelastic-0.7.0.dist-info}/WHEEL +1 -1
- genelastic-0.7.0.dist-info/entry_points.txt +6 -0
- genelastic/import_data/gen_data.py +0 -194
- genelastic/import_data/import_data.py +0 -292
- genelastic/import_data/integrity.py +0 -290
- genelastic/import_data/validate_data.py +0 -43
- genelastic-0.6.1.dist-info/METADATA +0 -41
- genelastic-0.6.1.dist-info/RECORD +0 -36
- genelastic-0.6.1.dist-info/entry_points.txt +0 -6
- {genelastic-0.6.1.dist-info → genelastic-0.7.0.dist-info}/top_level.txt +0 -0
genelastic/ui/server.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from flask import Flask, render_template
|
|
3
|
+
|
|
4
|
+
# Flask application serving the genelastic web UI.
app = Flask(__name__)
# Load the settings through the installed package path. The wheel ships the
# top-level package ``genelastic`` (there is no ``src`` package), so the former
# "src.genelastic.ui.settings.Config" string could only be resolved when the
# server was started from a source checkout.
app.config.from_object("genelastic.ui.settings.Config")
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@app.route("/")
def home() -> str:
    """Render the home page with the API version and every listed entity.

    Queries, in order, the ``version``, ``wet_processes``, ``bi_processes``
    and ``analyses`` endpoints of the API located at ``GENUI_API_URL``.
    If any of these requests fails (network error, HTTP error status or
    a body that is not valid JSON), the page is rendered with empty lists and
    the version replaced by ``"API not reachable"``.

    :returns: The rendered ``home.html`` template.
    """
    # Accept the configured base URL with or without a trailing slash.
    api_url = str(app.config["GENUI_API_URL"]).rstrip("/")
    endpoints = ("version", "wet_processes", "bi_processes", "analyses")
    try:
        payloads = {}
        for endpoint in endpoints:
            response = requests.get(f"{api_url}/{endpoint}", timeout=20)
            # Treat 4xx/5xx answers as failures instead of rendering their body.
            response.raise_for_status()
            payloads[endpoint] = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding errors raised by older versions
        # of requests, which do not wrap them in a RequestException.
        version = "API not reachable"
        wet_processes = []
        bi_processes = []
        analyses = []
    else:
        version = payloads["version"].get("version")
        wet_processes = payloads["wet_processes"]
        bi_processes = payloads["bi_processes"]
        analyses = payloads["analyses"]
    return render_template(
        "home.html",
        version=version,
        wet_processes=wet_processes,
        bi_processes=bi_processes,
        analyses=analyses,
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@app.route("/analyses")
def show_analyses() -> str:
    """Render the list of analyses returned by the API.

    When the request fails (network error, HTTP error status or a body that
    is not valid JSON), the list is replaced by a single
    ``"Error fetching data."`` entry.

    :returns: The rendered ``analyses.html`` template.
    """
    # Accept the configured base URL with or without a trailing slash.
    api_url = str(app.config["GENUI_API_URL"]).rstrip("/")
    try:
        analyses_response = requests.get(f"{api_url}/analyses", timeout=20)
        # Treat 4xx/5xx answers as failures instead of rendering their body.
        analyses_response.raise_for_status()
        analyses = analyses_response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding errors raised by older versions
        # of requests, which do not wrap them in a RequestException.
        analyses = ["Error fetching data."]

    return render_template("analyses.html", analyses=analyses)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@app.route("/bi_processes")
def show_bi_processes() -> str:
    """Render the list of bioinformatics processes returned by the API.

    When the request fails (network error, HTTP error status or a body that
    is not valid JSON), the list is replaced by a single
    ``"Error fetching data."`` entry.

    :returns: The rendered ``bi_processes.html`` template.
    """
    # Accept the configured base URL with or without a trailing slash.
    api_url = str(app.config["GENUI_API_URL"]).rstrip("/")
    try:
        bi_processes_response = requests.get(
            f"{api_url}/bi_processes", timeout=20
        )
        # Treat 4xx/5xx answers as failures instead of rendering their body.
        bi_processes_response.raise_for_status()
        bi_processes = bi_processes_response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding errors raised by older versions
        # of requests, which do not wrap them in a RequestException.
        bi_processes = ["Error fetching data."]

    return render_template("bi_processes.html", bi_processes=bi_processes)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@app.route("/wet_processes")
def show_wet_processes() -> str:
    """Render the list of wet processes returned by the API.

    When the request fails (network error, HTTP error status or a body that
    is not valid JSON), the list is replaced by a single
    ``"Error fetching data."`` entry.

    :returns: The rendered ``wet_processes.html`` template.
    """
    # Accept the configured base URL with or without a trailing slash.
    api_url = str(app.config["GENUI_API_URL"]).rstrip("/")
    try:
        wet_processes_response = requests.get(
            f"{api_url}/wet_processes", timeout=20
        )
        # Treat 4xx/5xx answers as failures instead of rendering their body.
        wet_processes_response.raise_for_status()
        wet_processes = wet_processes_response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding errors raised by older versions
        # of requests, which do not wrap them in a RequestException.
        wet_processes = ["Error fetching data."]

    return render_template("wet_processes.html", wet_processes=wet_processes)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@app.route("/version")
def show_version() -> str:
    """Render the version reported by the API.

    The version is read from the ``version`` key of the JSON answer and
    defaults to ``"Version not found"`` when that key is absent. When the
    request fails (network error, HTTP error status or a body that is not
    valid JSON), ``"Error fetching version."`` is displayed instead.

    :returns: The rendered ``version.html`` template.
    """
    # Accept the configured base URL with or without a trailing slash.
    api_url = str(app.config["GENUI_API_URL"]).rstrip("/")
    try:
        version_response = requests.get(f"{api_url}/version", timeout=20)
        # Treat 4xx/5xx answers as failures instead of rendering their body.
        version_response.raise_for_status()
        version = version_response.json().get("version", "Version not found")
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding errors raised by older versions
        # of requests, which do not wrap them in a RequestException.
        version = "Error fetching version."

    return render_template("version.html", version=version)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: genelastic
|
|
3
|
+
Version: 0.7.0
|
|
4
|
+
Summary: Generate and store genetic data into an Elasticsearch database.
|
|
5
|
+
Author: CNRGH
|
|
6
|
+
Author-email: Pierrick ROGER <pierrick.roger@cnrgh.fr>, Maxime BLANCHON <maxime.blanchon@cnrgh.fr>
|
|
7
|
+
License: CeCILL
|
|
8
|
+
Keywords: CNRGH,genelastic,generation,storage,elasticsearch,database
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: elasticsearch
|
|
16
|
+
Requires-Dist: PyVCF3
|
|
17
|
+
Requires-Dist: schema
|
|
18
|
+
Requires-Dist: PyYAML
|
|
19
|
+
Requires-Dist: biophony>=1.2.1
|
|
20
|
+
Requires-Dist: colorlog
|
|
21
|
+
Provides-Extra: tests
|
|
22
|
+
Requires-Dist: pytest; extra == "tests"
|
|
23
|
+
Requires-Dist: mypy; extra == "tests"
|
|
24
|
+
Requires-Dist: coverage; extra == "tests"
|
|
25
|
+
Requires-Dist: yamllint; extra == "tests"
|
|
26
|
+
Requires-Dist: types-PyYAML; extra == "tests"
|
|
27
|
+
Requires-Dist: ruff<0.9,>=0.8.1; extra == "tests"
|
|
28
|
+
Requires-Dist: pre-commit<4.1,>=4.0.1; extra == "tests"
|
|
29
|
+
Requires-Dist: types-requests; extra == "tests"
|
|
30
|
+
Requires-Dist: ansible<10.5,>=10.4.0; extra == "tests"
|
|
31
|
+
Requires-Dist: ansible-lint<25,>=24.12.2; extra == "tests"
|
|
32
|
+
Provides-Extra: docs
|
|
33
|
+
Requires-Dist: sphinx; extra == "docs"
|
|
34
|
+
Requires-Dist: sphinx-autoapi; extra == "docs"
|
|
35
|
+
Requires-Dist: furo; extra == "docs"
|
|
36
|
+
Provides-Extra: api
|
|
37
|
+
Requires-Dist: flask; extra == "api"
|
|
38
|
+
Requires-Dist: elasticsearch; extra == "api"
|
|
39
|
+
Requires-Dist: environs; extra == "api"
|
|
40
|
+
Requires-Dist: connexion[flask,swagger-ui,uvicorn]; extra == "api"
|
|
41
|
+
Provides-Extra: ui
|
|
42
|
+
Requires-Dist: flask; extra == "ui"
|
|
43
|
+
Requires-Dist: requests; extra == "ui"
|
|
44
|
+
Requires-Dist: environs; extra == "ui"
|
|
45
|
+
|
|
46
|
+
# genelastic
|
|
47
|
+
|
|
48
|
+
Store genetic data in an Elasticsearch database.
|
|
49
|
+
|
|
50
|
+
## Prerequisites
|
|
51
|
+
|
|
52
|
+
- `python` >= 3.11
|
|
53
|
+
- `make`
|
|
54
|
+
|
|
55
|
+
## Installation
|
|
56
|
+
|
|
57
|
+
To install dependencies, run the following commands:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
python -m venv .venv
|
|
61
|
+
source .venv/bin/activate
|
|
62
|
+
make install.deps
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Configuration
|
|
66
|
+
|
|
67
|
+
To start the **API server**, the following environment variables should be defined:
|
|
68
|
+
|
|
69
|
+
- `GENAPI_ES_URL`: URL of the Elasticsearch server,
|
|
70
|
+
- `GENAPI_ES_ENCODED_API_KEY`: Encoded API key,
|
|
71
|
+
- `GENAPI_ES_INDEX_PREFIX`: Prefix to identify indices of interest,
|
|
72
|
+
- `GENAPI_ES_CERT_FP`: Certificate fingerprint of the Elasticsearch server.
|
|
73
|
+
|
|
74
|
+
Then, run the following command:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
make start-api
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
To start the **UI server**, the following environment variables should be defined:
|
|
81
|
+
|
|
82
|
+
- `GENUI_API_URL`: URL of the API server.
|
|
83
|
+
|
|
84
|
+
Then, run the following command:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
make start-ui
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Developers
|
|
91
|
+
|
|
92
|
+
This project uses [pre-commit](https://pre-commit.com/) to manage Git hook scripts. To install project hooks, run:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
pre-commit install
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
After that, each commit will succeed only if all hooks (defined in `.pre-commit-config.yaml`) pass.
|
|
99
|
+
|
|
100
|
+
If necessary (though not recommended),
|
|
101
|
+
you can skip these hooks by using the `--no-verify` / `-n` option when committing:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
git commit -m "My commit message" --no-verify # This commit will not run installed hooks.
|
|
105
|
+
```
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
genelastic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
genelastic/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
genelastic/api/routes.py,sha256=O_bZGhKex04tWauE1fYRXjNmR2smIA4JZShwcakbJJA,6938
|
|
4
|
+
genelastic/api/server.py,sha256=XkkHpv_28OrfrY6CMDnImBFlDiEQSA8f1BpSSn2LGL4,2248
|
|
5
|
+
genelastic/api/settings.py,sha256=vLfj8ASLqq9f6c3eGg30L_WSBiaOIpnCsk-S-TUZRlk,403
|
|
6
|
+
genelastic/api/extends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
genelastic/api/extends/example.py,sha256=5vOw2ToYWzOj-vYkOLbqoa1MCnXIvKuv7xW6qBQ-eHU,164
|
|
8
|
+
genelastic/common/__init__.py,sha256=GfM4XaiaHsA2v8N0T3VgnXpZsLXjTc1wIPpRi4BlkZ8,937
|
|
9
|
+
genelastic/common/cli.py,sha256=cDbgc2m20Y1NO0CZR1qaTrCttcf0cjFAP885tuPZsH0,1770
|
|
10
|
+
genelastic/common/elastic.py,sha256=0XpECyNMC9RzQN1X7u-0IXmHp_KbSjkmGizArLjxTS0,7260
|
|
11
|
+
genelastic/common/exceptions.py,sha256=l4sQBW91OuzV2R7DhrVzEMDrfsyKM-iVYufp2Cv6rR8,159
|
|
12
|
+
genelastic/common/types.py,sha256=JeQ6XzIemIF1QgvprGA0UxFHyJSKjEkgMaIumOvpbNg,1013
|
|
13
|
+
genelastic/import_data/__init__.py,sha256=Ir_fI0BrdC44JiiQaWm29_SyHbDe2CQsElKHgQxELCI,602
|
|
14
|
+
genelastic/import_data/analyses.py,sha256=yA4DKtXIUR7JTCp7PEndEjI6FOooW-6lR9vkqTZ0Izs,1861
|
|
15
|
+
genelastic/import_data/analysis.py,sha256=MuHYfIWFFwHfCp9tDkWTtVKkg5AasASwrdOUZ177EeM,7799
|
|
16
|
+
genelastic/import_data/bi_process.py,sha256=CBBGx6oO7KYzUypXzpaopGY4J_nAKcDi2UPJe7B63Bs,650
|
|
17
|
+
genelastic/import_data/bi_processes.py,sha256=Kv93NOntycEiB_BldoMxHNfM6sJvgHQvVqrUSSEb_VA,1365
|
|
18
|
+
genelastic/import_data/cli_gen_data.py,sha256=WYvSgiC30pDvhQuGMxtDKookUF9IQpjQP6UMANvvHSU,3012
|
|
19
|
+
genelastic/import_data/cli_import.py,sha256=Ej-EqaBUNoVHGCefzLsmFbKaXNTaOBs8xJsEdRhbbLo,11622
|
|
20
|
+
genelastic/import_data/cli_info.py,sha256=ue0Pf5cJvrmJ_bBZKG2CXkXDsQO3zQ6CIXhtUwpqXhU,7301
|
|
21
|
+
genelastic/import_data/cli_integrity.py,sha256=lOMA-I1iPXiJD4X80Xg91NKO-fa3Su0uSqBUtdMKU4Q,12282
|
|
22
|
+
genelastic/import_data/cli_validate.py,sha256=AcirmmJQWFDrpiPUNTYnpFjSnvD6YyStYJHIbfi1Ly0,1515
|
|
23
|
+
genelastic/import_data/constants.py,sha256=AKYXdDqWkDzvt-laZqbWlN6C9IoQSSqD70mpSWMU6NQ,760
|
|
24
|
+
genelastic/import_data/data_file.py,sha256=P5oe_yCVQQAALetrUs93Kl1XuqoLQS0iMkeEqPKaY7g,2661
|
|
25
|
+
genelastic/import_data/filename_pattern.py,sha256=3QXOF5ZG9vFA8KG7XXucpA7EIN3dqZmmtXnuFbh0bUQ,1709
|
|
26
|
+
genelastic/import_data/import_bundle.py,sha256=7W0hKwoxwGDtWEGV_NQvyesiNx1lCMpSaqKohpTLmtY,5030
|
|
27
|
+
genelastic/import_data/import_bundle_factory.py,sha256=COdMLTTrx_Y4svZAHVJ5Y0rHM4eQj027rFJiQ66u36M,9546
|
|
28
|
+
genelastic/import_data/logger.py,sha256=X12LBoNTmV6iR8vEPyYIMYSUeQ3LpTvDO69tQYDiOuA,1915
|
|
29
|
+
genelastic/import_data/random_bundle.py,sha256=Fk4oudK42pboGI14aaJjh8tTJxSxYNyF_89BBJ532RY,13821
|
|
30
|
+
genelastic/import_data/tags.py,sha256=N6_dGYqQy2QTN6AEzoEXPxRFmtq3GgTfpId-RjBstJY,4195
|
|
31
|
+
genelastic/import_data/wet_process.py,sha256=2SgN1yZFQA8Hb4ZNNeHpUhmeWlJDApqdjc_oq6ZF0jA,694
|
|
32
|
+
genelastic/import_data/wet_processes.py,sha256=mVUfFG0QUboTEoD3c-Hd1Z2_Tvid_yrKJG_XdC4n6oA,1535
|
|
33
|
+
genelastic/ui/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
genelastic/ui/server.py,sha256=7QX-54tT7CbpKje34kVVXnktI0wuZJOSLUChxeUGd20,2805
|
|
35
|
+
genelastic/ui/settings.py,sha256=hmgDNMVKk_OkKSBKyV8Eth57ULCTINwzeBCFAH7DdeU,218
|
|
36
|
+
genelastic-0.7.0.dist-info/METADATA,sha256=dj4cFcq7LXEv9v-turnGvHnK2KZ4zXrDxG0YKe11zPU,3127
|
|
37
|
+
genelastic-0.7.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
38
|
+
genelastic-0.7.0.dist-info/entry_points.txt,sha256=KHtfefg-7-v0MZ0jAcHrb6z3V6hq7UdCiYYIAlD6hIg,284
|
|
39
|
+
genelastic-0.7.0.dist-info/top_level.txt,sha256=ra4gCsuKH1d0sXygcnwD_u597ir6bYYxWTS7dkA6vdM,11
|
|
40
|
+
genelastic-0.7.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
[console_scripts]
|
|
2
|
+
gnl-data = genelastic.import_data.cli_gen_data:main
|
|
3
|
+
gnl-import = genelastic.import_data.cli_import:main
|
|
4
|
+
gnl-info = genelastic.import_data.cli_info:main
|
|
5
|
+
gnl-integrity = genelastic.import_data.cli_integrity:main
|
|
6
|
+
gnl-validate = genelastic.import_data.cli_validate:main
|
|
@@ -1,194 +0,0 @@
|
|
|
1
|
-
# pylint: disable=missing-module-docstring
|
|
2
|
-
import argparse
|
|
3
|
-
import logging
|
|
4
|
-
import os
|
|
5
|
-
import random
|
|
6
|
-
import subprocess # nosec
|
|
7
|
-
import sys
|
|
8
|
-
from typing import Dict, List, Sequence, Collection
|
|
9
|
-
|
|
10
|
-
import yaml
|
|
11
|
-
from genelastic.common import add_verbose_control_args
|
|
12
|
-
|
|
13
|
-
from .logger import configure_logging
|
|
14
|
-
|
|
15
|
-
logger = logging.getLogger('genelastic')
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def read_args() -> argparse.Namespace:
    # pylint: disable=R0801
    """Build the command-line parser and return the parsed arguments."""
    cli = argparse.ArgumentParser(
        description='Genetics data random generator.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        allow_abbrev=False,
    )
    add_verbose_control_args(cli)

    # Options are declared as data and registered in this exact order, which
    # is also the order in which they appear in the help output.
    options = (
        (('-d', '--data-folder'),
         {'dest': 'data_folder', 'required': True,
          'help': 'Data destination folder.'}),
        (('--log-file',),
         {'dest': 'log_file', 'help': 'Path to a log file.'}),
        (('-n', '--chrom-nb'),
         {'dest': 'chrom_nb', 'type': int, 'default': 5,
          'help': 'Number of chromosomes to generate.'}),
        (('-o', '--output-yaml-file'),
         {'dest': 'output_file', 'default': '-',
          'help': 'Output YAML file.'}),
        (('-s', '--chrom-size'),
         {'dest': 'chrom_size', 'type': int, 'default': 2000,
          'help': 'Data size (number of nucleotides) for each chromosome.'}),
    )
    for flags, settings in options:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def gen_cov_files(folder: str, nb_chrom: int, chrom_sz: int, prefix: str) -> List[str]:
    """Create one dummy coverage file per chromosome and return their paths.

    Each file is written by running the external ``gen-cov`` command with its
    standard output redirected to ``<folder>/<prefix>_chr<N>_cov.tsv``.
    Any failure is logged and ends the program with exit status 1.
    """
    last_position = chrom_sz - 1
    generated = []

    for chrom_id in range(1, nb_chrom + 1):
        target = os.path.join(folder, f"{prefix}_chr{chrom_id}_cov.tsv")

        # gen-cov prints the coverage data on its standard output.
        command = [
            "gen-cov",
            "-c", str(chrom_id),
            "-p", f"0-{last_position}",
            "-d", "5-15",
            "-r", "0.1",
        ]

        try:
            with open(target, "w", encoding="utf-8") as sink:
                # Capture the command output directly in the target file.
                subprocess.run(command, stdout=sink, check=True)  # nosec
        except (subprocess.CalledProcessError, FileNotFoundError, OSError) as err:
            logger.error(err)
            sys.exit(1)
        else:
            generated.append(target)

    return generated
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def gen_vcf_files(folder: str, nb_chrom: int, chrom_sz: int, prefix: str) -> List[str]:
    """Generate one dummy VCF file per chromosome and return their paths.

    For each chromosome, the output of the external ``gen-fasta`` command is
    piped into ``gen-vcf``, whose standard output is redirected to
    ``<folder>/<prefix>_chr<N>.vcf``. If either command fails, or a file
    cannot be written, the error is logged and the program exits with
    status 1.
    """
    files = []
    for chrom in range(1, nb_chrom + 1):
        output_path = os.path.join(folder, f"{prefix}_chr{chrom}.vcf")

        # gen-fasta will output a FASTA to stdout.
        gen_fasta_cmd = ["gen-fasta", "-s", f"chr{chrom}", "-n", str(chrom_sz)]
        # gen-vcf will output a VCF to stdout.
        gen_vcf_cmd = ["gen-vcf", "--snp-rate", "0.02", "--ins-rate", "0.01", "--del-rate", "0.01"]

        try:
            # Pipe the output of gen-fasta to the stdin of gen-vcf.
            with subprocess.Popen(gen_fasta_cmd, stdout=subprocess.PIPE) as gen_fasta_proc:  # nosec
                # Redirect the gen-vcf output to a file.
                with open(output_path, "w", encoding="utf-8") as f:
                    subprocess.run(gen_vcf_cmd,
                                   stdin=gen_fasta_proc.stdout, stdout=f, check=True)  # nosec

            # Leaving the Popen context waits for gen-fasta to finish. Its exit
            # status must be checked explicitly: gen-vcf can succeed on
            # truncated or empty input, which would hide a gen-fasta failure.
            if gen_fasta_proc.returncode != 0:
                raise subprocess.CalledProcessError(gen_fasta_proc.returncode, gen_fasta_cmd)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError also covers FileNotFoundError (command not installed).
            logger.error(e)
            sys.exit(1)

        files.append(output_path)

    return files
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def gen_name(chars: str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', n: int = 4) -> str:
    """Return a random name of ``n`` characters drawn from ``chars`` without replacement."""
    alphabet = list(chars)
    picked = random.sample(alphabet, n)
    return ''.join(picked)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
def gen_data(folder: str, nb_chrom: int, chrom_sz: int) -> Dict[
        str, int | Sequence[Collection[str]]]:
    """Build a dummy import bundle (V3 YAML schema) and generate its data files.

    The VCF files, then the coverage files, are written to ``folder``; the
    returned dictionary describes one analysis together with the wet process
    and bioinformatics process it refers to.
    """
    # Metadata shared by the file name prefix and the analysis description.
    sample_name = "HG0003"
    source = "CNRGH"
    barcode = gen_name(n=6)
    wet_process = "novaseqxplus-10b"
    bi_process = "dragen-4123"
    reference_genome = "hg38"
    prefix = '_'.join(
        (sample_name, source, wet_process, bi_process, barcode, reference_genome)
    )

    wet_process_entry = {
        "proc_id": wet_process,
        "manufacturer": "illumina",
        "sequencer": "novaseqxplus",
        "generic_kit": "truseq-illumina",
        "fragmentation": 350,
        "reads_size": 300,
        "input_type": "gdna",
        "amplification": "pcr-free",
        "flowcell_type": "10b",
        "sequencing_type": "wgs",
    }

    dragen_version = "4.1.23"
    pipeline_steps = [
        {"name": "basecalling", "cmd": "bclconvert", "version": "3.9.3.2"},
        {"name": "trimming", "cmd": "dragen"},
        {"name": "mapping", "cmd": "dragmap"},
        {"name": "postmapping", "cmd": "dragen", "version": dragen_version},
        {"name": "smallvarcalling", "cmd": "dragen", "version": dragen_version},
        {"name": "svcalling", "cmd": "dragen", "version": dragen_version},
        {"name": "secondary_qc", "cmd": "dragen", "version": dragen_version},
    ]
    bi_process_entry = {
        "proc_id": bi_process,
        "name": "dragen",
        "pipeline_version": "4.1.2.3",
        "steps": pipeline_steps,
        "sequencing_type": "wgs",
    }

    # The random values are drawn in a fixed order: flowcell name, then lane.
    analysis_entry = {
        'file_prefix': '%S_%F_%W_%B_%A_%R_chr[0-9]+',
        'sample_name': sample_name,
        'source': source,
        'barcode': barcode,
        'wet_process': wet_process,
        'bi_process': bi_process,
        'reference_genome': reference_genome,
        'flowcell': gen_name(n=8),
        'lanes': [random.randint(1, 10)],  # nosec
        'seq_indices': ['DUAL219', 'DUAL222', 'DUAL225', 'DUAL228', 'DUAL289'],
        'qc_comment': "",
        'data_path': folder,
    }

    # Write the data files matching the analysis file prefix.
    for generate in (gen_vcf_files, gen_cov_files):
        generate(folder, nb_chrom=nb_chrom, chrom_sz=chrom_sz, prefix=prefix)

    return {
        'version': 3,
        'analyses': [analysis_entry],
        'bi_processes': [bi_process_entry],
        'wet_processes': [wet_process_entry],
    }
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
# Write import bundle YAML
|
|
163
|
-
def write_yaml(file: str, data: Dict[str, int | Sequence[Collection[str]]]) -> None:
    """Dump ``data`` as a YAML document into ``file``, or to stdout when ``file`` is '-'."""
    if file != '-':
        # Regular file: start the document with the '---' marker line.
        with open(file, 'w', encoding="utf-8") as stream:
            print('---', file=stream)
            yaml.dump(data, stream)
        return

    # '-' selects the standard output.
    print('---')
    yaml.dump(data, sys.stdout)
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
def main() -> None:
    """Run the gen-data script: parse arguments, generate data, write the YAML bundle."""
    cli_args = read_args()

    # Logging must be configured before the first message is emitted.
    configure_logging(cli_args.verbose, log_file=cli_args.log_file)
    logger.debug("Arguments: %s", cli_args)

    # Generate the data files and their description.
    bundle = gen_data(
        cli_args.data_folder,
        nb_chrom=cli_args.chrom_nb,
        chrom_sz=cli_args.chrom_size,
    )

    # Emit the description on stdout or into the requested file.
    write_yaml(cli_args.output_file, bundle)


if __name__ == '__main__':
    main()
|