genelastic 0.6.1__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genelastic/api/cli_start_api.py +18 -0
- genelastic/api/extends/example.py +2 -3
- genelastic/api/extends/example.yml +20 -0
- genelastic/api/routes.py +160 -23
- genelastic/api/server.py +42 -31
- genelastic/api/settings.py +5 -8
- genelastic/api/specification.yml +350 -0
- genelastic/common/__init__.py +41 -9
- genelastic/common/cli.py +103 -23
- genelastic/common/elastic.py +80 -49
- genelastic/common/exceptions.py +0 -2
- genelastic/common/server.py +51 -0
- genelastic/common/types.py +20 -15
- genelastic/import_data/__init__.py +23 -5
- genelastic/import_data/analyses.py +17 -20
- genelastic/import_data/analysis.py +69 -65
- genelastic/import_data/bi_process.py +7 -5
- genelastic/import_data/bi_processes.py +8 -8
- genelastic/import_data/cli_gen_data.py +143 -0
- genelastic/import_data/cli_import.py +379 -0
- genelastic/import_data/{info.py → cli_info.py} +104 -75
- genelastic/import_data/cli_integrity.py +384 -0
- genelastic/import_data/cli_validate.py +54 -0
- genelastic/import_data/constants.py +11 -32
- genelastic/import_data/data_file.py +23 -20
- genelastic/import_data/filename_pattern.py +26 -32
- genelastic/import_data/import_bundle.py +56 -47
- genelastic/import_data/import_bundle_factory.py +166 -158
- genelastic/import_data/logger.py +22 -18
- genelastic/import_data/random_bundle.py +425 -0
- genelastic/import_data/tags.py +46 -26
- genelastic/import_data/wet_process.py +8 -4
- genelastic/import_data/wet_processes.py +13 -8
- genelastic/ui/__init__.py +0 -0
- genelastic/ui/cli_start_ui.py +18 -0
- genelastic/ui/routes.py +86 -0
- genelastic/ui/server.py +14 -0
- genelastic/ui/settings.py +7 -0
- genelastic/ui/templates/analyses.html +11 -0
- genelastic/ui/templates/bi_processes.html +11 -0
- genelastic/ui/templates/home.html +4 -0
- genelastic/ui/templates/layout.html +34 -0
- genelastic/ui/templates/version.html +9 -0
- genelastic/ui/templates/wet_processes.html +11 -0
- genelastic-0.8.0.dist-info/METADATA +109 -0
- genelastic-0.8.0.dist-info/RECORD +52 -0
- {genelastic-0.6.1.dist-info → genelastic-0.8.0.dist-info}/WHEEL +1 -1
- genelastic-0.8.0.dist-info/entry_points.txt +8 -0
- genelastic/import_data/gen_data.py +0 -194
- genelastic/import_data/import_data.py +0 -292
- genelastic/import_data/integrity.py +0 -290
- genelastic/import_data/validate_data.py +0 -43
- genelastic-0.6.1.dist-info/METADATA +0 -41
- genelastic-0.6.1.dist-info/RECORD +0 -36
- genelastic-0.6.1.dist-info/entry_points.txt +0 -6
- {genelastic-0.6.1.dist-info → genelastic-0.8.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# pylint: disable=missing-module-docstring
|
|
2
1
|
import logging
|
|
3
2
|
import typing
|
|
4
3
|
|
|
@@ -6,19 +5,22 @@ from genelastic.common import BundleDict
|
|
|
6
5
|
|
|
7
6
|
from .wet_process import WetProcess
|
|
8
7
|
|
|
9
|
-
logger = logging.getLogger(
|
|
8
|
+
logger = logging.getLogger("genelastic")
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
class WetProcesses:
|
|
13
12
|
"""Class WetProcesses is a container of WetProcess objects."""
|
|
14
13
|
|
|
15
14
|
def __init__(self) -> None:
|
|
16
|
-
|
|
15
|
+
"""Create an empty container."""
|
|
16
|
+
self._dict: dict[str, WetProcess] = {}
|
|
17
17
|
|
|
18
18
|
def __len__(self) -> int:
|
|
19
|
+
"""Return the number of WetProcess objects inside the container."""
|
|
19
20
|
return len(self._dict)
|
|
20
21
|
|
|
21
22
|
def __getitem__(self, key: str) -> WetProcess:
|
|
23
|
+
"""Return a WetProcess present in the container by its key."""
|
|
22
24
|
return self._dict[key]
|
|
23
25
|
|
|
24
26
|
def add(self, process: WetProcess) -> None:
|
|
@@ -26,20 +28,23 @@ class WetProcesses:
|
|
|
26
28
|
If a WetProcess object with the same ID already exists in the container, a ValueError is raised.
|
|
27
29
|
"""
|
|
28
30
|
if process.id in self._dict:
|
|
29
|
-
|
|
31
|
+
msg = (
|
|
32
|
+
f"A wet process with the id '{process.id}' is already present."
|
|
33
|
+
)
|
|
34
|
+
raise ValueError(msg)
|
|
30
35
|
|
|
31
36
|
# Add one WetProcess object.
|
|
32
37
|
self._dict[process.id] = process
|
|
33
38
|
|
|
34
|
-
def get_process_ids(self) ->
|
|
39
|
+
def get_process_ids(self) -> set[str]:
|
|
35
40
|
"""Get the set of wet process IDs."""
|
|
36
41
|
return set(self._dict.keys())
|
|
37
42
|
|
|
38
43
|
@classmethod
|
|
39
|
-
def from_array_of_dicts(
|
|
40
|
-
|
|
44
|
+
def from_array_of_dicts(
|
|
45
|
+
cls, arr: typing.Sequence[BundleDict]
|
|
46
|
+
) -> typing.Self:
|
|
41
47
|
"""Build a WetProcesses instance."""
|
|
42
|
-
|
|
43
48
|
wet_processes = cls()
|
|
44
49
|
|
|
45
50
|
for d in arr:
|
|
File without changes
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from genelastic.common import parse_server_launch_args
|
|
2
|
+
from genelastic.common.server import start_dev_server, start_prod_server
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main() -> None:
|
|
6
|
+
app_module = "genelastic.ui.server:app"
|
|
7
|
+
args = parse_server_launch_args("Start UI server.", 8001)
|
|
8
|
+
if args.env == "dev":
|
|
9
|
+
start_dev_server(app_module, args)
|
|
10
|
+
elif args.env == "prod":
|
|
11
|
+
start_prod_server(app_module, args)
|
|
12
|
+
else:
|
|
13
|
+
msg = f"Environment '{args.env}' is not implemented."
|
|
14
|
+
raise NotImplementedError(msg)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
if __name__ == "__main__":
|
|
18
|
+
main()
|
genelastic/ui/routes.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from flask import Blueprint, current_app, render_template
|
|
3
|
+
|
|
4
|
+
routes_bp = Blueprint("routes", __name__)
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@routes_bp.route("/")
|
|
8
|
+
def home() -> str:
|
|
9
|
+
api_url = current_app.config["GENUI_API_URL"]
|
|
10
|
+
try:
|
|
11
|
+
version_reponse = requests.get(f"{api_url}version", timeout=20)
|
|
12
|
+
version = version_reponse.json().get("version")
|
|
13
|
+
wet_processes_reponse = requests.get(
|
|
14
|
+
f"{api_url}wet_processes", timeout=20
|
|
15
|
+
)
|
|
16
|
+
wet_processes = wet_processes_reponse.json()
|
|
17
|
+
bi_processes_reponse = requests.get(
|
|
18
|
+
f"{api_url}bi_processes", timeout=20
|
|
19
|
+
)
|
|
20
|
+
bi_processes = bi_processes_reponse.json()
|
|
21
|
+
analyses_reponse = requests.get(f"{api_url}analyses", timeout=20)
|
|
22
|
+
analyses = analyses_reponse.json()
|
|
23
|
+
except requests.exceptions.RequestException:
|
|
24
|
+
version = "API not reachable"
|
|
25
|
+
wet_processes = []
|
|
26
|
+
bi_processes = []
|
|
27
|
+
analyses = []
|
|
28
|
+
return render_template(
|
|
29
|
+
"home.html",
|
|
30
|
+
version=version,
|
|
31
|
+
wet_processes=wet_processes,
|
|
32
|
+
bi_processes=bi_processes,
|
|
33
|
+
analyses=analyses,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@routes_bp.route("/analyses")
|
|
38
|
+
def show_analyses() -> str:
|
|
39
|
+
api_url = current_app.config["GENUI_API_URL"]
|
|
40
|
+
try:
|
|
41
|
+
analyses_reponse = requests.get(f"{api_url}analyses", timeout=20)
|
|
42
|
+
analyses = analyses_reponse.json()
|
|
43
|
+
except requests.exceptions.RequestException:
|
|
44
|
+
analyses = ["Error fetching data."]
|
|
45
|
+
|
|
46
|
+
return render_template("analyses.html", analyses=analyses)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@routes_bp.route("/bi_processes")
|
|
50
|
+
def show_bi_processes() -> str:
|
|
51
|
+
api_url = current_app.config["GENUI_API_URL"]
|
|
52
|
+
try:
|
|
53
|
+
bi_processes_reponse = requests.get(
|
|
54
|
+
f"{api_url}bi_processes", timeout=20
|
|
55
|
+
)
|
|
56
|
+
bi_processes = bi_processes_reponse.json()
|
|
57
|
+
except requests.exceptions.RequestException:
|
|
58
|
+
bi_processes = ["Error fetching data."]
|
|
59
|
+
|
|
60
|
+
return render_template("bi_processes.html", bi_processes=bi_processes)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@routes_bp.route("/wet_processes")
|
|
64
|
+
def show_wet_processes() -> str:
|
|
65
|
+
api_url = current_app.config["GENUI_API_URL"]
|
|
66
|
+
try:
|
|
67
|
+
wet_processes_reponse = requests.get(
|
|
68
|
+
f"{api_url}wet_processes", timeout=20
|
|
69
|
+
)
|
|
70
|
+
wet_processes = wet_processes_reponse.json()
|
|
71
|
+
except requests.exceptions.RequestException:
|
|
72
|
+
wet_processes = ["Error fetching data."]
|
|
73
|
+
|
|
74
|
+
return render_template("wet_processes.html", wet_processes=wet_processes)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@routes_bp.route("/version")
|
|
78
|
+
def show_version() -> str:
|
|
79
|
+
api_url = current_app.config["GENUI_API_URL"]
|
|
80
|
+
try:
|
|
81
|
+
version_reponse = requests.get(f"{api_url}version", timeout=20)
|
|
82
|
+
version = version_reponse.json().get("version", "Version not found")
|
|
83
|
+
except requests.exceptions.RequestException:
|
|
84
|
+
version = "Error fetching version."
|
|
85
|
+
|
|
86
|
+
return render_template("version.html", version=version)
|
genelastic/ui/server.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from asgiref.wsgi import WsgiToAsgi
|
|
2
|
+
from flask import Flask
|
|
3
|
+
|
|
4
|
+
from .routes import routes_bp
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def create_app() -> WsgiToAsgi:
|
|
8
|
+
flask_app = Flask(__name__)
|
|
9
|
+
flask_app.config.from_object("genelastic.ui.settings")
|
|
10
|
+
flask_app.register_blueprint(routes_bp)
|
|
11
|
+
return WsgiToAsgi(flask_app) # type: ignore[no-untyped-call]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
app = create_app()
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{% extends "layout.html" %}
|
|
2
|
+
{% block title %}Bi Processes{% endblock %}
|
|
3
|
+
{% block content %}
|
|
4
|
+
<h2>List of Bi Processes</h2>
|
|
5
|
+
<ul>
|
|
6
|
+
{% for bi_process in bi_processes %}
|
|
7
|
+
<li>{{ bi_process }}</li>
|
|
8
|
+
{% endfor %}
|
|
9
|
+
</ul>
|
|
10
|
+
<a href="/">Back to Home</a>
|
|
11
|
+
{% endblock %}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
6
|
+
<title>{% block title %}Genelastic{% endblock %}</title>
|
|
7
|
+
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css"
|
|
8
|
+
rel="stylesheet"
|
|
9
|
+
integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH"
|
|
10
|
+
crossorigin="anonymous"
|
|
11
|
+
>
|
|
12
|
+
|
|
13
|
+
</head>
|
|
14
|
+
<body>
|
|
15
|
+
<h1>Welcome to DEMO - genelastic UI</h1>
|
|
16
|
+
<nav>
|
|
17
|
+
<a href="/">Home</a>
|
|
18
|
+
<a href="/analyses">Analyses</a>
|
|
19
|
+
<a href="/wet_processes">Wet Processes</a>
|
|
20
|
+
<a href="/bi_processes">Bi Processes</a>
|
|
21
|
+
<a href="/version">Version</a>
|
|
22
|
+
</nav>
|
|
23
|
+
<hr>
|
|
24
|
+
<div id="content">
|
|
25
|
+
{% block content %}{% endblock %}
|
|
26
|
+
</div>
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"
|
|
30
|
+
integrity="sha384-YvpcrYf0tY3lHB60NNkmXc5s9fDVZLESaAA55NDzOxhy9GkcIdslK1eN7N6jIeHz"
|
|
31
|
+
crossorigin="anonymous">
|
|
32
|
+
</script>
|
|
33
|
+
</body>
|
|
34
|
+
</html>
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
{% extends "layout.html" %}
|
|
2
|
+
{% block title %}Version{% endblock %}
|
|
3
|
+
{% block content %}
|
|
4
|
+
<h2 class="text-center">Genelastic Version</h2>
|
|
5
|
+
<p class="text-center">{{ version }}</p>
|
|
6
|
+
<div class="mt-4 text-center">
|
|
7
|
+
<a href="/" class="btn btn-primary">Back to Home</a>
|
|
8
|
+
</div>
|
|
9
|
+
{% endblock %}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{% extends "layout.html" %}
|
|
2
|
+
{% block title %}Wet Processes{% endblock %}
|
|
3
|
+
{% block content %}
|
|
4
|
+
<h2>List of Wet Processes</h2>
|
|
5
|
+
<ul>
|
|
6
|
+
{% for wet_process in wet_processes %}
|
|
7
|
+
<li>{{ wet_process }}</li>
|
|
8
|
+
{% endfor %}
|
|
9
|
+
</ul>
|
|
10
|
+
<a href="/">Back to Home</a>
|
|
11
|
+
{% endblock %}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: genelastic
|
|
3
|
+
Version: 0.8.0
|
|
4
|
+
Summary: Generate and store genetic data into an Elasticsearch database.
|
|
5
|
+
Author: CNRGH
|
|
6
|
+
Author-email: Pierrick ROGER <pierrick.roger@cnrgh.fr>, Maxime BLANCHON <maxime.blanchon@cnrgh.fr>
|
|
7
|
+
License: CeCILL
|
|
8
|
+
Keywords: CNRGH,genelastic,generation,storage,elasticsearch,database
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: elasticsearch
|
|
16
|
+
Requires-Dist: PyVCF3
|
|
17
|
+
Requires-Dist: schema
|
|
18
|
+
Requires-Dist: PyYAML
|
|
19
|
+
Requires-Dist: biophony<1.4,>=1.3.0
|
|
20
|
+
Requires-Dist: colorlog
|
|
21
|
+
Provides-Extra: tests
|
|
22
|
+
Requires-Dist: pytest; extra == "tests"
|
|
23
|
+
Requires-Dist: mypy; extra == "tests"
|
|
24
|
+
Requires-Dist: coverage; extra == "tests"
|
|
25
|
+
Requires-Dist: yamllint; extra == "tests"
|
|
26
|
+
Requires-Dist: types-PyYAML; extra == "tests"
|
|
27
|
+
Requires-Dist: ruff<0.9,>=0.8.1; extra == "tests"
|
|
28
|
+
Requires-Dist: pre-commit<4.1,>=4.0.1; extra == "tests"
|
|
29
|
+
Requires-Dist: types-requests; extra == "tests"
|
|
30
|
+
Requires-Dist: ansible-core>=2.17.0; extra == "tests"
|
|
31
|
+
Requires-Dist: ansible-lint<25,>=24.12.2; extra == "tests"
|
|
32
|
+
Provides-Extra: docs
|
|
33
|
+
Requires-Dist: sphinx; extra == "docs"
|
|
34
|
+
Requires-Dist: sphinx-autoapi; extra == "docs"
|
|
35
|
+
Requires-Dist: furo; extra == "docs"
|
|
36
|
+
Provides-Extra: api
|
|
37
|
+
Requires-Dist: flask; extra == "api"
|
|
38
|
+
Requires-Dist: elasticsearch; extra == "api"
|
|
39
|
+
Requires-Dist: environs; extra == "api"
|
|
40
|
+
Requires-Dist: connexion[flask,swagger-ui,uvicorn]; extra == "api"
|
|
41
|
+
Requires-Dist: gunicorn; extra == "api"
|
|
42
|
+
Provides-Extra: ui
|
|
43
|
+
Requires-Dist: flask; extra == "ui"
|
|
44
|
+
Requires-Dist: requests; extra == "ui"
|
|
45
|
+
Requires-Dist: environs; extra == "ui"
|
|
46
|
+
Requires-Dist: uvicorn; extra == "ui"
|
|
47
|
+
Requires-Dist: asgiref; extra == "ui"
|
|
48
|
+
Requires-Dist: gunicorn; extra == "ui"
|
|
49
|
+
|
|
50
|
+
# genelastic
|
|
51
|
+
|
|
52
|
+
Stores genetic data in an Elasticsearch database.
|
|
53
|
+
|
|
54
|
+
## Prerequisites
|
|
55
|
+
|
|
56
|
+
- `python` >= 3.11
|
|
57
|
+
- `make`
|
|
58
|
+
|
|
59
|
+
## Installation
|
|
60
|
+
|
|
61
|
+
To install dependencies, run the following commands:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
python -m venv .venv
|
|
65
|
+
source .venv/bin/activate
|
|
66
|
+
make install.deps
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Configuration
|
|
70
|
+
|
|
71
|
+
To start the **API server**, the following environment variables should be defined:
|
|
72
|
+
|
|
73
|
+
- `GENAPI_ES_URL`: URL of the Elasticsearch server,
|
|
74
|
+
- `GENAPI_ES_ENCODED_API_KEY`: Encoded API key,
|
|
75
|
+
- `GENAPI_ES_INDEX_PREFIX`: Prefix to identify indices of interest,
|
|
76
|
+
- `GENAPI_ES_CERT_FP`: Certificate fingerprint of the Elasticsearch server.
|
|
77
|
+
|
|
78
|
+
Then, run the following command:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
make start-api
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
To start the **UI server**, the following environment variable should be defined:
|
|
85
|
+
|
|
86
|
+
- `GENUI_API_URL`: URL of the API server.
|
|
87
|
+
|
|
88
|
+
Then, run the following command:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
make start-ui
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Developers
|
|
95
|
+
|
|
96
|
+
This project uses [pre-commit](https://pre-commit.com/) to manage Git hook scripts. To install project hooks, run:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
pre-commit install
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
After that, each commit will succeed only if all hooks (defined in `.pre-commit-config.yaml`) pass.
|
|
103
|
+
|
|
104
|
+
If necessary (though not recommended),
|
|
105
|
+
you can skip these hooks by using the `--no-verify` / `-n` option when committing:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
git commit -m "My commit message" --no-verify # This commit will not run installed hooks.
|
|
109
|
+
```
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
genelastic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
genelastic/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
genelastic/api/cli_start_api.py,sha256=7sGgEDUC8qBSWxZsiNw3xirU7f8KP0e1guGsX0V8Fno,551
|
|
4
|
+
genelastic/api/routes.py,sha256=O_bZGhKex04tWauE1fYRXjNmR2smIA4JZShwcakbJJA,6938
|
|
5
|
+
genelastic/api/server.py,sha256=VRMsI9BD0OY5H9dXx39f6M64qpBIptbkxMeATvII-AY,2360
|
|
6
|
+
genelastic/api/settings.py,sha256=dbKQib2QQisxvi4riwJ67iB9imu-x7ZXOcKjeXuO0rU,316
|
|
7
|
+
genelastic/api/specification.yml,sha256=ZbjPxk5aud76yeVyqBe410g4hQJDeZImqswa1EMtD00,10116
|
|
8
|
+
genelastic/api/extends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
genelastic/api/extends/example.py,sha256=5vOw2ToYWzOj-vYkOLbqoa1MCnXIvKuv7xW6qBQ-eHU,164
|
|
10
|
+
genelastic/api/extends/example.yml,sha256=oStFpgLIOAgs01PCJWZEG1sCunAKAxLa4gNoopUy0Ko,543
|
|
11
|
+
genelastic/common/__init__.py,sha256=Rg9BZj8UJh24cRU7xvL67Ju3vLSpCEdEnZTEMrOqsx4,1012
|
|
12
|
+
genelastic/common/cli.py,sha256=uXd2alWple3lLi6nyvtFUH86qhlqUljzY8ry8rvajVI,3253
|
|
13
|
+
genelastic/common/elastic.py,sha256=0XpECyNMC9RzQN1X7u-0IXmHp_KbSjkmGizArLjxTS0,7260
|
|
14
|
+
genelastic/common/exceptions.py,sha256=l4sQBW91OuzV2R7DhrVzEMDrfsyKM-iVYufp2Cv6rR8,159
|
|
15
|
+
genelastic/common/server.py,sha256=o1AbarQ7JKEeYB5ZcvdZUdInSquLsq5dpFL--Jo7P5k,1401
|
|
16
|
+
genelastic/common/types.py,sha256=JeQ6XzIemIF1QgvprGA0UxFHyJSKjEkgMaIumOvpbNg,1013
|
|
17
|
+
genelastic/import_data/__init__.py,sha256=Ir_fI0BrdC44JiiQaWm29_SyHbDe2CQsElKHgQxELCI,602
|
|
18
|
+
genelastic/import_data/analyses.py,sha256=yA4DKtXIUR7JTCp7PEndEjI6FOooW-6lR9vkqTZ0Izs,1861
|
|
19
|
+
genelastic/import_data/analysis.py,sha256=MuHYfIWFFwHfCp9tDkWTtVKkg5AasASwrdOUZ177EeM,7799
|
|
20
|
+
genelastic/import_data/bi_process.py,sha256=CBBGx6oO7KYzUypXzpaopGY4J_nAKcDi2UPJe7B63Bs,650
|
|
21
|
+
genelastic/import_data/bi_processes.py,sha256=Kv93NOntycEiB_BldoMxHNfM6sJvgHQvVqrUSSEb_VA,1365
|
|
22
|
+
genelastic/import_data/cli_gen_data.py,sha256=teK5G7mcx6Y5Yi0idOdRyrnBNe1emKKLQKRSBElQQbg,3979
|
|
23
|
+
genelastic/import_data/cli_import.py,sha256=Ej-EqaBUNoVHGCefzLsmFbKaXNTaOBs8xJsEdRhbbLo,11622
|
|
24
|
+
genelastic/import_data/cli_info.py,sha256=ue0Pf5cJvrmJ_bBZKG2CXkXDsQO3zQ6CIXhtUwpqXhU,7301
|
|
25
|
+
genelastic/import_data/cli_integrity.py,sha256=lOMA-I1iPXiJD4X80Xg91NKO-fa3Su0uSqBUtdMKU4Q,12282
|
|
26
|
+
genelastic/import_data/cli_validate.py,sha256=AcirmmJQWFDrpiPUNTYnpFjSnvD6YyStYJHIbfi1Ly0,1515
|
|
27
|
+
genelastic/import_data/constants.py,sha256=AKYXdDqWkDzvt-laZqbWlN6C9IoQSSqD70mpSWMU6NQ,760
|
|
28
|
+
genelastic/import_data/data_file.py,sha256=P5oe_yCVQQAALetrUs93Kl1XuqoLQS0iMkeEqPKaY7g,2661
|
|
29
|
+
genelastic/import_data/filename_pattern.py,sha256=3QXOF5ZG9vFA8KG7XXucpA7EIN3dqZmmtXnuFbh0bUQ,1709
|
|
30
|
+
genelastic/import_data/import_bundle.py,sha256=7W0hKwoxwGDtWEGV_NQvyesiNx1lCMpSaqKohpTLmtY,5030
|
|
31
|
+
genelastic/import_data/import_bundle_factory.py,sha256=COdMLTTrx_Y4svZAHVJ5Y0rHM4eQj027rFJiQ66u36M,9546
|
|
32
|
+
genelastic/import_data/logger.py,sha256=X12LBoNTmV6iR8vEPyYIMYSUeQ3LpTvDO69tQYDiOuA,1915
|
|
33
|
+
genelastic/import_data/random_bundle.py,sha256=IR3XoKwKEfWAow9ZH8lb-vmCKvtnU6M-35de4JPiIgg,14811
|
|
34
|
+
genelastic/import_data/tags.py,sha256=N6_dGYqQy2QTN6AEzoEXPxRFmtq3GgTfpId-RjBstJY,4195
|
|
35
|
+
genelastic/import_data/wet_process.py,sha256=2SgN1yZFQA8Hb4ZNNeHpUhmeWlJDApqdjc_oq6ZF0jA,694
|
|
36
|
+
genelastic/import_data/wet_processes.py,sha256=mVUfFG0QUboTEoD3c-Hd1Z2_Tvid_yrKJG_XdC4n6oA,1535
|
|
37
|
+
genelastic/ui/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
+
genelastic/ui/cli_start_ui.py,sha256=4on9dTtBJ5SeD78kOWdOaCcKC0YDe201d-t9tjocYzw,549
|
|
39
|
+
genelastic/ui/routes.py,sha256=OHwWv80aA75Gk0q8RwzSv-ayYZNEeI6QYXJ6CzzHuy0,2852
|
|
40
|
+
genelastic/ui/server.py,sha256=MY3Hzt5BleeeG7n72gFjIhu3lYytST7ZSROBAcrjlDw,347
|
|
41
|
+
genelastic/ui/settings.py,sha256=5Xc9bxm42bwkgcRSb_klD0bCqhP4DrfMs4La63fCf0Q,164
|
|
42
|
+
genelastic/ui/templates/analyses.html,sha256=dhdTGQeFuJPjQX0Qh31K0hp5wrU54F3D4hLWlgPLW-w,247
|
|
43
|
+
genelastic/ui/templates/bi_processes.html,sha256=Z6jgXymgKjn1cOL9GccCcKWJPlr-28AzNmlcmZ62lXc,265
|
|
44
|
+
genelastic/ui/templates/home.html,sha256=rNxZFpcY6bXANEDjfs7-lRghI0UfHzaBUHi3r91knns,99
|
|
45
|
+
genelastic/ui/templates/layout.html,sha256=B5a2wzvvPD0muTA4ooStT8kvi8IeCl1nZFV9WAyeqdc,1023
|
|
46
|
+
genelastic/ui/templates/version.html,sha256=v2XxGSPlwWERr-o1a-pdIstR5TllCmGROXy5h2ArafI,286
|
|
47
|
+
genelastic/ui/templates/wet_processes.html,sha256=jM0X08_zk-a8insJDx8bnHKBLu5Xqa6D2-SauYwqEcI,270
|
|
48
|
+
genelastic-0.8.0.dist-info/METADATA,sha256=kK2kz13jf_cWKCxn1wv8c1Ta_ZP8pZsocaIZ7BcDhMY,3286
|
|
49
|
+
genelastic-0.8.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
50
|
+
genelastic-0.8.0.dist-info/entry_points.txt,sha256=IxtkWdVpPE8PnjblebCvKrdw4FnTKmvcHzNJfchkQ-c,381
|
|
51
|
+
genelastic-0.8.0.dist-info/top_level.txt,sha256=ra4gCsuKH1d0sXygcnwD_u597ir6bYYxWTS7dkA6vdM,11
|
|
52
|
+
genelastic-0.8.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
[console_scripts]
|
|
2
|
+
gnl-data = genelastic.import_data.cli_gen_data:main
|
|
3
|
+
gnl-import = genelastic.import_data.cli_import:main
|
|
4
|
+
gnl-info = genelastic.import_data.cli_info:main
|
|
5
|
+
gnl-integrity = genelastic.import_data.cli_integrity:main
|
|
6
|
+
gnl-start-api = genelastic.api.cli_start_api:main
|
|
7
|
+
gnl-start-ui = genelastic.ui.cli_start_ui:main
|
|
8
|
+
gnl-validate = genelastic.import_data.cli_validate:main
|
|
@@ -1,194 +0,0 @@
|
|
|
1
|
-
# pylint: disable=missing-module-docstring
|
|
2
|
-
import argparse
|
|
3
|
-
import logging
|
|
4
|
-
import os
|
|
5
|
-
import random
|
|
6
|
-
import subprocess # nosec
|
|
7
|
-
import sys
|
|
8
|
-
from typing import Dict, List, Sequence, Collection
|
|
9
|
-
|
|
10
|
-
import yaml
|
|
11
|
-
from genelastic.common import add_verbose_control_args
|
|
12
|
-
|
|
13
|
-
from .logger import configure_logging
|
|
14
|
-
|
|
15
|
-
logger = logging.getLogger('genelastic')
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def read_args() -> argparse.Namespace:
|
|
19
|
-
# pylint: disable=R0801
|
|
20
|
-
"""Read arguments from command line."""
|
|
21
|
-
parser = argparse.ArgumentParser(description='Genetics data random generator.',
|
|
22
|
-
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
23
|
-
allow_abbrev=False)
|
|
24
|
-
add_verbose_control_args(parser)
|
|
25
|
-
parser.add_argument('-d', '--data-folder', dest='data_folder', required=True,
|
|
26
|
-
help='Data destination folder.')
|
|
27
|
-
parser.add_argument('--log-file', dest='log_file', help='Path to a log file.')
|
|
28
|
-
parser.add_argument('-n', '--chrom-nb', dest='chrom_nb', type=int, default=5,
|
|
29
|
-
help='Number of chromosomes to generate.')
|
|
30
|
-
parser.add_argument('-o', '--output-yaml-file', dest='output_file', default='-',
|
|
31
|
-
help='Output YAML file.')
|
|
32
|
-
parser.add_argument('-s', '--chrom-size', dest='chrom_size', type=int, default=2000,
|
|
33
|
-
help='Data size (number of nucleotides) for each chromosome.')
|
|
34
|
-
return parser.parse_args()
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def gen_cov_files(folder: str, nb_chrom: int, chrom_sz: int, prefix: str) -> List[str]:
|
|
38
|
-
"""Generate dummy coverage files. If an error occurs while generating coverage files, exit."""
|
|
39
|
-
files = []
|
|
40
|
-
chrom_end = chrom_sz - 1
|
|
41
|
-
|
|
42
|
-
for chrom in range(1, nb_chrom + 1):
|
|
43
|
-
output_path = os.path.join(folder, f"{prefix}_chr{chrom}_cov.tsv")
|
|
44
|
-
|
|
45
|
-
# gen-cov will output a coverage file to stdout.
|
|
46
|
-
gen_cov_cmd = ["gen-cov", "-c", str(chrom), "-p", f"0-{chrom_end}", "-d", "5-15",
|
|
47
|
-
"-r", "0.1"]
|
|
48
|
-
|
|
49
|
-
try:
|
|
50
|
-
with open(output_path, "w", encoding="utf-8") as f:
|
|
51
|
-
# Redirect the gen-cov output to a file.
|
|
52
|
-
subprocess.run(gen_cov_cmd, stdout=f, check=True) # nosec
|
|
53
|
-
|
|
54
|
-
except (subprocess.CalledProcessError, FileNotFoundError, OSError) as e:
|
|
55
|
-
logger.error(e)
|
|
56
|
-
sys.exit(1)
|
|
57
|
-
|
|
58
|
-
files.append(output_path)
|
|
59
|
-
|
|
60
|
-
return files
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def gen_vcf_files(folder: str, nb_chrom: int, chrom_sz: int, prefix: str) -> List[str]:
|
|
64
|
-
"""Generate dummy VCF files. If an error occurs while generating VCFs, exit."""
|
|
65
|
-
files = []
|
|
66
|
-
for chrom in range(1, nb_chrom + 1):
|
|
67
|
-
output_path = os.path.join(folder, f"{prefix}_chr{chrom}.vcf")
|
|
68
|
-
|
|
69
|
-
# gen-fasta will output a FASTA to stdout.
|
|
70
|
-
gen_fasta_cmd = ["gen-fasta", "-s", f"chr{chrom}", "-n", str(chrom_sz)]
|
|
71
|
-
# gen-vcf will output a VCF to stdout.
|
|
72
|
-
gen_vcf_cmd = ["gen-vcf", "--snp-rate", "0.02", "--ins-rate", "0.01", "--del-rate", "0.01"]
|
|
73
|
-
|
|
74
|
-
try:
|
|
75
|
-
# Pipe the output of gen-fasta to the stdin of gen-vcf.
|
|
76
|
-
with subprocess.Popen(gen_fasta_cmd, stdout=subprocess.PIPE) as gen_fasta_proc: # nosec
|
|
77
|
-
# Redirect the gen-vcf output to a file.
|
|
78
|
-
with open(output_path, "w", encoding="utf-8") as f:
|
|
79
|
-
subprocess.run(gen_vcf_cmd,
|
|
80
|
-
stdin=gen_fasta_proc.stdout, stdout=f, check=True) # nosec
|
|
81
|
-
except (subprocess.CalledProcessError, FileNotFoundError, OSError) as e:
|
|
82
|
-
logger.error(e)
|
|
83
|
-
sys.exit(1)
|
|
84
|
-
|
|
85
|
-
files.append(output_path)
|
|
86
|
-
|
|
87
|
-
return files
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def gen_name(chars: str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', n: int = 4) -> str:
|
|
91
|
-
"""Generate a random alphanumerical name."""
|
|
92
|
-
return ''.join(random.sample(list(chars), n))
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
def gen_data(folder: str, nb_chrom: int, chrom_sz: int) -> (
|
|
96
|
-
Dict)[str, int | Sequence[Collection[str]]]:
|
|
97
|
-
"""Generate dummy analysis following the V3 YAML schema."""
|
|
98
|
-
# Set metadata
|
|
99
|
-
sample_name = "HG0003"
|
|
100
|
-
source = "CNRGH"
|
|
101
|
-
barcode = gen_name(n=6)
|
|
102
|
-
wet_process = "novaseqxplus-10b"
|
|
103
|
-
bi_process = "dragen-4123"
|
|
104
|
-
reference_genome = "hg38"
|
|
105
|
-
prefix = f'{sample_name}_{source}_{wet_process}_{bi_process}_{barcode}_{reference_genome}'
|
|
106
|
-
|
|
107
|
-
wet_processes = [{
|
|
108
|
-
"proc_id": "novaseqxplus-10b",
|
|
109
|
-
"manufacturer": "illumina",
|
|
110
|
-
"sequencer": "novaseqxplus",
|
|
111
|
-
"generic_kit": "truseq-illumina",
|
|
112
|
-
"fragmentation": 350,
|
|
113
|
-
"reads_size": 300,
|
|
114
|
-
"input_type": "gdna",
|
|
115
|
-
"amplification": "pcr-free",
|
|
116
|
-
"flowcell_type": "10b",
|
|
117
|
-
"sequencing_type": "wgs",
|
|
118
|
-
}]
|
|
119
|
-
|
|
120
|
-
bi_processes = [{
|
|
121
|
-
"proc_id": "dragen-4123",
|
|
122
|
-
"name": "dragen",
|
|
123
|
-
"pipeline_version": "4.1.2.3",
|
|
124
|
-
"steps": [
|
|
125
|
-
{"name": "basecalling", "cmd": "bclconvert", "version": "3.9.3.2"},
|
|
126
|
-
{"name": "trimming", "cmd": "dragen"},
|
|
127
|
-
{"name": "mapping", "cmd": "dragmap"},
|
|
128
|
-
{"name": "postmapping", "cmd": "dragen", "version": "4.1.23"},
|
|
129
|
-
{"name": "smallvarcalling", "cmd": "dragen", "version": "4.1.23"},
|
|
130
|
-
{"name": "svcalling", "cmd": "dragen", "version": "4.1.23"},
|
|
131
|
-
{"name": "secondary_qc", "cmd": "dragen", "version": "4.1.23"}
|
|
132
|
-
],
|
|
133
|
-
"sequencing_type": "wgs"
|
|
134
|
-
}]
|
|
135
|
-
|
|
136
|
-
analyses = [{
|
|
137
|
-
'file_prefix': '%S_%F_%W_%B_%A_%R_chr[0-9]+',
|
|
138
|
-
'sample_name': sample_name,
|
|
139
|
-
'source': source,
|
|
140
|
-
'barcode': barcode,
|
|
141
|
-
'wet_process': "novaseqxplus-10b",
|
|
142
|
-
'bi_process': "dragen-4123",
|
|
143
|
-
'reference_genome': reference_genome,
|
|
144
|
-
'flowcell': gen_name(n=8),
|
|
145
|
-
'lanes': [random.randint(1, 10)], # nosec
|
|
146
|
-
'seq_indices': ['DUAL219', 'DUAL222', 'DUAL225', 'DUAL228', 'DUAL289'],
|
|
147
|
-
'qc_comment': "",
|
|
148
|
-
'data_path': folder,
|
|
149
|
-
}]
|
|
150
|
-
|
|
151
|
-
gen_vcf_files(folder, nb_chrom=nb_chrom, chrom_sz=chrom_sz, prefix=prefix)
|
|
152
|
-
gen_cov_files(folder, nb_chrom=nb_chrom, chrom_sz=chrom_sz, prefix=prefix)
|
|
153
|
-
|
|
154
|
-
return {
|
|
155
|
-
'version': 3,
|
|
156
|
-
'analyses': analyses,
|
|
157
|
-
'bi_processes': bi_processes,
|
|
158
|
-
'wet_processes': wet_processes
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
# Write import bundle YAML
|
|
163
|
-
def write_yaml(file: str, data: Dict[str, int | Sequence[Collection[str]]]) -> None:
|
|
164
|
-
"""Write YAML to stdout or in a file."""
|
|
165
|
-
# Standard output
|
|
166
|
-
if file == '-':
|
|
167
|
-
print('---')
|
|
168
|
-
yaml.dump(data, sys.stdout)
|
|
169
|
-
|
|
170
|
-
# File
|
|
171
|
-
else:
|
|
172
|
-
with open(file, 'w', encoding="utf-8") as f:
|
|
173
|
-
print('---', file=f)
|
|
174
|
-
yaml.dump(data, f)
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
def main() -> None:
|
|
178
|
-
"""Entry point of the gen-data script."""
|
|
179
|
-
# Read command line arguments
|
|
180
|
-
args = read_args()
|
|
181
|
-
|
|
182
|
-
# Configure logging
|
|
183
|
-
configure_logging(args.verbose, log_file=args.log_file)
|
|
184
|
-
logger.debug("Arguments: %s", args)
|
|
185
|
-
|
|
186
|
-
# Generate data
|
|
187
|
-
data = gen_data(args.data_folder, nb_chrom=args.chrom_nb, chrom_sz=args.chrom_size)
|
|
188
|
-
|
|
189
|
-
# Write to stdout or file
|
|
190
|
-
write_yaml(args.output_file, data)
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
if __name__ == '__main__':
|
|
194
|
-
main()
|