genelastic 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genelastic/__init__.py +0 -13
- genelastic/api/__init__.py +0 -0
- genelastic/api/extends/__init__.py +0 -0
- genelastic/api/extends/example.py +6 -0
- genelastic/api/routes.py +221 -0
- genelastic/api/server.py +80 -0
- genelastic/api/settings.py +14 -0
- genelastic/common/__init__.py +39 -0
- genelastic/common/cli.py +63 -0
- genelastic/common/elastic.py +214 -0
- genelastic/common/exceptions.py +4 -0
- genelastic/common/types.py +25 -0
- genelastic/import_data/__init__.py +27 -0
- genelastic/{analyses.py → import_data/analyses.py} +19 -20
- genelastic/{analysis.py → import_data/analysis.py} +71 -66
- genelastic/{bi_process.py → import_data/bi_process.py} +8 -6
- genelastic/{bi_processes.py → import_data/bi_processes.py} +10 -9
- genelastic/import_data/cli_gen_data.py +116 -0
- genelastic/import_data/cli_import.py +379 -0
- genelastic/import_data/cli_info.py +256 -0
- genelastic/import_data/cli_integrity.py +384 -0
- genelastic/import_data/cli_validate.py +54 -0
- genelastic/import_data/constants.py +24 -0
- genelastic/{data_file.py → import_data/data_file.py} +26 -21
- genelastic/import_data/filename_pattern.py +57 -0
- genelastic/{import_bundle.py → import_data/import_bundle.py} +58 -48
- genelastic/import_data/import_bundle_factory.py +298 -0
- genelastic/{logger.py → import_data/logger.py} +22 -18
- genelastic/import_data/random_bundle.py +402 -0
- genelastic/{tags.py → import_data/tags.py} +48 -27
- genelastic/{wet_process.py → import_data/wet_process.py} +8 -4
- genelastic/{wet_processes.py → import_data/wet_processes.py} +15 -9
- genelastic/ui/__init__.py +0 -0
- genelastic/ui/server.py +87 -0
- genelastic/ui/settings.py +11 -0
- genelastic-0.7.0.dist-info/METADATA +105 -0
- genelastic-0.7.0.dist-info/RECORD +40 -0
- {genelastic-0.6.0.dist-info → genelastic-0.7.0.dist-info}/WHEEL +1 -1
- genelastic-0.7.0.dist-info/entry_points.txt +6 -0
- genelastic/common.py +0 -151
- genelastic/constants.py +0 -45
- genelastic/filename_pattern.py +0 -62
- genelastic/gen_data.py +0 -193
- genelastic/import_bundle_factory.py +0 -288
- genelastic/import_data.py +0 -294
- genelastic/info.py +0 -248
- genelastic/integrity.py +0 -324
- genelastic/validate_data.py +0 -41
- genelastic-0.6.0.dist-info/METADATA +0 -36
- genelastic-0.6.0.dist-info/RECORD +0 -25
- genelastic-0.6.0.dist-info/entry_points.txt +0 -6
- {genelastic-0.6.0.dist-info → genelastic-0.7.0.dist-info}/top_level.txt +0 -0
genelastic/ui/server.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from flask import Flask, render_template
|
|
3
|
+
|
|
4
|
+
app = Flask(__name__)
# The wheel installs this package as ``genelastic`` (there is no ``src``
# top-level package once installed), so try the installed import path first.
# The ``src.``-prefixed path is kept as a fallback for runs from a source
# checkout. Flask resolves string arguments with werkzeug's import_string,
# whose ImportStringError is an ImportError subclass.
try:
    app.config.from_object("genelastic.ui.settings.Config")
except ImportError:
    app.config.from_object("src.genelastic.ui.settings.Config")
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@app.route("/")
def home() -> str:
    """Render the home page with the API version and the three listings.

    Queries the ``version``, ``wet_processes``, ``bi_processes`` and
    ``analyses`` endpoints of the API configured under ``GENUI_API_URL``
    (20 second timeout each). If any request fails, answers with an HTTP
    error status, or returns a body that is not valid JSON, the page is
    rendered with the version set to "API not reachable" and empty listings.

    :return: The rendered ``home.html`` template.
    """
    # Tolerate a base URL configured with or without a trailing slash.
    base_url = str(app.config["GENUI_API_URL"]).rstrip("/")

    def fetch_json(endpoint):
        """Return the decoded JSON body of a successful GET on ``endpoint``."""
        response = requests.get(f"{base_url}/{endpoint}", timeout=20)
        # Treat 4xx/5xx answers as failures instead of rendering an error
        # payload as if it were data.
        response.raise_for_status()
        return response.json()

    try:
        version = fetch_json("version").get("version")
        wet_processes = fetch_json("wet_processes")
        bi_processes = fetch_json("bi_processes")
        analyses = fetch_json("analyses")
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding failures on requests versions whose
        # decoding error is not a RequestException subclass.
        version = "API not reachable"
        wet_processes = []
        bi_processes = []
        analyses = []
    return render_template(
        "home.html",
        version=version,
        wet_processes=wet_processes,
        bi_processes=bi_processes,
        analyses=analyses,
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@app.route("/analyses")
def show_analyses() -> str:
    """Render the analyses page from the API's ``analyses`` endpoint.

    The API base URL is read from the ``GENUI_API_URL`` configuration key.
    If the request fails, answers with an HTTP error status, or returns a
    body that is not valid JSON, a single-item error list is rendered instead.

    :return: The rendered ``analyses.html`` template.
    """
    # Tolerate a base URL configured with or without a trailing slash.
    base_url = str(app.config["GENUI_API_URL"]).rstrip("/")
    try:
        response = requests.get(f"{base_url}/analyses", timeout=20)
        # Treat 4xx/5xx answers as failures instead of rendering an error
        # payload as if it were data.
        response.raise_for_status()
        analyses = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding failures on requests versions whose
        # decoding error is not a RequestException subclass.
        analyses = ["Error fetching data."]

    return render_template("analyses.html", analyses=analyses)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@app.route("/bi_processes")
def show_bi_processes() -> str:
    """Render the bi_processes page from the API's ``bi_processes`` endpoint.

    The API base URL is read from the ``GENUI_API_URL`` configuration key.
    If the request fails, answers with an HTTP error status, or returns a
    body that is not valid JSON, a single-item error list is rendered instead.

    :return: The rendered ``bi_processes.html`` template.
    """
    # Tolerate a base URL configured with or without a trailing slash.
    base_url = str(app.config["GENUI_API_URL"]).rstrip("/")
    try:
        response = requests.get(f"{base_url}/bi_processes", timeout=20)
        # Treat 4xx/5xx answers as failures instead of rendering an error
        # payload as if it were data.
        response.raise_for_status()
        bi_processes = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding failures on requests versions whose
        # decoding error is not a RequestException subclass.
        bi_processes = ["Error fetching data."]

    return render_template("bi_processes.html", bi_processes=bi_processes)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@app.route("/wet_processes")
def show_wet_processes() -> str:
    """Render the wet_processes page from the API's ``wet_processes`` endpoint.

    The API base URL is read from the ``GENUI_API_URL`` configuration key.
    If the request fails, answers with an HTTP error status, or returns a
    body that is not valid JSON, a single-item error list is rendered instead.

    :return: The rendered ``wet_processes.html`` template.
    """
    # Tolerate a base URL configured with or without a trailing slash.
    base_url = str(app.config["GENUI_API_URL"]).rstrip("/")
    try:
        response = requests.get(f"{base_url}/wet_processes", timeout=20)
        # Treat 4xx/5xx answers as failures instead of rendering an error
        # payload as if it were data.
        response.raise_for_status()
        wet_processes = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding failures on requests versions whose
        # decoding error is not a RequestException subclass.
        wet_processes = ["Error fetching data."]

    return render_template("wet_processes.html", wet_processes=wet_processes)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@app.route("/version")
def show_version() -> str:
    """Render the version page from the API's ``version`` endpoint.

    The API base URL is read from the ``GENUI_API_URL`` configuration key.
    The ``version`` key of the JSON answer is displayed, or
    "Version not found" when that key is absent. If the request fails,
    answers with an HTTP error status, or returns a body that is not valid
    JSON, "Error fetching version." is displayed instead.

    :return: The rendered ``version.html`` template.
    """
    # Tolerate a base URL configured with or without a trailing slash.
    base_url = str(app.config["GENUI_API_URL"]).rstrip("/")
    try:
        response = requests.get(f"{base_url}/version", timeout=20)
        # Treat 4xx/5xx answers as failures instead of rendering an error
        # payload as if it were data.
        response.raise_for_status()
        version = response.json().get("version", "Version not found")
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding failures on requests versions whose
        # decoding error is not a RequestException subclass.
        version = "Error fetching version."

    return render_template("version.html", version=version)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: genelastic
|
|
3
|
+
Version: 0.7.0
|
|
4
|
+
Summary: Generate and store genetic data into an Elasticsearch database.
|
|
5
|
+
Author: CNRGH
|
|
6
|
+
Author-email: Pierrick ROGER <pierrick.roger@cnrgh.fr>, Maxime BLANCHON <maxime.blanchon@cnrgh.fr>
|
|
7
|
+
License: CeCILL
|
|
8
|
+
Keywords: CNRGH,genelastic,generation,storage,elasticsearch,database
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: elasticsearch
|
|
16
|
+
Requires-Dist: PyVCF3
|
|
17
|
+
Requires-Dist: schema
|
|
18
|
+
Requires-Dist: PyYAML
|
|
19
|
+
Requires-Dist: biophony>=1.2.1
|
|
20
|
+
Requires-Dist: colorlog
|
|
21
|
+
Provides-Extra: tests
|
|
22
|
+
Requires-Dist: pytest; extra == "tests"
|
|
23
|
+
Requires-Dist: mypy; extra == "tests"
|
|
24
|
+
Requires-Dist: coverage; extra == "tests"
|
|
25
|
+
Requires-Dist: yamllint; extra == "tests"
|
|
26
|
+
Requires-Dist: types-PyYAML; extra == "tests"
|
|
27
|
+
Requires-Dist: ruff<0.9,>=0.8.1; extra == "tests"
|
|
28
|
+
Requires-Dist: pre-commit<4.1,>=4.0.1; extra == "tests"
|
|
29
|
+
Requires-Dist: types-requests; extra == "tests"
|
|
30
|
+
Requires-Dist: ansible<10.5,>=10.4.0; extra == "tests"
|
|
31
|
+
Requires-Dist: ansible-lint<25,>=24.12.2; extra == "tests"
|
|
32
|
+
Provides-Extra: docs
|
|
33
|
+
Requires-Dist: sphinx; extra == "docs"
|
|
34
|
+
Requires-Dist: sphinx-autoapi; extra == "docs"
|
|
35
|
+
Requires-Dist: furo; extra == "docs"
|
|
36
|
+
Provides-Extra: api
|
|
37
|
+
Requires-Dist: flask; extra == "api"
|
|
38
|
+
Requires-Dist: elasticsearch; extra == "api"
|
|
39
|
+
Requires-Dist: environs; extra == "api"
|
|
40
|
+
Requires-Dist: connexion[flask,swagger-ui,uvicorn]; extra == "api"
|
|
41
|
+
Provides-Extra: ui
|
|
42
|
+
Requires-Dist: flask; extra == "ui"
|
|
43
|
+
Requires-Dist: requests; extra == "ui"
|
|
44
|
+
Requires-Dist: environs; extra == "ui"
|
|
45
|
+
|
|
46
|
+
# genelastic
|
|
47
|
+
|
|
48
|
+
Stores genetic data in an Elasticsearch database.
|
|
49
|
+
|
|
50
|
+
## Prerequisites
|
|
51
|
+
|
|
52
|
+
- `python` >= 3.11
|
|
53
|
+
- `make`
|
|
54
|
+
|
|
55
|
+
## Installation
|
|
56
|
+
|
|
57
|
+
To install dependencies, run the following commands:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
python -m venv .venv
|
|
61
|
+
source .venv/bin/activate
|
|
62
|
+
make install.deps
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Configuration
|
|
66
|
+
|
|
67
|
+
To start the **API server**, the following environment variables should be defined:
|
|
68
|
+
|
|
69
|
+
- `GENAPI_ES_URL`: URL of the Elasticsearch server,
|
|
70
|
+
- `GENAPI_ES_ENCODED_API_KEY`: Encoded API key,
|
|
71
|
+
- `GENAPI_ES_INDEX_PREFIX`: Prefix to identify indices of interest,
|
|
72
|
+
- `GENAPI_ES_CERT_FP`: Certificate fingerprint of the Elasticsearch server.
|
|
73
|
+
|
|
74
|
+
Then, run the following command:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
make start-api
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
To start the **UI server**, the following environment variables should be defined:
|
|
81
|
+
|
|
82
|
+
- `GENUI_API_URL`: URL of the API server.
|
|
83
|
+
|
|
84
|
+
Then, run the following command:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
make start-ui
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Developers
|
|
91
|
+
|
|
92
|
+
This project uses [pre-commit](https://pre-commit.com/) to manage Git hook scripts. To install the project hooks, run:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
pre-commit install
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
After that, each commit will succeed only if all hooks (defined in `.pre-commit-config.yaml`) pass.
|
|
99
|
+
|
|
100
|
+
If necessary (though not recommended),
|
|
101
|
+
you can skip these hooks by using the `--no-verify` / `-n` option when committing:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
git commit -m "My commit message" --no-verify # This commit will not run installed hooks.
|
|
105
|
+
```
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
genelastic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
genelastic/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
genelastic/api/routes.py,sha256=O_bZGhKex04tWauE1fYRXjNmR2smIA4JZShwcakbJJA,6938
|
|
4
|
+
genelastic/api/server.py,sha256=XkkHpv_28OrfrY6CMDnImBFlDiEQSA8f1BpSSn2LGL4,2248
|
|
5
|
+
genelastic/api/settings.py,sha256=vLfj8ASLqq9f6c3eGg30L_WSBiaOIpnCsk-S-TUZRlk,403
|
|
6
|
+
genelastic/api/extends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
genelastic/api/extends/example.py,sha256=5vOw2ToYWzOj-vYkOLbqoa1MCnXIvKuv7xW6qBQ-eHU,164
|
|
8
|
+
genelastic/common/__init__.py,sha256=GfM4XaiaHsA2v8N0T3VgnXpZsLXjTc1wIPpRi4BlkZ8,937
|
|
9
|
+
genelastic/common/cli.py,sha256=cDbgc2m20Y1NO0CZR1qaTrCttcf0cjFAP885tuPZsH0,1770
|
|
10
|
+
genelastic/common/elastic.py,sha256=0XpECyNMC9RzQN1X7u-0IXmHp_KbSjkmGizArLjxTS0,7260
|
|
11
|
+
genelastic/common/exceptions.py,sha256=l4sQBW91OuzV2R7DhrVzEMDrfsyKM-iVYufp2Cv6rR8,159
|
|
12
|
+
genelastic/common/types.py,sha256=JeQ6XzIemIF1QgvprGA0UxFHyJSKjEkgMaIumOvpbNg,1013
|
|
13
|
+
genelastic/import_data/__init__.py,sha256=Ir_fI0BrdC44JiiQaWm29_SyHbDe2CQsElKHgQxELCI,602
|
|
14
|
+
genelastic/import_data/analyses.py,sha256=yA4DKtXIUR7JTCp7PEndEjI6FOooW-6lR9vkqTZ0Izs,1861
|
|
15
|
+
genelastic/import_data/analysis.py,sha256=MuHYfIWFFwHfCp9tDkWTtVKkg5AasASwrdOUZ177EeM,7799
|
|
16
|
+
genelastic/import_data/bi_process.py,sha256=CBBGx6oO7KYzUypXzpaopGY4J_nAKcDi2UPJe7B63Bs,650
|
|
17
|
+
genelastic/import_data/bi_processes.py,sha256=Kv93NOntycEiB_BldoMxHNfM6sJvgHQvVqrUSSEb_VA,1365
|
|
18
|
+
genelastic/import_data/cli_gen_data.py,sha256=WYvSgiC30pDvhQuGMxtDKookUF9IQpjQP6UMANvvHSU,3012
|
|
19
|
+
genelastic/import_data/cli_import.py,sha256=Ej-EqaBUNoVHGCefzLsmFbKaXNTaOBs8xJsEdRhbbLo,11622
|
|
20
|
+
genelastic/import_data/cli_info.py,sha256=ue0Pf5cJvrmJ_bBZKG2CXkXDsQO3zQ6CIXhtUwpqXhU,7301
|
|
21
|
+
genelastic/import_data/cli_integrity.py,sha256=lOMA-I1iPXiJD4X80Xg91NKO-fa3Su0uSqBUtdMKU4Q,12282
|
|
22
|
+
genelastic/import_data/cli_validate.py,sha256=AcirmmJQWFDrpiPUNTYnpFjSnvD6YyStYJHIbfi1Ly0,1515
|
|
23
|
+
genelastic/import_data/constants.py,sha256=AKYXdDqWkDzvt-laZqbWlN6C9IoQSSqD70mpSWMU6NQ,760
|
|
24
|
+
genelastic/import_data/data_file.py,sha256=P5oe_yCVQQAALetrUs93Kl1XuqoLQS0iMkeEqPKaY7g,2661
|
|
25
|
+
genelastic/import_data/filename_pattern.py,sha256=3QXOF5ZG9vFA8KG7XXucpA7EIN3dqZmmtXnuFbh0bUQ,1709
|
|
26
|
+
genelastic/import_data/import_bundle.py,sha256=7W0hKwoxwGDtWEGV_NQvyesiNx1lCMpSaqKohpTLmtY,5030
|
|
27
|
+
genelastic/import_data/import_bundle_factory.py,sha256=COdMLTTrx_Y4svZAHVJ5Y0rHM4eQj027rFJiQ66u36M,9546
|
|
28
|
+
genelastic/import_data/logger.py,sha256=X12LBoNTmV6iR8vEPyYIMYSUeQ3LpTvDO69tQYDiOuA,1915
|
|
29
|
+
genelastic/import_data/random_bundle.py,sha256=Fk4oudK42pboGI14aaJjh8tTJxSxYNyF_89BBJ532RY,13821
|
|
30
|
+
genelastic/import_data/tags.py,sha256=N6_dGYqQy2QTN6AEzoEXPxRFmtq3GgTfpId-RjBstJY,4195
|
|
31
|
+
genelastic/import_data/wet_process.py,sha256=2SgN1yZFQA8Hb4ZNNeHpUhmeWlJDApqdjc_oq6ZF0jA,694
|
|
32
|
+
genelastic/import_data/wet_processes.py,sha256=mVUfFG0QUboTEoD3c-Hd1Z2_Tvid_yrKJG_XdC4n6oA,1535
|
|
33
|
+
genelastic/ui/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
genelastic/ui/server.py,sha256=7QX-54tT7CbpKje34kVVXnktI0wuZJOSLUChxeUGd20,2805
|
|
35
|
+
genelastic/ui/settings.py,sha256=hmgDNMVKk_OkKSBKyV8Eth57ULCTINwzeBCFAH7DdeU,218
|
|
36
|
+
genelastic-0.7.0.dist-info/METADATA,sha256=dj4cFcq7LXEv9v-turnGvHnK2KZ4zXrDxG0YKe11zPU,3127
|
|
37
|
+
genelastic-0.7.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
38
|
+
genelastic-0.7.0.dist-info/entry_points.txt,sha256=KHtfefg-7-v0MZ0jAcHrb6z3V6hq7UdCiYYIAlD6hIg,284
|
|
39
|
+
genelastic-0.7.0.dist-info/top_level.txt,sha256=ra4gCsuKH1d0sXygcnwD_u597ir6bYYxWTS7dkA6vdM,11
|
|
40
|
+
genelastic-0.7.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
[console_scripts]
|
|
2
|
+
gnl-data = genelastic.import_data.cli_gen_data:main
|
|
3
|
+
gnl-import = genelastic.import_data.cli_import:main
|
|
4
|
+
gnl-info = genelastic.import_data.cli_info:main
|
|
5
|
+
gnl-integrity = genelastic.import_data.cli_integrity:main
|
|
6
|
+
gnl-validate = genelastic.import_data.cli_validate:main
|
genelastic/common.py
DELETED
|
@@ -1,151 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Module: common
|
|
3
|
-
|
|
4
|
-
This module contains custom types and functions shared by multiple genelastic scripts.
|
|
5
|
-
"""
|
|
6
|
-
import argparse
|
|
7
|
-
import sys
|
|
8
|
-
import typing
|
|
9
|
-
import logging
|
|
10
|
-
|
|
11
|
-
import elastic_transport
|
|
12
|
-
import elasticsearch
|
|
13
|
-
|
|
14
|
-
logger = logging.getLogger('genelastic')
|
|
15
|
-
|
|
16
|
-
AnalysisMetaData: typing.TypeAlias = typing.Dict[str, str | int]
|
|
17
|
-
WetProcessesData: typing.TypeAlias = typing.Dict[str, str | int | float]
|
|
18
|
-
BioInfoProcessData: typing.TypeAlias = typing.Dict[str, str | typing.List[str]]
|
|
19
|
-
BundleDict: typing.TypeAlias = typing.Dict[str, typing.Any]
|
|
20
|
-
|
|
21
|
-
AnalysisDocument: typing.TypeAlias = typing.Dict[str, str | None | AnalysisMetaData]
|
|
22
|
-
MetadataDocument: typing.TypeAlias = typing.Dict[str, int | str | typing.List[typing.Any | None]]
|
|
23
|
-
ProcessDocument: typing.TypeAlias = (typing.Dict[str, str] |
|
|
24
|
-
WetProcessesData |
|
|
25
|
-
BioInfoProcessData)
|
|
26
|
-
BulkItems: typing.TypeAlias = typing.List[typing.Dict[str, str |
|
|
27
|
-
MetadataDocument |
|
|
28
|
-
AnalysisDocument |
|
|
29
|
-
ProcessDocument]]
|
|
30
|
-
Bucket: typing.TypeAlias = typing.Dict[str, typing.Dict[typing.Any, typing.Any]]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def connect_to_es(host: str, port: int, usr: str, pwd: str) -> elasticsearch.Elasticsearch:
    """
    Open a connection to an Elasticsearch server over HTTPS using basic auth.

    The server information returned by the client is logged at INFO level.
    If a transport error occurs, its message is logged and the process
    exits with status 1.

    :param host: Host name or address of the server.
    :param port: Port of the server.
    :param usr: User name for basic authentication.
    :param pwd: Password for basic authentication.
    :return: The connected Elasticsearch client.
    """
    url = f"https://{host}:{port}"
    logger.info("Trying to connect to Elasticsearch at %s.", url)

    try:
        # NOTE(review): certificate verification is disabled here; no
        # certificate file or fingerprint is passed to the client.
        client = elasticsearch.Elasticsearch(
            url,
            basic_auth=(usr, pwd),
            verify_certs=False,
        )
        logger.info(client.info())
    except elastic_transport.TransportError as err:
        logger.error(err.message)
        sys.exit(1)
    return client
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def run_composite_aggregation(es: elasticsearch.Elasticsearch,
                              index: str, query: typing.Dict[str, typing.Any]) \
        -> typing.List[Bucket]:
    """
    Executes a composite aggregation on an Elasticsearch index and returns all paginated results.

    Pages are fetched until a response no longer carries an 'after_key'.
    The caller's query is left untouched: pagination state is written to a
    private copy, so the same query dict can safely be reused.

    :param es: Elasticsearch client instance.
    :param index: Name of the index to query.
    :param query: Aggregation query to run. The first key under "aggs" is
        taken as the name of the composite aggregation.
    :raises SystemExit: If the search raises a NotFoundError.
    :return: List of aggregation results.
    """
    import copy  # Local import: only needed to protect the caller's query.

    # Work on a copy so that the 'after' key added below never leaks into
    # the dictionary owned by the caller.
    paged_query = copy.deepcopy(query)

    # Extract the aggregation name from the query dict.
    agg_name = next(iter(paged_query["aggs"]))
    all_buckets: typing.List[Bucket] = []
    log_msg = "Running composite aggregation query %s on index '%s'."

    while True:
        try:
            logger.debug(log_msg, paged_query, index)
            response = es.search(index=index, body=paged_query)
        except elasticsearch.NotFoundError as e:
            raise SystemExit(f"Error: {e.message} for index '{index}'.") from e

        aggregation = response['aggregations'][agg_name]
        all_buckets.extend(aggregation['buckets'])

        # No 'after_key' means the last page has been reached.
        if 'after_key' not in aggregation:
            break

        # Ask for the page following the last returned bucket.
        paged_query['aggs'][agg_name]['composite']['after'] = aggregation['after_key']
        log_msg = "Running query %s on index '%s'."

    return all_buckets
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def get_process_ids(es: elasticsearch.Elasticsearch, index: str, proc_field_name: str) \
        -> typing.Set[str]:
    """
    Collect the distinct values of a process field found in an index.

    A composite terms aggregation is run on the ``.keyword`` sub-field of
    ``proc_field_name``, 1000 buckets per page.

    :param es: Elasticsearch client instance.
    :param index: Name of the index to query.
    :param proc_field_name: Name of the field holding the process ID.
    :return: Set of process IDs.
    """
    composite = {
        "sources": {"proc_id": {"terms": {"field": f"{proc_field_name}.keyword"}}},
        "size": 1000,
    }
    query = {"size": 0, "aggs": {"get_proc_ids": {"composite": composite}}}

    return {
        bucket['key']['proc_id']
        for bucket in run_composite_aggregation(es, index, query)
    }
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
def add_verbose_control_args(parser: argparse.ArgumentParser) -> None:
    """
    Register the verbosity options on the given parser.

    Both options write to the same ``verbose`` destination, which defaults
    to 1: ``-q/--quiet`` stores 0, and each ``-v/--verbose`` occurrence
    increments the current value by one.

    :param parser: Parser to extend in place.
    """
    parser.add_argument(
        '-q', '--quiet',
        action='store_const',
        const=0,
        default=1,
        dest='verbose',
        help='Set verbosity to 0 (quiet mode).',
    )

    verbose_help = 'Verbose level. -v for information, -vv for debug, -vvv for trace.'
    parser.add_argument(
        '-v', '--verbose',
        action='count',
        default=1,
        dest='verbose',
        help=verbose_help,
    )
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def add_es_connection_args(parser: argparse.ArgumentParser) -> None:
    """
    Register the Elasticsearch connection options on the given parser.

    Only ``--es-pwd`` is required; host, port and user have defaults, and
    the remaining options default to ``None``.

    :param parser: Parser to extend in place.
    """
    # One (flag, add_argument keyword arguments) pair per option, in the
    # order they are registered on the parser.
    option_specs = (
        ('--es-host', {'dest': 'es_host', 'default': 'localhost',
                       'help': 'Address of Elasticsearch host.'}),
        ('--es-port', {'dest': 'es_port', 'type': int, 'default': 9200,
                       'help': 'Elasticsearch port.'}),
        ('--es-usr', {'dest': 'es_usr', 'default': 'elastic',
                      'help': 'Elasticsearch user.'}),
        ('--es-pwd', {'dest': 'es_pwd', 'required': True,
                      'help': 'Elasticsearch password.'}),
        ('--es-cert', {'dest': 'es_cert',
                       'help': 'Elasticsearch certificate file.'}),
        ('--es-cert-fp', {'dest': 'es_cert_fp',
                          'help': 'Elasticsearch certificate fingerprint.'}),
        ('--es-index-prefix', {'dest': 'es_index_prefix',
                               'help': 'Add the given prefix to each index created during import.'}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
|
genelastic/constants.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Module: constants
|
|
3
|
-
|
|
4
|
-
This module contains genelastic constants.
|
|
5
|
-
"""
|
|
6
|
-
import typing
|
|
7
|
-
|
|
8
|
-
# Accepted category names.
ALLOWED_CATEGORIES: typing.Final[typing.List[str]] = ['vcf', 'cov']

# Current bundle version number.
BUNDLE_CURRENT_VERSION = 3

# Defaults applied to tags: matching regex, prefix and suffix.
DEFAULT_TAG_REGEX = "[^_-]+"
DEFAULT_TAG_PREFIX = "%"
DEFAULT_TAG_SUFFIX = ""

# Default mapping from a tag to its field name and matching regex.
# Every default tag shares DEFAULT_TAG_REGEX.
DEFAULT_TAG2FIELD: typing.Final[typing.Dict[str, typing.Dict[str, str]]] = {
    tag: {"field": field, "regex": DEFAULT_TAG_REGEX}
    for tag, field in (
        ('%S', 'sample_name'),
        ('%F', 'source'),
        ('%W', 'wet_process'),
        ('%B', 'bi_process'),
        ('%D', 'cov_depth'),
        ('%A', 'barcode'),
        ('%R', 'reference_genome'),
    )
}
|
genelastic/filename_pattern.py
DELETED
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
This module defines the FilenamePattern class, used to define a filename pattern
|
|
3
|
-
and extract metadata from file names using this pattern.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import logging
|
|
7
|
-
import re
|
|
8
|
-
from .common import AnalysisMetaData
|
|
9
|
-
|
|
10
|
-
logger = logging.getLogger('genelastic')
|
|
11
|
-
|
|
12
|
-
class FilenamePattern:
    """Class for defining a filename pattern.
    The pattern is used to extract metadata from filenames
    and verify filename conformity.
    """

    # Initializer
    def __init__(self, pattern: str) -> None:
        """
        Initializes a FilenamePattern instance.

        Args:
            pattern (str): The regular expression used for defining
                the filename pattern. Its named groups become the
                metadata keys.
        """
        self._re = re.compile(pattern)

    def extract_metadata(self, filename: str) -> AnalysisMetaData:
        """
        Extracts metadata from the given filename based
        on the defined pattern.

        The pattern is searched anywhere in the filename, so a match
        does not have to start at the first character.

        Args:
            filename (str): The filename from which metadata
                needs to be extracted.

        Returns:
            dict: A dictionary mapping each named group of the pattern
                to the text it matched.

        Raises:
            RuntimeError: If parsing of filename fails
                with the defined pattern.
        """
        m = self._re.search(filename)
        if not m:
            # A separating space is needed between the quoted filename
            # and the rest of the message.
            raise RuntimeError(f'Failed parsing filename "{filename}" '
                               f'with pattern "{self._re.pattern}".')
        return m.groupdict()

    def matches_pattern(self, filename: str) -> bool:
        """
        Checks if the given filename matches the defined pattern.

        Unlike extract_metadata, the match must start at the beginning
        of the filename.

        Args:
            filename (str): The filename to be checked.

        Returns:
            bool: True if the filename matches the pattern,
                False otherwise.
        """
        return bool(self._re.match(filename))
|