genelastic 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. genelastic/__init__.py +0 -13
  2. genelastic/api/__init__.py +0 -0
  3. genelastic/api/extends/__init__.py +0 -0
  4. genelastic/api/extends/example.py +6 -0
  5. genelastic/api/routes.py +221 -0
  6. genelastic/api/server.py +80 -0
  7. genelastic/api/settings.py +14 -0
  8. genelastic/common/__init__.py +39 -0
  9. genelastic/common/cli.py +63 -0
  10. genelastic/common/elastic.py +214 -0
  11. genelastic/common/exceptions.py +4 -0
  12. genelastic/common/types.py +25 -0
  13. genelastic/import_data/__init__.py +27 -0
  14. genelastic/{analyses.py → import_data/analyses.py} +19 -20
  15. genelastic/{analysis.py → import_data/analysis.py} +71 -66
  16. genelastic/{bi_process.py → import_data/bi_process.py} +8 -6
  17. genelastic/{bi_processes.py → import_data/bi_processes.py} +10 -9
  18. genelastic/import_data/cli_gen_data.py +116 -0
  19. genelastic/import_data/cli_import.py +379 -0
  20. genelastic/import_data/cli_info.py +256 -0
  21. genelastic/import_data/cli_integrity.py +384 -0
  22. genelastic/import_data/cli_validate.py +54 -0
  23. genelastic/import_data/constants.py +24 -0
  24. genelastic/{data_file.py → import_data/data_file.py} +26 -21
  25. genelastic/import_data/filename_pattern.py +57 -0
  26. genelastic/{import_bundle.py → import_data/import_bundle.py} +58 -48
  27. genelastic/import_data/import_bundle_factory.py +298 -0
  28. genelastic/{logger.py → import_data/logger.py} +22 -18
  29. genelastic/import_data/random_bundle.py +402 -0
  30. genelastic/{tags.py → import_data/tags.py} +48 -27
  31. genelastic/{wet_process.py → import_data/wet_process.py} +8 -4
  32. genelastic/{wet_processes.py → import_data/wet_processes.py} +15 -9
  33. genelastic/ui/__init__.py +0 -0
  34. genelastic/ui/server.py +87 -0
  35. genelastic/ui/settings.py +11 -0
  36. genelastic-0.7.0.dist-info/METADATA +105 -0
  37. genelastic-0.7.0.dist-info/RECORD +40 -0
  38. {genelastic-0.6.0.dist-info → genelastic-0.7.0.dist-info}/WHEEL +1 -1
  39. genelastic-0.7.0.dist-info/entry_points.txt +6 -0
  40. genelastic/common.py +0 -151
  41. genelastic/constants.py +0 -45
  42. genelastic/filename_pattern.py +0 -62
  43. genelastic/gen_data.py +0 -193
  44. genelastic/import_bundle_factory.py +0 -288
  45. genelastic/import_data.py +0 -294
  46. genelastic/info.py +0 -248
  47. genelastic/integrity.py +0 -324
  48. genelastic/validate_data.py +0 -41
  49. genelastic-0.6.0.dist-info/METADATA +0 -36
  50. genelastic-0.6.0.dist-info/RECORD +0 -25
  51. genelastic-0.6.0.dist-info/entry_points.txt +0 -6
  52. {genelastic-0.6.0.dist-info → genelastic-0.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,87 @@
1
+ import requests
2
+ from flask import Flask, render_template
3
+
4
# Flask application object that serves the genelastic UI pages.
app = Flask(__name__)
# Load the settings from the Config class referenced by its dotted path.
# NOTE(review): the path starts with "src.", while the wheel installs the
# package as "genelastic" - confirm this import string resolves once installed.
app.config.from_object("src.genelastic.ui.settings.Config")
6
+
7
+
8
@app.route("/")
def home() -> str:
    """Render the home page with the API version and every listing.

    Queries, in order, the ``version``, ``wet_processes``, ``bi_processes``
    and ``analyses`` endpoints of the API configured in ``GENUI_API_URL``.
    If any of these requests fails, the page is rendered with a placeholder
    version string and empty listings.

    :return: The rendered ``home.html`` template.
    """
    api_url = app.config["GENUI_API_URL"]
    try:
        version_response = requests.get(f"{api_url}version", timeout=20)
        # Treat HTTP error statuses as failures instead of rendering an
        # error payload as if it were data.
        version_response.raise_for_status()
        version = version_response.json().get("version")

        wet_processes_response = requests.get(
            f"{api_url}wet_processes", timeout=20
        )
        wet_processes_response.raise_for_status()
        wet_processes = wet_processes_response.json()

        bi_processes_response = requests.get(
            f"{api_url}bi_processes", timeout=20
        )
        bi_processes_response.raise_for_status()
        bi_processes = bi_processes_response.json()

        analyses_response = requests.get(f"{api_url}analyses", timeout=20)
        analyses_response.raise_for_status()
        analyses = analyses_response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers requests releases whose .json() raises a plain
        # ValueError (not a RequestException subclass) on a non-JSON body.
        version = "API not reachable"
        wet_processes = []
        bi_processes = []
        analyses = []
    return render_template(
        "home.html",
        version=version,
        wet_processes=wet_processes,
        bi_processes=bi_processes,
        analyses=analyses,
    )
36
+
37
+
38
@app.route("/analyses")
def show_analyses() -> str:
    """Render the list of analyses returned by the API.

    :return: The rendered ``analyses.html`` template. When the API request
        fails, the list holds a single error message instead.
    """
    api_url = app.config["GENUI_API_URL"]
    try:
        analyses_response = requests.get(f"{api_url}analyses", timeout=20)
        # Treat HTTP error statuses as failures instead of rendering an
        # error payload as if it were data.
        analyses_response.raise_for_status()
        analyses = analyses_response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers requests releases whose .json() raises a plain
        # ValueError (not a RequestException subclass) on a non-JSON body.
        analyses = ["Error fetching data."]

    return render_template("analyses.html", analyses=analyses)
48
+
49
+
50
@app.route("/bi_processes")
def show_bi_processes() -> str:
    """Render the list of bioinformatics processes returned by the API.

    :return: The rendered ``bi_processes.html`` template. When the API
        request fails, the list holds a single error message instead.
    """
    api_url = app.config["GENUI_API_URL"]
    try:
        bi_processes_response = requests.get(
            f"{api_url}bi_processes", timeout=20
        )
        # Treat HTTP error statuses as failures instead of rendering an
        # error payload as if it were data.
        bi_processes_response.raise_for_status()
        bi_processes = bi_processes_response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers requests releases whose .json() raises a plain
        # ValueError (not a RequestException subclass) on a non-JSON body.
        bi_processes = ["Error fetching data."]

    return render_template("bi_processes.html", bi_processes=bi_processes)
62
+
63
+
64
@app.route("/wet_processes")
def show_wet_processes() -> str:
    """Render the list of wet processes returned by the API.

    :return: The rendered ``wet_processes.html`` template. When the API
        request fails, the list holds a single error message instead.
    """
    api_url = app.config["GENUI_API_URL"]
    try:
        wet_processes_response = requests.get(
            f"{api_url}wet_processes", timeout=20
        )
        # Treat HTTP error statuses as failures instead of rendering an
        # error payload as if it were data.
        wet_processes_response.raise_for_status()
        wet_processes = wet_processes_response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers requests releases whose .json() raises a plain
        # ValueError (not a RequestException subclass) on a non-JSON body.
        wet_processes = ["Error fetching data."]

    return render_template("wet_processes.html", wet_processes=wet_processes)
76
+
77
+
78
@app.route("/version")
def show_version() -> str:
    """Render the version reported by the API.

    :return: The rendered ``version.html`` template. The version falls back
        to ``"Version not found"`` when the payload has no ``version`` key
        and to an error message when the API request fails.
    """
    api_url = app.config["GENUI_API_URL"]
    try:
        version_response = requests.get(f"{api_url}version", timeout=20)
        # Treat HTTP error statuses as failures instead of rendering an
        # error payload as if it were data.
        version_response.raise_for_status()
        version = version_response.json().get("version", "Version not found")
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers requests releases whose .json() raises a plain
        # ValueError (not a RequestException subclass) on a non-JSON body.
        version = "Error fetching version."

    return render_template("version.html", version=version)
@@ -0,0 +1,11 @@
1
+ from environs import Env
2
+
3
# Environment reader used to populate the UI settings below.
env = Env()
# Also load variables from a .env file, when one is found.
env.read_env()
5
+
6
+
7
class Config:
    """Flask config class."""

    # Load all the required environment variables.
    # GENUI_API_URL is parsed as a URL, then converted back to a string.
    # NOTE(review): the UI routes append endpoint names directly to this
    # value (e.g. f"{api_url}version"), so it presumably has to end with a
    # trailing slash - confirm.
    GENUI_API_URL = env.url("GENUI_API_URL").geturl()
@@ -0,0 +1,105 @@
1
+ Metadata-Version: 2.2
2
+ Name: genelastic
3
+ Version: 0.7.0
4
+ Summary: Generate and store genetic data into an Elasticsearch database.
5
+ Author: CNRGH
6
+ Author-email: Pierrick ROGER <pierrick.roger@cnrgh.fr>, Maxime BLANCHON <maxime.blanchon@cnrgh.fr>
7
+ License: CeCILL
8
+ Keywords: CNRGH,genelastic,generation,storage,elasticsearch,database
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Requires-Python: >=3.11
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: elasticsearch
16
+ Requires-Dist: PyVCF3
17
+ Requires-Dist: schema
18
+ Requires-Dist: PyYAML
19
+ Requires-Dist: biophony>=1.2.1
20
+ Requires-Dist: colorlog
21
+ Provides-Extra: tests
22
+ Requires-Dist: pytest; extra == "tests"
23
+ Requires-Dist: mypy; extra == "tests"
24
+ Requires-Dist: coverage; extra == "tests"
25
+ Requires-Dist: yamllint; extra == "tests"
26
+ Requires-Dist: types-PyYAML; extra == "tests"
27
+ Requires-Dist: ruff<0.9,>=0.8.1; extra == "tests"
28
+ Requires-Dist: pre-commit<4.1,>=4.0.1; extra == "tests"
29
+ Requires-Dist: types-requests; extra == "tests"
30
+ Requires-Dist: ansible<10.5,>=10.4.0; extra == "tests"
31
+ Requires-Dist: ansible-lint<25,>=24.12.2; extra == "tests"
32
+ Provides-Extra: docs
33
+ Requires-Dist: sphinx; extra == "docs"
34
+ Requires-Dist: sphinx-autoapi; extra == "docs"
35
+ Requires-Dist: furo; extra == "docs"
36
+ Provides-Extra: api
37
+ Requires-Dist: flask; extra == "api"
38
+ Requires-Dist: elasticsearch; extra == "api"
39
+ Requires-Dist: environs; extra == "api"
40
+ Requires-Dist: connexion[flask,swagger-ui,uvicorn]; extra == "api"
41
+ Provides-Extra: ui
42
+ Requires-Dist: flask; extra == "ui"
43
+ Requires-Dist: requests; extra == "ui"
44
+ Requires-Dist: environs; extra == "ui"
45
+
46
+ # genelastic
47
+
48
+ Store genetic data in an Elasticsearch database.
49
+
50
+ ## Prerequisites
51
+
52
+ - `python` >= 3.11
53
+ - `make`
54
+
55
+ ## Installation
56
+
57
+ To install dependencies, run the following commands:
58
+
59
+ ```bash
60
+ python -m venv .venv
61
+ source .venv/bin/activate
62
+ make install.deps
63
+ ```
64
+
65
+ ## Configuration
66
+
67
+ To start the **API server**, the following environment variables should be defined:
68
+
69
+ - `GENAPI_ES_URL`: URL of the Elasticsearch server,
70
+ - `GENAPI_ES_ENCODED_API_KEY`: Encoded API key,
71
+ - `GENAPI_ES_INDEX_PREFIX`: Prefix to identify indices of interest,
72
+ - `GENAPI_ES_CERT_FP`: Certificate fingerprint of the Elasticsearch server.
73
+
74
+ Then, run the following command:
75
+
76
+ ```bash
77
+ make start-api
78
+ ```
79
+
80
+ To start the **UI server**, the following environment variables should be defined:
81
+
82
+ - `GENUI_API_URL`: URL of the API server.
83
+
84
+ Then, run the following command:
85
+
86
+ ```bash
87
+ make start-ui
88
+ ```
89
+
90
+ ## Developers
91
+
92
+ This project uses [pre-commit](https://pre-commit.com/) to manage Git hook scripts. To install project hooks, run:
93
+
94
+ ```bash
95
+ pre-commit install
96
+ ```
97
+
98
+ After that, each commit will succeed only if all hooks (defined in `.pre-commit-config.yaml`) pass.
99
+
100
+ If necessary (though not recommended),
101
+ you can skip these hooks by using the `--no-verify` / `-n` option when committing:
102
+
103
+ ```bash
104
+ git commit -m "My commit message" --no-verify # This commit will not run installed hooks.
105
+ ```
@@ -0,0 +1,40 @@
1
+ genelastic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ genelastic/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ genelastic/api/routes.py,sha256=O_bZGhKex04tWauE1fYRXjNmR2smIA4JZShwcakbJJA,6938
4
+ genelastic/api/server.py,sha256=XkkHpv_28OrfrY6CMDnImBFlDiEQSA8f1BpSSn2LGL4,2248
5
+ genelastic/api/settings.py,sha256=vLfj8ASLqq9f6c3eGg30L_WSBiaOIpnCsk-S-TUZRlk,403
6
+ genelastic/api/extends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ genelastic/api/extends/example.py,sha256=5vOw2ToYWzOj-vYkOLbqoa1MCnXIvKuv7xW6qBQ-eHU,164
8
+ genelastic/common/__init__.py,sha256=GfM4XaiaHsA2v8N0T3VgnXpZsLXjTc1wIPpRi4BlkZ8,937
9
+ genelastic/common/cli.py,sha256=cDbgc2m20Y1NO0CZR1qaTrCttcf0cjFAP885tuPZsH0,1770
10
+ genelastic/common/elastic.py,sha256=0XpECyNMC9RzQN1X7u-0IXmHp_KbSjkmGizArLjxTS0,7260
11
+ genelastic/common/exceptions.py,sha256=l4sQBW91OuzV2R7DhrVzEMDrfsyKM-iVYufp2Cv6rR8,159
12
+ genelastic/common/types.py,sha256=JeQ6XzIemIF1QgvprGA0UxFHyJSKjEkgMaIumOvpbNg,1013
13
+ genelastic/import_data/__init__.py,sha256=Ir_fI0BrdC44JiiQaWm29_SyHbDe2CQsElKHgQxELCI,602
14
+ genelastic/import_data/analyses.py,sha256=yA4DKtXIUR7JTCp7PEndEjI6FOooW-6lR9vkqTZ0Izs,1861
15
+ genelastic/import_data/analysis.py,sha256=MuHYfIWFFwHfCp9tDkWTtVKkg5AasASwrdOUZ177EeM,7799
16
+ genelastic/import_data/bi_process.py,sha256=CBBGx6oO7KYzUypXzpaopGY4J_nAKcDi2UPJe7B63Bs,650
17
+ genelastic/import_data/bi_processes.py,sha256=Kv93NOntycEiB_BldoMxHNfM6sJvgHQvVqrUSSEb_VA,1365
18
+ genelastic/import_data/cli_gen_data.py,sha256=WYvSgiC30pDvhQuGMxtDKookUF9IQpjQP6UMANvvHSU,3012
19
+ genelastic/import_data/cli_import.py,sha256=Ej-EqaBUNoVHGCefzLsmFbKaXNTaOBs8xJsEdRhbbLo,11622
20
+ genelastic/import_data/cli_info.py,sha256=ue0Pf5cJvrmJ_bBZKG2CXkXDsQO3zQ6CIXhtUwpqXhU,7301
21
+ genelastic/import_data/cli_integrity.py,sha256=lOMA-I1iPXiJD4X80Xg91NKO-fa3Su0uSqBUtdMKU4Q,12282
22
+ genelastic/import_data/cli_validate.py,sha256=AcirmmJQWFDrpiPUNTYnpFjSnvD6YyStYJHIbfi1Ly0,1515
23
+ genelastic/import_data/constants.py,sha256=AKYXdDqWkDzvt-laZqbWlN6C9IoQSSqD70mpSWMU6NQ,760
24
+ genelastic/import_data/data_file.py,sha256=P5oe_yCVQQAALetrUs93Kl1XuqoLQS0iMkeEqPKaY7g,2661
25
+ genelastic/import_data/filename_pattern.py,sha256=3QXOF5ZG9vFA8KG7XXucpA7EIN3dqZmmtXnuFbh0bUQ,1709
26
+ genelastic/import_data/import_bundle.py,sha256=7W0hKwoxwGDtWEGV_NQvyesiNx1lCMpSaqKohpTLmtY,5030
27
+ genelastic/import_data/import_bundle_factory.py,sha256=COdMLTTrx_Y4svZAHVJ5Y0rHM4eQj027rFJiQ66u36M,9546
28
+ genelastic/import_data/logger.py,sha256=X12LBoNTmV6iR8vEPyYIMYSUeQ3LpTvDO69tQYDiOuA,1915
29
+ genelastic/import_data/random_bundle.py,sha256=Fk4oudK42pboGI14aaJjh8tTJxSxYNyF_89BBJ532RY,13821
30
+ genelastic/import_data/tags.py,sha256=N6_dGYqQy2QTN6AEzoEXPxRFmtq3GgTfpId-RjBstJY,4195
31
+ genelastic/import_data/wet_process.py,sha256=2SgN1yZFQA8Hb4ZNNeHpUhmeWlJDApqdjc_oq6ZF0jA,694
32
+ genelastic/import_data/wet_processes.py,sha256=mVUfFG0QUboTEoD3c-Hd1Z2_Tvid_yrKJG_XdC4n6oA,1535
33
+ genelastic/ui/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ genelastic/ui/server.py,sha256=7QX-54tT7CbpKje34kVVXnktI0wuZJOSLUChxeUGd20,2805
35
+ genelastic/ui/settings.py,sha256=hmgDNMVKk_OkKSBKyV8Eth57ULCTINwzeBCFAH7DdeU,218
36
+ genelastic-0.7.0.dist-info/METADATA,sha256=dj4cFcq7LXEv9v-turnGvHnK2KZ4zXrDxG0YKe11zPU,3127
37
+ genelastic-0.7.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
38
+ genelastic-0.7.0.dist-info/entry_points.txt,sha256=KHtfefg-7-v0MZ0jAcHrb6z3V6hq7UdCiYYIAlD6hIg,284
39
+ genelastic-0.7.0.dist-info/top_level.txt,sha256=ra4gCsuKH1d0sXygcnwD_u597ir6bYYxWTS7dkA6vdM,11
40
+ genelastic-0.7.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,6 @@
1
+ [console_scripts]
2
+ gnl-data = genelastic.import_data.cli_gen_data:main
3
+ gnl-import = genelastic.import_data.cli_import:main
4
+ gnl-info = genelastic.import_data.cli_info:main
5
+ gnl-integrity = genelastic.import_data.cli_integrity:main
6
+ gnl-validate = genelastic.import_data.cli_validate:main
genelastic/common.py DELETED
@@ -1,151 +0,0 @@
1
- """
2
- Module: common
3
-
4
- This module contains custom types and functions shared by multiple genelastic scripts.
5
- """
6
- import argparse
7
- import sys
8
- import typing
9
- import logging
10
-
11
- import elastic_transport
12
- import elasticsearch
13
-
14
# Logger registered under the 'genelastic' name.
logger = logging.getLogger('genelastic')

# Custom type aliases shared by multiple genelastic scripts.
AnalysisMetaData: typing.TypeAlias = typing.Dict[str, str | int]
WetProcessesData: typing.TypeAlias = typing.Dict[str, str | int | float]
BioInfoProcessData: typing.TypeAlias = typing.Dict[str, str | typing.List[str]]
BundleDict: typing.TypeAlias = typing.Dict[str, typing.Any]

# Document aliases; BulkItems is a list of dicts whose values may be any of them.
AnalysisDocument: typing.TypeAlias = typing.Dict[str, str | None | AnalysisMetaData]
MetadataDocument: typing.TypeAlias = typing.Dict[str, int | str | typing.List[typing.Any | None]]
ProcessDocument: typing.TypeAlias = (typing.Dict[str, str] |
                                     WetProcessesData |
                                     BioInfoProcessData)
BulkItems: typing.TypeAlias = typing.List[typing.Dict[str, str |
                                                      MetadataDocument |
                                                      AnalysisDocument |
                                                      ProcessDocument]]
# Used for the buckets returned by composite aggregations.
Bucket: typing.TypeAlias = typing.Dict[str, typing.Dict[typing.Any, typing.Any]]
31
-
32
-
33
def connect_to_es(host: str, port: int, usr: str, pwd: str) -> elasticsearch.Elasticsearch:
    """
    Open a connection to a remote Elasticsearch database over HTTPS.

    The server information is logged once the connection is established.
    On a transport error, the error message is logged and the program exits
    with status 1.

    :param host: Address of the Elasticsearch host.
    :param port: Port of the Elasticsearch server.
    :param usr: User name for basic authentication.
    :param pwd: Password for basic authentication.
    :return: Connected Elasticsearch client instance.
    """
    url = f"https://{host}:{port}"
    logger.info("Trying to connect to Elasticsearch at %s.", url)

    try:
        # NOTE(review): certificate verification is disabled here; no
        # certificate file or fingerprint is passed to the client.
        client = elasticsearch.Elasticsearch(
            url,
            basic_auth=(usr, pwd),
            verify_certs=False,
        )
        logger.info(client.info())
    except elastic_transport.TransportError as err:
        logger.error(err.message)
        sys.exit(1)

    return client
51
-
52
-
53
def run_composite_aggregation(es: elasticsearch.Elasticsearch,
                              index: str, query: typing.Dict[str, typing.Any]) \
        -> typing.List[Bucket]:
    """
    Executes a composite aggregation on an Elasticsearch index and returns all paginated results.

    The query passed by the caller is left untouched: pagination is driven
    on a private copy.

    :param es: Elasticsearch client instance.
    :param index: Name of the index to query.
    :param query: Aggregation query to run. The first key of its "aggs"
        entry is taken as the aggregation name.
    :raises SystemExit: If the index is not found.
    :return: List of aggregation results.
    """
    import copy  # Local import: only needed to protect the caller's query.

    # Work on a copy so the "after" key written below does not leak into the
    # caller's dict (reusing that dict would otherwise resume mid-way).
    query = copy.deepcopy(query)

    # Extract the aggregation name from the query dict.
    agg_name = next(iter(query["aggs"]))
    all_buckets: typing.List[Bucket] = []

    logger.debug("Running composite aggregation query %s on index '%s'.", query, index)
    while True:
        try:
            response = es.search(index=index, body=query)
        except elasticsearch.NotFoundError as e:
            raise SystemExit(f"Error: {e.message} for index '{index}'.") from e

        # Collect the buckets of the current page.
        aggregation = response['aggregations'][agg_name]
        all_buckets.extend(aggregation['buckets'])

        # No 'after_key' means there is no further page to fetch.
        if 'after_key' not in aggregation:
            break

        # Ask for the page that follows the last returned key.
        query['aggs'][agg_name]['composite']['after'] = aggregation['after_key']
        logger.debug("Running query %s on index '%s'.", query, index)

    return all_buckets
92
-
93
-
94
def get_process_ids(es: elasticsearch.Elasticsearch, index: str, proc_field_name: str) \
        -> typing.Set[str]:
    """
    Collect the distinct process IDs stored in an index.

    :param es: Elasticsearch client instance.
    :param index: Name of the index to query.
    :param proc_field_name: Name of the field holding the process ID; its
        ".keyword" sub-field is the one aggregated on.
    :return: Set of process IDs.
    """
    proc_id_source = {"proc_id": {"terms": {"field": f"{proc_field_name}.keyword"}}}
    composite = {"sources": proc_id_source, "size": 1000}
    # "size": 0 - only the aggregation buckets are of interest, not the hits.
    query = {"size": 0, "aggs": {"get_proc_ids": {"composite": composite}}}

    found: typing.List[Bucket] = run_composite_aggregation(es, index, query)
    return {entry['key']['proc_id'] for entry in found}
117
-
118
-
119
def add_verbose_control_args(parser: argparse.ArgumentParser) -> None:
    """
    Register the -q/--quiet and -v/--verbose options on the given parser.

    Both options write to the same ``verbose`` destination, which defaults
    to 1: --quiet sets it to 0, and each -v increments it by one.
    The parser is modified in place.
    """
    verbose_help = 'Verbose level. -v for information, -vv for debug, -vvv for trace.'

    parser.add_argument(
        '-q', '--quiet',
        action='store_const',
        const=0,
        default=1,
        dest='verbose',
        help='Set verbosity to 0 (quiet mode).',
    )
    parser.add_argument(
        '-v', '--verbose',
        action='count',
        default=1,
        dest='verbose',
        help=verbose_help,
    )
131
-
132
-
133
def add_es_connection_args(parser: argparse.ArgumentParser) -> None:
    """
    Register the Elasticsearch connection options on the given parser.

    Only --es-pwd is required; host, port and user have defaults.
    The parser is modified in place.
    """
    # One (flag, add_argument keyword arguments) pair per option, in the
    # order they are registered.
    option_specs = (
        ('--es-host', {'dest': 'es_host', 'default': 'localhost',
                       'help': 'Address of Elasticsearch host.'}),
        ('--es-port', {'type': int, 'default': 9200, 'dest': 'es_port',
                       'help': 'Elasticsearch port.'}),
        ('--es-usr', {'dest': 'es_usr', 'default': 'elastic',
                      'help': 'Elasticsearch user.'}),
        ('--es-pwd', {'dest': 'es_pwd', 'required': True,
                      'help': 'Elasticsearch password.'}),
        ('--es-cert', {'dest': 'es_cert',
                       'help': 'Elasticsearch certificate file.'}),
        ('--es-cert-fp', {'dest': 'es_cert_fp',
                          'help': 'Elasticsearch certificate fingerprint.'}),
        ('--es-index-prefix', {'dest': 'es_index_prefix',
                               'help': 'Add the given prefix to each index created during import.'}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
genelastic/constants.py DELETED
@@ -1,45 +0,0 @@
1
- """
2
- Module: constants
3
-
4
- This module contains genelastic constants.
5
- """
6
- import typing
7
-
8
# Allowed category names.
ALLOWED_CATEGORIES: typing.Final[typing.List[str]] = ['vcf', 'cov']

BUNDLE_CURRENT_VERSION = 3

# Defaults applied to tags.
DEFAULT_TAG_REGEX = "[^_-]+"
DEFAULT_TAG_PREFIX = "%"
DEFAULT_TAG_SUFFIX = ""

# (tag, field name) pairs, in the order they appear in DEFAULT_TAG2FIELD.
_DEFAULT_TAG_FIELDS = (
    ('%S', 'sample_name'),
    ('%F', 'source'),
    ('%W', 'wet_process'),
    ('%B', 'bi_process'),
    ('%D', 'cov_depth'),
    ('%A', 'barcode'),
    ('%R', 'reference_genome'),
)

# Maps each default tag to its field name and to the regex matching its value.
DEFAULT_TAG2FIELD: typing.Final[typing.Dict[str, typing.Dict[str, str]]] = {
    tag: {"field": field_name, "regex": DEFAULT_TAG_REGEX}
    for tag, field_name in _DEFAULT_TAG_FIELDS
}
@@ -1,62 +0,0 @@
1
- """
2
- This module defines the FilenamePattern class, used to define a filename pattern
3
- and extract metadata from file names using this pattern.
4
- """
5
-
6
- import logging
7
- import re
8
- from .common import AnalysisMetaData
9
-
10
# Logger registered under the 'genelastic' name.
logger = logging.getLogger('genelastic')
11
-
12
class FilenamePattern:
    """Class for defining a filename pattern.
    The pattern is used to extract metadata from filenames
    and verify filename conformity.
    """

    def __init__(self, pattern: str) -> None:
        """
        Initializes a FilenamePattern instance.

        Args:
            pattern (str): The regular expression used for defining
                           the filename pattern.
        """
        # Compile once; both methods below reuse the compiled pattern.
        self._re = re.compile(pattern)

    def extract_metadata(self, filename: str) -> AnalysisMetaData:
        """
        Extracts metadata from the given filename based
        on the defined pattern.

        The pattern is searched for anywhere in the filename.

        Args:
            filename (str): The filename from which metadata
                            needs to be extracted.

        Returns:
            dict: The named groups of the match, keyed by group name.

        Raises:
            RuntimeError: If parsing of filename fails
                          with the defined pattern.
        """
        m = self._re.search(filename)
        if m is None:
            # The trailing space keeps the quoted filename and "with" apart.
            raise RuntimeError(f'Failed parsing filename "{filename}" '
                               f'with pattern "{self._re.pattern}".')
        return m.groupdict()

    def matches_pattern(self, filename: str) -> bool:
        """
        Checks if the given filename matches the defined pattern.

        The match is anchored at the start of the filename.
        NOTE(review): extract_metadata() searches anywhere in the filename
        instead - confirm that this difference is intended.

        Args:
            filename (str): The filename to be checked.

        Returns:
            bool: True if the filename matches the pattern,
                  False otherwise.
        """
        return bool(self._re.match(filename))