genelastic 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. genelastic/api/extends/example.py +2 -3
  2. genelastic/api/routes.py +160 -23
  3. genelastic/api/server.py +30 -22
  4. genelastic/api/settings.py +3 -2
  5. genelastic/common/__init__.py +36 -9
  6. genelastic/common/cli.py +51 -23
  7. genelastic/common/elastic.py +80 -49
  8. genelastic/common/exceptions.py +0 -2
  9. genelastic/common/types.py +20 -15
  10. genelastic/import_data/__init__.py +23 -5
  11. genelastic/import_data/analyses.py +17 -20
  12. genelastic/import_data/analysis.py +69 -65
  13. genelastic/import_data/bi_process.py +7 -5
  14. genelastic/import_data/bi_processes.py +8 -8
  15. genelastic/import_data/cli_gen_data.py +116 -0
  16. genelastic/import_data/cli_import.py +379 -0
  17. genelastic/import_data/{info.py → cli_info.py} +104 -75
  18. genelastic/import_data/cli_integrity.py +384 -0
  19. genelastic/import_data/cli_validate.py +54 -0
  20. genelastic/import_data/constants.py +11 -32
  21. genelastic/import_data/data_file.py +23 -20
  22. genelastic/import_data/filename_pattern.py +26 -32
  23. genelastic/import_data/import_bundle.py +56 -47
  24. genelastic/import_data/import_bundle_factory.py +166 -158
  25. genelastic/import_data/logger.py +22 -18
  26. genelastic/import_data/random_bundle.py +402 -0
  27. genelastic/import_data/tags.py +46 -26
  28. genelastic/import_data/wet_process.py +8 -4
  29. genelastic/import_data/wet_processes.py +13 -8
  30. genelastic/ui/__init__.py +0 -0
  31. genelastic/ui/server.py +87 -0
  32. genelastic/ui/settings.py +11 -0
  33. genelastic-0.7.0.dist-info/METADATA +105 -0
  34. genelastic-0.7.0.dist-info/RECORD +40 -0
  35. {genelastic-0.6.1.dist-info → genelastic-0.7.0.dist-info}/WHEEL +1 -1
  36. genelastic-0.7.0.dist-info/entry_points.txt +6 -0
  37. genelastic/import_data/gen_data.py +0 -194
  38. genelastic/import_data/import_data.py +0 -292
  39. genelastic/import_data/integrity.py +0 -290
  40. genelastic/import_data/validate_data.py +0 -43
  41. genelastic-0.6.1.dist-info/METADATA +0 -41
  42. genelastic-0.6.1.dist-info/RECORD +0 -36
  43. genelastic-0.6.1.dist-info/entry_points.txt +0 -6
  44. {genelastic-0.6.1.dist-info → genelastic-0.7.0.dist-info}/top_level.txt +0 -0
genelastic/api/extends/example.py CHANGED
@@ -1,7 +1,6 @@
1
- # pylint: disable=missing-module-docstring
2
- from flask import jsonify, Response
1
+ from flask import Response, jsonify
3
2
 
4
3
 
5
4
  def ping_2() -> Response:
6
5
  """Test route to verify that the server is online."""
7
- return jsonify({'message': 'pong_2'})
6
+ return jsonify({"message": "pong_2"})
genelastic/api/routes.py CHANGED
@@ -1,45 +1,56 @@
1
- # pylint: disable=missing-module-docstring
1
+ from importlib.metadata import version
2
2
  from pathlib import Path
3
- from flask import jsonify, current_app, Response
3
+ from typing import Any
4
+
5
+ from flask import Response, current_app, jsonify
4
6
 
5
7
 
6
8
  def ping() -> Response:
7
9
  """Test route to verify that the server is online."""
8
- return jsonify({'message': 'pong'})
10
+ return jsonify({"message": "pong"})
9
11
 
10
12
 
11
13
  def list_indices() -> Response:
12
14
  """Route to list Elasticsearch indexes."""
13
- return current_app.elastic_query_conn.get_indices() # type: ignore
15
+ return current_app.elastic_query_conn.get_indices() # type: ignore[attr-defined, no-any-return]
14
16
 
15
17
 
16
18
  def retrieve_document(index_id: str, document_id: str) -> Response:
17
19
  """Route to retrieve a document by its ID."""
18
- document = (current_app.elastic_query_conn # type: ignore
19
- .get_document_by_id(index_id, document_id))
20
+ document = current_app.elastic_query_conn.get_document_by_id( # type: ignore[attr-defined]
21
+ index_id, document_id
22
+ )
20
23
  return jsonify(document)
21
24
 
22
25
 
23
26
  def list_wet_processes() -> Response:
24
27
  """Route to list wet processes."""
25
- wet_processes_index = f"{current_app.config['GENAPI_ES_INDEX_PREFIX']}-wet_processes"
26
- result = (current_app.elastic_query_conn # type: ignore
27
- .get_field_values(wet_processes_index, "proc_id"))
28
+ wet_processes_index = (
29
+ f"{current_app.config['GENAPI_ES_INDEX_PREFIX']}-wet_processes"
30
+ )
31
+ result = current_app.elastic_query_conn.get_field_values( # type: ignore[attr-defined]
32
+ wet_processes_index, "proc_id"
33
+ )
28
34
  return jsonify(list(result))
29
35
 
30
36
 
31
37
  def list_bi_processes() -> Response:
32
38
  """Route to list bi processes."""
33
- bi_processes_index = f"{current_app.config['GENAPI_ES_INDEX_PREFIX']}-bi_processes"
34
- result = (current_app.elastic_query_conn # type: ignore
35
- .get_field_values(bi_processes_index, "name"))
39
+ bi_processes_index = (
40
+ f"{current_app.config['GENAPI_ES_INDEX_PREFIX']}-bi_processes"
41
+ )
42
+ result = current_app.elastic_query_conn.get_field_values( # type: ignore[attr-defined]
43
+ bi_processes_index, "proc_id"
44
+ )
36
45
  return jsonify(list(result))
37
46
 
38
47
 
39
48
  def list_analyses() -> Response:
40
49
  """Route to list analyses."""
41
50
  analyses_index = f"{current_app.config['GENAPI_ES_INDEX_PREFIX']}-analyses"
42
- result = current_app.elastic_query_conn.get_field_values(analyses_index, "path") # type: ignore
51
+ result = current_app.elastic_query_conn.get_field_values( # type: ignore[attr-defined]
52
+ analyses_index, "path"
53
+ )
43
54
  filenames = [Path(path).name for path in result]
44
55
  return jsonify(filenames)
45
56
 
@@ -55,11 +66,10 @@ def list_analyses_wet_processes(proc_id: str) -> Response:
55
66
  }
56
67
  }
57
68
  }
58
- result = []
59
- response = (current_app.elastic_query_conn # type: ignore
60
- .client.search(index=analyses_index, body=search_query))
61
- for hit in response['hits']['hits']:
62
- result.append(hit['_source']['path'])
69
+ response = current_app.elastic_query_conn.client.search( # type: ignore[attr-defined]
70
+ index=analyses_index, body=search_query
71
+ )
72
+ result = [hit["_source"]["path"] for hit in response["hits"]["hits"]]
63
73
 
64
74
  return jsonify(result)
65
75
 
@@ -75,10 +85,137 @@ def list_analyses_bi_processes(proc_id: str) -> Response:
75
85
  }
76
86
  }
77
87
  }
78
- result = []
79
- response = (current_app.elastic_query_conn # type: ignore
80
- .client.search(index=analyses_index, body=search_query))
81
- for hit in response['hits']['hits']:
82
- result.append(hit['_source']['path'])
88
+ response = current_app.elastic_query_conn.client.search( # type: ignore[attr-defined]
89
+ index=analyses_index, body=search_query
90
+ )
91
+ result = [hit["_source"]["path"] for hit in response["hits"]["hits"]]
83
92
 
84
93
  return jsonify(result)
94
+
95
+
96
+ def list_snv_documents() -> Response:
97
+ """Route to list all documents containing a mutation at a single position (SNV)."""
98
+ index_pattern = "genelastic-file-*"
99
+ target_value = "SNV"
100
+
101
+ search_query = {
102
+ "aggs": {
103
+ "snv_docs": {
104
+ "composite": {
105
+ "sources": [
106
+ {"alt_value": {"terms": {"field": "alt.keyword"}}}
107
+ ],
108
+ "size": 1000,
109
+ }
110
+ }
111
+ },
112
+ "query": {"term": {"alt.keyword": target_value}},
113
+ "size": 0,
114
+ }
115
+
116
+ all_documents = []
117
+ buckets = current_app.elastic_query_conn.run_composite_aggregation( # type: ignore[attr-defined]
118
+ index_pattern, search_query
119
+ )
120
+
121
+ for bucket in buckets:
122
+ alt_value = bucket["key"]["alt_value"]
123
+
124
+ search_query_docs = {
125
+ "query": {"term": {"alt.keyword": alt_value}},
126
+ "size": 1000,
127
+ }
128
+
129
+ response = current_app.elastic_query_conn.client.search( # type: ignore[attr-defined]
130
+ index=index_pattern, body=search_query_docs
131
+ )
132
+
133
+ all_documents.extend(response["hits"]["hits"])
134
+
135
+ return jsonify(all_documents)
136
+
137
+
138
+ def build_snv_search_query(
139
+ target_alt: str, target_svtype: str
140
+ ) -> dict[str, Any]:
141
+ """Helper function to build the search query for SNV documents with specified alt and SVTYPE."""
142
+ return {
143
+ "query": {
144
+ "bool": {
145
+ "must": [
146
+ {"term": {"alt.keyword": target_alt}},
147
+ {"term": {"info.SVTYPE.keyword": target_svtype}},
148
+ ]
149
+ }
150
+ },
151
+ "size": 1000,
152
+ }
153
+
154
+
155
+ def build_snv_mutation_search_query(
156
+ target_svtypes: list[str],
157
+ ) -> dict[str, Any]:
158
+ """Helper function to build the search query for SNV mutations with specified SVTYPE values."""
159
+ return {
160
+ "query": {
161
+ "bool": {
162
+ "must": [
163
+ {"term": {"alt.keyword": "SNV"}},
164
+ {"terms": {"info.SVTYPE.keyword": target_svtypes}},
165
+ ]
166
+ }
167
+ },
168
+ "size": 1000,
169
+ }
170
+
171
+
172
+ def list_snv_insertion_documents() -> Response:
173
+ """Route to list all documents containing an insertion (INS) at a single position (SNV)."""
174
+ index_pattern = "genelastic-file-*"
175
+ search_query = build_snv_search_query(target_alt="SNV", target_svtype="INS")
176
+
177
+ response = current_app.elastic_query_conn.client.search( # type: ignore[attr-defined]
178
+ index=index_pattern, body=search_query
179
+ )
180
+
181
+ all_documents = [hit["_source"] for hit in response["hits"]["hits"]]
182
+
183
+ return jsonify(all_documents)
184
+
185
+
186
+ def list_snv_deletion_documents() -> Response:
187
+ """Route to list all documents containing a deletion (DEL) at a single position (SNV)."""
188
+ index_pattern = "genelastic-file-*"
189
+ search_query = build_snv_search_query(target_alt="SNV", target_svtype="DEL")
190
+
191
+ response = current_app.elastic_query_conn.client.search( # type: ignore[attr-defined]
192
+ index=index_pattern, body=search_query
193
+ )
194
+
195
+ all_documents = [hit["_source"] for hit in response["hits"]["hits"]]
196
+
197
+ return jsonify(all_documents)
198
+
199
+
200
+ def list_snv_mutation_documents() -> Response:
201
+ """Route to list all documents containing a mutation at a single position (SNV)."""
202
+ index_pattern = "genelastic-file-*"
203
+ target_svtypes = ["INS", "DEL"]
204
+
205
+ search_query = build_snv_mutation_search_query(
206
+ target_svtypes=target_svtypes
207
+ )
208
+
209
+ response = current_app.elastic_query_conn.client.search( # type: ignore[attr-defined]
210
+ index=index_pattern, body=search_query
211
+ )
212
+
213
+ all_documents = [hit["_source"] for hit in response["hits"]["hits"]]
214
+
215
+ return jsonify(all_documents)
216
+
217
+
218
+ def get_genelastic_version() -> Response:
219
+ """Retourne la version du package genelastic."""
220
+ top_level_package = __package__.split(".")[0]
221
+ return jsonify({"version": version(top_level_package)})
genelastic/api/server.py CHANGED
@@ -1,15 +1,16 @@
1
- # pylint: disable=missing-module-docstring
2
- from typing import Any
3
1
  from pathlib import Path
2
+ from typing import Any
3
+
4
+ import connexion
4
5
  import yaml
5
- import connexion # type: ignore
6
+
6
7
  from genelastic.common import ElasticQueryConn
7
8
 
8
9
 
9
- def load_yaml(file_path: Path) -> Any:
10
+ def load_yaml(file_path: Path) -> Any: # noqa: ANN401
10
11
  """Load a YAML file and return its content."""
11
12
  content = None
12
- with open(file_path, encoding='utf-8') as f:
13
+ with Path.open(file_path, encoding="utf-8") as f:
13
14
  try:
14
15
  content = yaml.safe_load(f)
15
16
  except yaml.YAMLError as exc:
@@ -17,50 +18,57 @@ def load_yaml(file_path: Path) -> Any:
17
18
  return content
18
19
 
19
20
 
20
- def aggregate_openapi_specs(main_spec_file: Path, additional_spec_path: Path) -> Any:
21
+ def aggregate_openapi_specs(
22
+ main_spec_file: Path, additional_spec_path: Path
23
+ ) -> Any: # noqa: ANN401
21
24
  """Aggregate OpenAPI specifications from a main file and a directory
22
- of additional specifications."""
25
+ of additional specifications.
26
+ """
23
27
  main_spec = load_yaml(main_spec_file)
24
28
  try:
25
29
  entries = additional_spec_path.iterdir()
26
30
  except OSError as exc:
27
31
  raise SystemExit(exc) from exc
28
32
 
29
- if not 'paths' in main_spec:
30
- main_spec['paths'] = []
33
+ if "paths" not in main_spec:
34
+ main_spec["paths"] = []
31
35
 
32
36
  for entry in entries:
33
37
  if not entry.is_file():
34
38
  continue
35
39
 
36
- if not entry.suffix in [".yml", ".yaml"]:
40
+ if entry.suffix not in [".yml", ".yaml"]:
37
41
  continue
38
42
 
39
43
  content = load_yaml(entry)
40
44
 
41
- if 'paths' in content:
42
- main_spec['paths'].update(content['paths'])
45
+ if "paths" in content:
46
+ main_spec["paths"].update(content["paths"])
43
47
 
44
48
  return main_spec
45
49
 
46
50
 
47
51
  # Initialiser l'application Connexion
48
52
  connexion_app = connexion.FlaskApp(__name__)
49
- connexion_app.app.config.from_object('src.genelastic.api.settings.Config')
53
+ connexion_app.app.config.from_object("src.genelastic.api.settings.Config")
50
54
 
51
55
  # Initialiser le client Elasticsearch
52
- es_url = connexion_app.app.config['GENAPI_ES_URL']
53
- es_cert_fp = connexion_app.app.config['GENAPI_ES_CERT_FP']
54
- es_api_key = connexion_app.app.config['GENAPI_ES_ENCODED_API_KEY']
56
+ es_url = connexion_app.app.config["GENAPI_ES_URL"]
57
+ es_cert_fp = connexion_app.app.config["GENAPI_ES_CERT_FP"]
58
+ es_api_key = connexion_app.app.config["GENAPI_ES_ENCODED_API_KEY"]
55
59
 
56
- connexion_app.app.elastic_query_conn = ElasticQueryConn(es_url, es_cert_fp, api_key=es_api_key)
60
+ connexion_app.app.elastic_query_conn = ElasticQueryConn(
61
+ es_url, es_cert_fp, api_key=es_api_key
62
+ )
57
63
 
58
- connexion_app.app.logger.debug("Successfully connected to Elasticsearch server: %s",
59
- connexion_app.app.elastic_query_conn.client.info())
64
+ connexion_app.app.logger.debug(
65
+ "Successfully connected to Elasticsearch server: %s",
66
+ connexion_app.app.elastic_query_conn.client.info(),
67
+ )
60
68
 
61
69
  # Chemins des fichiers YAML
62
- main_yaml_file = Path(__file__).parents[0] / 'specification.yml'
63
- additional_yaml_dir = Path(__file__).parents[0] / 'extends'
70
+ main_yaml_file = Path(__file__).parents[0] / "specification.yml"
71
+ additional_yaml_dir = Path(__file__).parents[0] / "extends"
64
72
 
65
73
  # Charger et combiner les fichiers YAML
66
74
  yaml_spec = aggregate_openapi_specs(main_yaml_file, additional_yaml_dir)
@@ -68,5 +76,5 @@ yaml_spec = aggregate_openapi_specs(main_yaml_file, additional_yaml_dir)
68
76
  # Ajouter la spécification vers OpenAPI
69
77
  connexion_app.add_api(yaml_spec)
70
78
 
71
- if __name__ == '__main__':
79
+ if __name__ == "__main__":
72
80
  connexion_app.run(debug=True)
genelastic/api/settings.py CHANGED
@@ -1,11 +1,12 @@
1
- # pylint: disable=missing-module-docstring
2
1
  from environs import Env
3
2
 
4
3
  env = Env()
5
4
  env.read_env()
6
5
 
7
- # pylint: disable=missing-class-docstring,too-few-public-methods
6
+
8
7
  class Config:
8
+ """Flask config class."""
9
+
9
10
  # Charger toutes les variables d'environnement nécessaires
10
11
  GENAPI_ES_URL = env.url("GENAPI_ES_URL").geturl()
11
12
  GENAPI_ES_ENCODED_API_KEY = env.str("GENAPI_ES_ENCODED_API_KEY")
genelastic/common/__init__.py CHANGED
@@ -1,12 +1,39 @@
1
1
  """Genelastic package for common code between API and import scripts."""
2
- from .elastic import ElasticQueryConn, ElasticImportConn
3
- from .types import (BundleDict, AnalysisMetaData, BioInfoProcessData, WetProcessesData,
4
- MetadataDocument, AnalysisDocument, BulkItems, ProcessDocument, Bucket)
5
- from .cli import add_verbose_control_args, add_es_connection_args
2
+
3
+ from .cli import add_es_connection_args, add_verbose_control_args
4
+ from .elastic import ElasticImportConn, ElasticQueryConn
6
5
  from .exceptions import DBIntegrityError
6
+ from .types import (
7
+ AnalysisDocument,
8
+ AnalysisMetaData,
9
+ BioInfoProcessData,
10
+ Bucket,
11
+ BulkItems,
12
+ BundleDict,
13
+ MetadataDocument,
14
+ ProcessDocument,
15
+ RandomAnalysisData,
16
+ RandomBiProcessData,
17
+ RandomWetProcessData,
18
+ WetProcessesData,
19
+ )
7
20
 
8
- __all__ = ['ElasticQueryConn', 'ElasticImportConn', 'BundleDict', 'AnalysisMetaData',
9
- 'BioInfoProcessData', 'WetProcessesData', 'MetadataDocument', 'AnalysisDocument',
10
- 'BulkItems', 'ProcessDocument', 'Bucket', 'add_verbose_control_args',
11
- 'add_es_connection_args', 'DBIntegrityError'
12
- ]
21
+ __all__ = [
22
+ "AnalysisDocument",
23
+ "AnalysisMetaData",
24
+ "BioInfoProcessData",
25
+ "Bucket",
26
+ "BulkItems",
27
+ "BundleDict",
28
+ "DBIntegrityError",
29
+ "ElasticImportConn",
30
+ "ElasticQueryConn",
31
+ "MetadataDocument",
32
+ "ProcessDocument",
33
+ "RandomAnalysisData",
34
+ "RandomBiProcessData",
35
+ "RandomWetProcessData",
36
+ "WetProcessesData",
37
+ "add_es_connection_args",
38
+ "add_verbose_control_args",
39
+ ]
genelastic/common/cli.py CHANGED
@@ -1,35 +1,63 @@
1
1
  """Utility functions for CLI scripts."""
2
+
2
3
  import argparse
3
4
 
4
5
 
5
6
  def add_verbose_control_args(parser: argparse.ArgumentParser) -> None:
6
- """
7
- Add verbose control arguments to the parser.
7
+ """Add verbose control arguments to the parser.
8
8
  Arguments are added to the parser by using its reference.
9
9
  """
10
- parser.add_argument('-q', '--quiet', dest='verbose', action='store_const',
11
- const=0, default=1,
12
- help='Set verbosity to 0 (quiet mode).')
13
- parser.add_argument('-v', '--verbose', dest='verbose', action='count',
14
- default=1,
15
- help=('Verbose level. -v for information, -vv for debug,' +
16
- ' -vvv for trace.'))
10
+ parser.add_argument(
11
+ "-q",
12
+ "--quiet",
13
+ dest="verbose",
14
+ action="store_const",
15
+ const=0,
16
+ default=1,
17
+ help="Set verbosity to 0 (quiet mode).",
18
+ )
19
+ parser.add_argument(
20
+ "-v",
21
+ "--verbose",
22
+ dest="verbose",
23
+ action="count",
24
+ default=1,
25
+ help=(
26
+ "Verbose level. -v for information, -vv for debug, -vvv for trace."
27
+ ),
28
+ )
17
29
 
18
30
 
19
31
  def add_es_connection_args(parser: argparse.ArgumentParser) -> None:
20
- """
21
- Add arguments to the parser needed to gather ElasticSearch server connection parameters.
32
+ """Add arguments to the parser needed to gather ElasticSearch server connection parameters.
22
33
  Arguments are added to the parser by using its reference.
23
34
  """
24
- parser.add_argument('--es-host', dest='es_host', default='localhost',
25
- help='Address of Elasticsearch host.')
26
- parser.add_argument('--es-port', type=int, default=9200, dest='es_port',
27
- help='Elasticsearch port.')
28
- parser.add_argument('--es-usr', dest='es_usr', default='elastic',
29
- help='Elasticsearch user.')
30
- parser.add_argument('--es-pwd', dest='es_pwd', required=True,
31
- help='Elasticsearch password.')
32
- parser.add_argument('--es-cert-fp', dest='es_cert_fp',
33
- help='Elasticsearch sha256 certificate fingerprint.')
34
- parser.add_argument('--es-index-prefix', dest='es_index_prefix',
35
- help='Add the given prefix to each index created during import.')
35
+ parser.add_argument(
36
+ "--es-host",
37
+ dest="es_host",
38
+ default="localhost",
39
+ help="Address of Elasticsearch host.",
40
+ )
41
+ parser.add_argument(
42
+ "--es-port",
43
+ type=int,
44
+ default=9200,
45
+ dest="es_port",
46
+ help="Elasticsearch port.",
47
+ )
48
+ parser.add_argument(
49
+ "--es-usr", dest="es_usr", default="elastic", help="Elasticsearch user."
50
+ )
51
+ parser.add_argument(
52
+ "--es-pwd", dest="es_pwd", required=True, help="Elasticsearch password."
53
+ )
54
+ parser.add_argument(
55
+ "--es-cert-fp",
56
+ dest="es_cert_fp",
57
+ help="Elasticsearch sha256 certificate fingerprint.",
58
+ )
59
+ parser.add_argument(
60
+ "--es-index-prefix",
61
+ dest="es_index_prefix",
62
+ help="Add the given prefix to each index created during import.",
63
+ )