genelastic 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. genelastic/__init__.py +0 -13
  2. genelastic/api/__init__.py +0 -0
  3. genelastic/api/extends/__init__.py +0 -0
  4. genelastic/api/extends/example.py +7 -0
  5. genelastic/api/routes.py +84 -0
  6. genelastic/api/server.py +72 -0
  7. genelastic/api/settings.py +13 -0
  8. genelastic/common/__init__.py +12 -0
  9. genelastic/common/cli.py +35 -0
  10. genelastic/common/elastic.py +183 -0
  11. genelastic/common/exceptions.py +6 -0
  12. genelastic/common/types.py +20 -0
  13. genelastic/import_data/__init__.py +9 -0
  14. genelastic/{analyses.py → import_data/analyses.py} +3 -1
  15. genelastic/{analysis.py → import_data/analysis.py} +3 -2
  16. genelastic/{bi_process.py → import_data/bi_process.py} +1 -1
  17. genelastic/{bi_processes.py → import_data/bi_processes.py} +2 -1
  18. genelastic/{data_file.py → import_data/data_file.py} +3 -1
  19. genelastic/{filename_pattern.py → import_data/filename_pattern.py} +2 -1
  20. genelastic/{gen_data.py → import_data/gen_data.py} +3 -2
  21. genelastic/{import_bundle.py → import_data/import_bundle.py} +2 -1
  22. genelastic/{import_bundle_factory.py → import_data/import_bundle_factory.py} +3 -1
  23. genelastic/{import_data.py → import_data/import_data.py} +49 -51
  24. genelastic/{info.py → import_data/info.py} +29 -50
  25. genelastic/{integrity.py → import_data/integrity.py} +53 -87
  26. genelastic/{tags.py → import_data/tags.py} +2 -1
  27. genelastic/{validate_data.py → import_data/validate_data.py} +6 -4
  28. genelastic/{wet_processes.py → import_data/wet_processes.py} +2 -1
  29. {genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/METADATA +7 -2
  30. genelastic-0.6.1.dist-info/RECORD +36 -0
  31. {genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/WHEEL +1 -1
  32. genelastic-0.6.1.dist-info/entry_points.txt +6 -0
  33. genelastic/common.py +0 -151
  34. genelastic-0.6.0.dist-info/RECORD +0 -25
  35. genelastic-0.6.0.dist-info/entry_points.txt +0 -6
  36. /genelastic/{constants.py → import_data/constants.py} +0 -0
  37. /genelastic/{logger.py → import_data/logger.py} +0 -0
  38. /genelastic/{wet_process.py → import_data/wet_process.py} +0 -0
  39. {genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/top_level.txt +0 -0
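Taken together, these renames show the 0.6.1 restructuring: the former top-level modules move into a new `genelastic.import_data` subpackage, the single `genelastic/common.py` module becomes a `genelastic.common` package, and an optional `genelastic.api` subpackage is added. Downstream imports need updating accordingly; a sketch inferred from the renames above (what each subpackage re-exports is not visible in this diff):

    # genelastic 0.6.0
    from genelastic.common import connect_to_es, get_process_ids
    from genelastic.integrity import main as run_integrity_checks

    # genelastic 0.6.1
    from genelastic.common import ElasticQueryConn, DBIntegrityError
    from genelastic.import_data.integrity import main as run_integrity_checks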
genelastic/{integrity.py → import_data/integrity.py} RENAMED
@@ -2,24 +2,16 @@
 import argparse
 import logging
 import typing
-
-import elasticsearch
-import urllib3
 from elasticsearch import NotFoundError
 
-from .common import (add_verbose_control_args, add_es_connection_args,
-                     connect_to_es, get_process_ids, Bucket, run_composite_aggregation)
-from .logger import configure_logging
+from genelastic.common import (ElasticQueryConn, DBIntegrityError, Bucket,
+                               add_verbose_control_args, add_es_connection_args)
 
+from .logger import configure_logging
 
 logger = logging.getLogger('genelastic')
 logging.getLogger('elastic_transport').setLevel(logging.WARNING) # Disable excessive logging
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
-class DBIntegrityError(Exception):
-    """Represents an integrity error,
-    raised when the database content does not match the expected data schema.
-    """
 
 def read_args() -> argparse.Namespace:
     """Read arguments from command line."""
@@ -32,49 +24,14 @@ def read_args() -> argparse.Namespace:
     return parser.parse_args()
 
 
-def ensure_unique(es: elasticsearch.Elasticsearch, index: str, field: str) -> None:
-    """
-    Ensure that all values of a field in an index are all unique.
-
-    :param es: Elasticsearch database instance.
-    :param index: Name of the index.
-    :param field: Field name to check for value uniqueness.
-    :raises DBIntegrityError: Some values of the given field are duplicated in the index.
-    """
-
-    logger.info("Ensuring that the field '%s' in the index '%s' only contains unique values...",
-                field, index)
-    query = {
-        "size": 0,
-        "aggs": {
-            "duplicate_proc_ids": {
-                "terms": {
-                    "field": f"{field}.keyword",
-                    "size": 10000,
-                    "min_doc_count": 2
-                }
-            }
-        }
-    }
-    buckets: typing.List[Bucket] = run_composite_aggregation(es, index, query)
-    duplicated_processes: typing.Set[str] = set(map(lambda bucket: str(bucket["key"]), buckets))
-
-    if len(duplicated_processes) > 0:
-        raise DBIntegrityError(f"Found non-unique value for field {field} in index '{index}': "
-                               f"{", ".join(duplicated_processes)}.")
-
-    logger.info("All values of field '%s' in index '%s' are unique.",
-                field, index)
-
-
-def check_for_undefined_file_indices(es: elasticsearch.Elasticsearch, analyses_index: str) -> None:
+def check_for_undefined_file_indices(es_query_conn: ElasticQueryConn, analyses_index: str) -> None:
     """
     Check for potentially undefined files indices in the analyses index.
 
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
-    :raises DBIntegrityError: Some files indices are used in the analyses index but
-        are undefined.
+    :raises genelastic.common.DBIntegrityError:
+        Some files indices are used in the analyses index but are undefined.
     """
     logger.info("Checking for references to undefined file indices in the index '%s'...",
                 analyses_index)
@@ -93,13 +50,13 @@ def check_for_undefined_file_indices(es: elasticsearch.Elasticsearch, analyses_i
         }
     }
 
-    buckets: typing.List[Bucket] = run_composite_aggregation(es, analyses_index, query)
+    buckets: typing.List[Bucket] = es_query_conn.run_composite_aggregation(analyses_index, query)
 
     for bucket in buckets:
        file_index = bucket['key']['file_index']
 
        try:
-            es.indices.get(index=file_index)
+            es_query_conn.client.indices.get(index=file_index)
            logger.debug("File index %s used in index '%s' is defined.",
                         file_index, analyses_index)
        except NotFoundError:
@@ -115,12 +72,12 @@ def check_for_undefined_file_indices(es: elasticsearch.Elasticsearch, analyses_i
     logger.info("All defined file indices are referenced.")
 
 
-def get_undefined_processes(es: elasticsearch.Elasticsearch, analyses_index: str,
+def get_undefined_processes(es_query_conn: ElasticQueryConn, analyses_index: str,
                             process_index: str, field: str) -> typing.Set[str]:
     """
     Return a set of undefined processes IDs in an index.
 
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param process_index: Name of the index to check for undefined processes.
     :param field: Field name used to retrieve the process ID.
@@ -131,37 +88,39 @@ def get_undefined_processes(es: elasticsearch.Elasticsearch, analyses_index: str
        "aggs": {
            "get_analyses_processes": {
                "composite": {
-                    "sources": { "process": {"terms": {"field": f"{field}.keyword"}}},
+                    "sources": {"process": {"terms": {"field": f"{field}.keyword"}}},
                    "size": 1000,
                }
            }
        }
    }
 
-    buckets: typing.List[Bucket] = run_composite_aggregation(es, analyses_index, query)
+    buckets: typing.List[Bucket] = es_query_conn.run_composite_aggregation(analyses_index, query)
 
     used_processes = set(map(lambda bucket: bucket["key"]["process"], buckets))
     logger.debug("Used values for field '%s' in index '%s': %s",
                  field, analyses_index, used_processes)
 
-    defined_processes = get_process_ids(es, process_index, "proc_id")
+    defined_processes = es_query_conn.get_field_values(process_index, "proc_id")
     logger.debug("Defined values in index '%s': %s", process_index, defined_processes)
 
     return used_processes.difference(defined_processes)
 
 
-def check_for_undefined_wet_processes(es: elasticsearch.Elasticsearch,
+def check_for_undefined_wet_processes(es_query_conn: ElasticQueryConn,
                                       analyses_index: str, wet_process_index: str) -> None:
     """
     Check that each wet process used in the analyses index is defined.
 
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param wet_process_index: Name of the index where wet processes are stored.
-    :raises DBIntegrityError: Some wet processes used in the analyses index are undefined.
+    :raises genelastic.common.DBIntegrityError:
+        Some wet processes used in the analyses index are undefined.
     """
     logger.info("Checking for undefined wet processes used in index '%s'...", analyses_index)
-    undefined_wet_processes = get_undefined_processes(es, analyses_index, wet_process_index,
+    undefined_wet_processes = get_undefined_processes(es_query_conn,
+                                                      analyses_index, wet_process_index,
                                                       "metadata.wet_process")
 
     if len(undefined_wet_processes) > 0:
@@ -171,18 +130,21 @@ def check_for_undefined_wet_processes(es: elasticsearch.Elasticsearch,
     logger.info("All wet processes used in index '%s' are defined.", wet_process_index)
 
 
-def check_for_undefined_bi_processes(es: elasticsearch.Elasticsearch,
+def check_for_undefined_bi_processes(es_query_conn: ElasticQueryConn,
                                      analyses_index: str, bi_process_index: str) -> None:
     """
     Check that each bio info process used in the analyses index is defined.
 
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param bi_process_index: Name of the index where bio info processes are stored.
-    :raises DBIntegrityError: Some bio info processes used in the analyses index are undefined.
+    :raises genelastic.common.DBIntegrityError:
+        Some bio info processes used in the analyses index are undefined.
     """
-    logger.info("Checking for undefined bio info processes used in index '%s'...", analyses_index)
-    undefined_bi_processes = get_undefined_processes(es, analyses_index, bi_process_index,
+    logger.info("Checking for undefined bio info processes used in index '%s'...",
+                analyses_index)
+    undefined_bi_processes = get_undefined_processes(es_query_conn, analyses_index,
+                                                     bi_process_index,
                                                      "metadata.bi_process")
 
     if len(undefined_bi_processes) > 0:
@@ -193,18 +155,19 @@ def check_for_undefined_bi_processes(es: elasticsearch.Elasticsearch,
     logger.info("All bio info processes used in index '%s' are defined.", bi_process_index)
 
 
-def check_for_unused_file_indices(es: elasticsearch.Elasticsearch,
+def check_for_unused_file_indices(es_query_conn: ElasticQueryConn,
                                   analyses_index: str, index_prefix: str) -> int:
     """
     Check that each of the file indices are used in at least one analysis.
 
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param index_prefix: Prefix given to all the indices of the ElasticSearch database.
     :returns: 1 if some file indices exists but are unused in the analyses index,
        and 0 otherwise.
     """
-    json_indices = es.cat.indices(index=f"{index_prefix}-file-*", format="json").body
+    json_indices = (es_query_conn.client.cat.
+                    indices(index=f"{index_prefix}-file-*", format="json").body)
     found_file_indices = set(map(lambda x: x["index"], json_indices))
 
     query = {
@@ -219,7 +182,7 @@ def check_for_unused_file_indices(es: elasticsearch.Elasticsearch,
         }
     }
 
-    buckets: typing.List[Bucket] = run_composite_aggregation(es, analyses_index, query)
+    buckets: typing.List[Bucket] = es_query_conn.run_composite_aggregation(analyses_index, query)
 
     used_files_indices = set(map(lambda bucket: bucket['key']['file_index'], buckets))
     unused_files_indices = found_file_indices.difference(used_files_indices)
@@ -233,12 +196,12 @@ def check_for_unused_file_indices(es: elasticsearch.Elasticsearch,
     return 0
 
 
-def check_for_unused_wet_processes(es: elasticsearch.Elasticsearch, analyses_index: str,
+def check_for_unused_wet_processes(es_query_conn: ElasticQueryConn, analyses_index: str,
                                    wet_proc_index: str) -> int:
     """
     Check for defined wet processes that are not used in the analyses index.
 
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param wet_proc_index: Name of the index where wet processes are stored.
     :returns: 1 if some wet process are defined but unused in the analyses index,
@@ -246,10 +209,10 @@ def check_for_unused_wet_processes(es: elasticsearch.Elasticsearch, analyses_ind
     """
     logger.info("Checking for unused wet processes in the index '%s'...", wet_proc_index)
 
-    defined_wet_procs = get_process_ids(es, wet_proc_index, "proc_id")
+    defined_wet_procs = es_query_conn.get_field_values(wet_proc_index, "proc_id")
     logger.debug("Found the following defined wet processes: %s", defined_wet_procs)
 
-    used_wet_procs = get_process_ids(es, analyses_index, "metadata.wet_process")
+    used_wet_procs = es_query_conn.get_field_values(analyses_index, "metadata.wet_process")
     logger.debug("Following processes are used in the index '%s': %s",
                  analyses_index, used_wet_procs)
 
@@ -262,12 +225,12 @@ def check_for_unused_wet_processes(es: elasticsearch.Elasticsearch, analyses_ind
     return 0
 
 
-def check_for_unused_bi_processes(es: elasticsearch.Elasticsearch, analyses_index: str,
+def check_for_unused_bi_processes(es_query_conn: ElasticQueryConn, analyses_index: str,
                                   bi_proc_index: str) -> int:
     """
     Check for defined bio info processes that are not used in the analyses index.
 
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param bi_proc_index: Name of the index where bio info processes are stored.
     :returns: 1 if some wet process are defined but unused in the analyses index,
@@ -275,10 +238,10 @@ def check_for_unused_bi_processes(es: elasticsearch.Elasticsearch, analyses_inde
     """
     logger.info("Checking for unused bio info processes in the index '%s'...", bi_proc_index)
 
-    defined_bi_procs = get_process_ids(es, bi_proc_index, "proc_id")
+    defined_bi_procs = es_query_conn.get_field_values(bi_proc_index, "proc_id")
     logger.debug("Found the following defined bio info processes: %s", defined_bi_procs)
 
-    used_bi_procs = get_process_ids(es, analyses_index, "metadata.bi_process")
+    used_bi_procs = es_query_conn.get_field_values(analyses_index, "metadata.bi_process")
     logger.debug("Following processes are used in the index '%s': %s",
                  analyses_index, used_bi_procs)
 
@@ -302,22 +265,25 @@ def main() -> None:
     wet_processes_index = f"{args.es_index_prefix}-wet_processes"
     bi_processes_index = f"{args.es_index_prefix}-bi_processes"
 
-    es = connect_to_es(host=args.es_host, port=args.es_port, usr=args.es_usr, pwd=args.es_pwd)
+    addr = f"https://{args.es_host}:{args.es_port}"
+    logger.info("Trying to connect to Elasticsearch at %s...", addr)
+    es_query_conn = ElasticQueryConn(addr, args.es_cert_fp,
+                                     basic_auth=(args.es_usr, args.es_pwd))
 
     # Fatal errors
     try:
-        ensure_unique(es, wet_processes_index, "proc_id")
-        ensure_unique(es, bi_processes_index, "proc_id")
-        check_for_undefined_file_indices(es, analyses_index)
-        check_for_undefined_wet_processes(es, analyses_index, wet_processes_index)
-        check_for_undefined_bi_processes(es, analyses_index, bi_processes_index)
+        es_query_conn.ensure_unique(wet_processes_index, "proc_id")
+        es_query_conn.ensure_unique(bi_processes_index, "proc_id")
+        check_for_undefined_file_indices(es_query_conn, analyses_index)
+        check_for_undefined_wet_processes(es_query_conn, analyses_index, wet_processes_index)
+        check_for_undefined_bi_processes(es_query_conn, analyses_index, bi_processes_index)
     except DBIntegrityError as e:
        raise SystemExit(e) from e
 
     # Warnings
-    check_for_unused_wet_processes(es, analyses_index, wet_processes_index)
-    check_for_unused_bi_processes(es, analyses_index, bi_processes_index)
-    check_for_unused_file_indices(es, analyses_index, args.es_index_prefix)
+    check_for_unused_wet_processes(es_query_conn, analyses_index, wet_processes_index)
+    check_for_unused_bi_processes(es_query_conn, analyses_index, bi_processes_index)
+    check_for_unused_file_indices(es_query_conn, analyses_index, args.es_index_prefix)
 
 
 if __name__ == '__main__':
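The bulk of this file's change is mechanical: the free functions `connect_to_es`, `run_composite_aggregation` and `get_process_ids`, plus the local `ensure_unique` and `DBIntegrityError`, are replaced by an `ElasticQueryConn` object imported from `genelastic.common`. A minimal sketch of the resulting call pattern, using only the constructor and members visible in this diff (the URL, credentials, index names and query are placeholders):

    from genelastic.common import DBIntegrityError, ElasticQueryConn

    # Constructor arguments mirror the call in main(): address, certificate
    # fingerprint (args.es_cert_fp there, None here), then basic_auth.
    conn = ElasticQueryConn("https://localhost:9200", None,
                            basic_auth=("elastic", "changeme"))

    # Composite aggregation query, shaped like the one in get_undefined_processes().
    query = {"size": 0, "aggs": {"get_procs": {"composite": {
        "sources": {"proc": {"terms": {"field": "proc_id.keyword"}}},
        "size": 1000}}}}

    try:
        conn.ensure_unique("prefix-wet_processes", "proc_id")  # raises DBIntegrityError
        procs = conn.get_field_values("prefix-wet_processes", "proc_id")
        buckets = conn.run_composite_aggregation("prefix-analyses", query)
        conn.client.indices.get(index="prefix-file-demo")  # raw client still exposed
    except DBIntegrityError as e:
        raise SystemExit(e) from e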
genelastic/{tags.py → import_data/tags.py} RENAMED
@@ -3,7 +3,8 @@ import logging
 import re
 import typing
 
-from .common import BundleDict
+from genelastic.common import BundleDict
+
 from .constants import DEFAULT_TAG2FIELD, DEFAULT_TAG_SUFFIX, DEFAULT_TAG_PREFIX
 
 logger = logging.getLogger('genelastic')
genelastic/{validate_data.py → import_data/validate_data.py} RENAMED
@@ -2,11 +2,12 @@
 import argparse
 import logging
 
-from schema import SchemaError # type: ignore[import-untyped]
+from schema import SchemaError # type: ignore
+
+from genelastic.common import add_verbose_control_args
 
-from . import make_import_bundle_from_files
-from .common import add_verbose_control_args
 from .logger import configure_logging
+from .import_bundle_factory import make_import_bundle_from_files
 
 logger = logging.getLogger('genelastic')
 
@@ -15,7 +16,8 @@ def read_args() -> argparse.Namespace:
     """Read arguments from command line."""
     parser = argparse.ArgumentParser(description="Ensure that YAML files "
                                                  "follow the genelastic YAML bundle schema.",
-                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+                                     allow_abbrev=False)
     add_verbose_control_args(parser)
     parser.add_argument('files', type=str, nargs="+", default=None,
                         help="YAML files to validate.")
genelastic/{wet_processes.py → import_data/wet_processes.py} RENAMED
@@ -2,7 +2,8 @@
 import logging
 import typing
 
-from .common import BundleDict
+from genelastic.common import BundleDict
+
 from .wet_process import WetProcess
 
 logger = logging.getLogger('genelastic')
{genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: genelastic
-Version: 0.6.0
+Version: 0.6.1
 Summary: Generate and store genetic data into an Elasticsearch database.
 Author: CNRGH
 Author-email: Pierrick ROGER <pierrick.roger@cnrgh.fr>, Maxime BLANCHON <maxime.blanchon@cnrgh.fr>
@@ -18,6 +18,11 @@ Requires-Dist: schema
 Requires-Dist: PyYAML
 Requires-Dist: biophony >=1.0.1
 Requires-Dist: colorlog
+Provides-Extra: api
+Requires-Dist: flask ; extra == 'api'
+Requires-Dist: elasticsearch ; extra == 'api'
+Requires-Dist: environs ; extra == 'api'
+Requires-Dist: connexion[flask,swagger-ui,uvicorn] ; extra == 'api'
 Provides-Extra: docs
 Requires-Dist: sphinx ; extra == 'docs'
 Requires-Dist: sphinx-autoapi ; extra == 'docs'
@@ -25,7 +30,7 @@ Requires-Dist: furo ; extra == 'docs'
 Provides-Extra: tests
 Requires-Dist: pytest ; extra == 'tests'
 Requires-Dist: mypy ; extra == 'tests'
-Requires-Dist: pylint <3.3,>=3.2 ; extra == 'tests'
+Requires-Dist: pylint ; extra == 'tests'
 Requires-Dist: bandit ; extra == 'tests'
 Requires-Dist: coverage ; extra == 'tests'
 Requires-Dist: yamllint ; extra == 'tests'
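The `api` extra introduced here matches the new `genelastic/api` subpackage listed in the 0.6.1 RECORD: `pip install "genelastic[api]"` pulls in flask, elasticsearch, environs and connexion on top of the base dependencies, while a plain `pip install genelastic` does not.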
genelastic-0.6.1.dist-info/RECORD ADDED
@@ -0,0 +1,36 @@
+genelastic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+genelastic/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+genelastic/api/routes.py,sha256=FicBE_HStV6u8-Q9k6ABNLJNBwRFPsSTjAoTc0JnocU,2882
+genelastic/api/server.py,sha256=oJREb8LfPM9O3vd8grTnZhQptYcIYXY-qFlFH1Z7G-8,2271
+genelastic/api/settings.py,sha256=A6idvtaaT5Q-v78S8EKiE1LjYdLOvaXyxx7KrREq_9c,479
+genelastic/api/extends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+genelastic/api/extends/example.py,sha256=mVOG6HzkxZ2uzAHIlG6OqCJstED6Ie421O6SIBFmU2w,207
+genelastic/common/__init__.py,sha256=cK_dmQbI4pf7GQqTwdqbUqUMQcJuF9tBGpE0JR1EUp0,747
+genelastic/common/cli.py,sha256=t2Lk8I1ZyF5LlLnZu03JT4Z58_Yl5l2UMdKIyDuhqg8,1738
+genelastic/common/elastic.py,sha256=uDnc03jqvflBeUiAkevJq_oZnsKDxXOul6x0pF4d_wg,6956
+genelastic/common/exceptions.py,sha256=YSXqF2f29x9rKZYRT-5wko0ySGgggvNBnUV-8n2hoc4,203
+genelastic/common/types.py,sha256=RBHZwW3wNYIM4KG9APWUuvXp1oztjlMFpuFhzoi26UI,1061
+genelastic/import_data/__init__.py,sha256=uczwevd0ikG6GsA9Lkjei19TPCk0hny6iacKFje1w7w,413
+genelastic/import_data/analyses.py,sha256=jS9dRJveWiE06eRQT0tcra_UWwTVfjK1lDliWnv9nNA,1974
+genelastic/import_data/analysis.py,sha256=qHPi7iAiMxhy9Ljjv1qndmzUX9G3yFfcH4Mu8LX_ujQ,7918
+genelastic/import_data/bi_process.py,sha256=WZ4cqLnD0wyzCQdTpb6Zj11BN9B7ytaX7MBF2CFqlhg,708
+genelastic/import_data/bi_processes.py,sha256=scZgak_Ihp5UYgYBQcdPxVt7bnh7cESt-PJ4xx_pnWw,1416
+genelastic/import_data/constants.py,sha256=Y-3i8VlMFTcS158tsbBjvjsCvnWXVXE-Y3A2QOgcoOE,935
+genelastic/import_data/data_file.py,sha256=e7iEqltECVDTMxBc7JcUHOfv434_thFQlcLlYIEiD_I,2721
+genelastic/import_data/filename_pattern.py,sha256=w4sX9lCcTLcA2zgXE6lMQOHQSMEi5FgW_nVUzlmjpvE,1991
+genelastic/import_data/gen_data.py,sha256=tVms8CsKvxtxXlS1o6jEKpy1AJi1waI5MerZgQQitrc,6979
+genelastic/import_data/import_bundle.py,sha256=FMfw-ZwywWEXkRwaRdsj_E1VmoXiEbPrx5Wf8MUpx1Y,4876
+genelastic/import_data/import_bundle_factory.py,sha256=otaWF8NqimfAf9-1fenDAeU63e_6oR6-Ugdj0JsBt8w,9092
+genelastic/import_data/import_data.py,sha256=86YDW06XcqGCfKuizolDHwGnOjeN_i6x7NnfU6lAENQ,11788
+genelastic/import_data/info.py,sha256=naUAqMqIwo6L36KsLVGcE-d1G35PDvkaV-qrIUcSBQ0,7328
+genelastic/import_data/integrity.py,sha256=7mN-py67k2wVWOxnYCv0orabeL6TZ3O8wn3yk1Rw3vA,12207
+genelastic/import_data/logger.py,sha256=eV_LACPjkIg3G_D5g0oTcIRZL86E_iQ2UM_t0CwEkUI,1835
+genelastic/import_data/tags.py,sha256=815hsW-cpqX09vG3a4W9uWhRCMNMtpedJMrHQxJw6zg,3924
+genelastic/import_data/validate_data.py,sha256=u8-FNcofP0crx_jKdM8NRjfm8WK7_WwkBX_y0pM1TBc,1604
+genelastic/import_data/wet_process.py,sha256=uhsZrpDHUiP6-Y6f6_3xcsvqDl0ew_-9aY8vFr3kB3A,693
+genelastic/import_data/wet_processes.py,sha256=rWHX3RY4_mQd5JXHrzPCno6-uKVx8MmYxAQl_n9xftM,1366
+genelastic-0.6.1.dist-info/METADATA,sha256=uocOr4DpI4aJvimcMdBTt5DBZpBP4o14J9S_vHWMVZw,1537
+genelastic-0.6.1.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
+genelastic-0.6.1.dist-info/entry_points.txt,sha256=tPM55ca4ft8XNNFqRFJFtoQ0gTYmFi4Yww4R4qiVbjw,264
+genelastic-0.6.1.dist-info/top_level.txt,sha256=ra4gCsuKH1d0sXygcnwD_u597ir6bYYxWTS7dkA6vdM,11
+genelastic-0.6.1.dist-info/RECORD,,
{genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.1.0)
+Generator: setuptools (75.4.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
genelastic-0.6.1.dist-info/entry_points.txt ADDED
@@ -0,0 +1,6 @@
+[console_scripts]
+db_info = genelastic.import_data.info:main
+db_integrity = genelastic.import_data.integrity:main
+gen-data = genelastic.import_data.gen_data:main
+import = genelastic.import_data.import_data:main
+validate = genelastic.import_data.validate_data:main
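The five console-script names are unchanged; only their targets move to the relocated modules. Each entry point maps to a module-level `main()`, so `db_integrity` is equivalent to this small driver (a sketch of the entry-point mechanism, not code shipped in the wheel):

    # Equivalent of the `db_integrity` console script in 0.6.1
    # (entry point: genelastic.import_data.integrity:main).
    from genelastic.import_data.integrity import main

    if __name__ == "__main__":
        main()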
genelastic/common.py DELETED
@@ -1,151 +0,0 @@
-"""
-Module: common
-
-This module contains custom types and functions shared by multiple genelastic scripts.
-"""
-import argparse
-import sys
-import typing
-import logging
-
-import elastic_transport
-import elasticsearch
-
-logger = logging.getLogger('genelastic')
-
-AnalysisMetaData: typing.TypeAlias = typing.Dict[str, str | int]
-WetProcessesData: typing.TypeAlias = typing.Dict[str, str | int | float]
-BioInfoProcessData: typing.TypeAlias = typing.Dict[str, str | typing.List[str]]
-BundleDict: typing.TypeAlias = typing.Dict[str, typing.Any]
-
-AnalysisDocument: typing.TypeAlias = typing.Dict[str, str | None | AnalysisMetaData]
-MetadataDocument: typing.TypeAlias = typing.Dict[str, int | str | typing.List[typing.Any | None]]
-ProcessDocument: typing.TypeAlias = (typing.Dict[str, str] |
-                                     WetProcessesData |
-                                     BioInfoProcessData)
-BulkItems: typing.TypeAlias = typing.List[typing.Dict[str, str |
-                                                      MetadataDocument |
-                                                      AnalysisDocument |
-                                                      ProcessDocument]]
-Bucket: typing.TypeAlias = typing.Dict[str, typing.Dict[typing.Any, typing.Any]]
-
-
-def connect_to_es(host: str, port: int, usr: str, pwd: str) -> elasticsearch.Elasticsearch:
-    """Connect to a remote Elasticsearch database."""
-    addr = f"https://{host}:{port}"
-    logger.info("Trying to connect to Elasticsearch at %s.", addr)
-
-    try:
-        es = elasticsearch.Elasticsearch(
-            addr,
-            # ssl_assert_fingerprint=args.es_cert_fp,
-            # ca_certs=args.es_cert,
-            verify_certs=False,
-            basic_auth=(usr, pwd)
-        )
-        logger.info(es.info())
-    except elastic_transport.TransportError as e:
-        logger.error(e.message)
-        sys.exit(1)
-    return es
-
-
-def run_composite_aggregation(es: elasticsearch.Elasticsearch,
-                              index: str, query: typing.Dict[str, typing.Any]) \
-        -> typing.List[Bucket]:
-    """
-    Executes a composite aggregation on an Elasticsearch index and returns all paginated results.
-
-    :param es: Elasticsearch client instance.
-    :param index: Name of the index to query.
-    :param query: Aggregation query to run.
-    :return: List of aggregation results.
-    """
-    # Extract the aggregation name from the query dict.
-    agg_name = next(iter(query["aggs"]))
-    all_buckets: typing.List[Bucket] = []
-
-    try:
-        logger.debug("Running composite aggregation query %s on index '%s'.", query, index)
-        response = es.search(index=index, body=query)
-    except elasticsearch.NotFoundError as e:
-        raise SystemExit(f"Error: {e.message} for index '{index}'.") from e
-
-    while True:
-        # Extract buckets from the response.
-        buckets: typing.List[Bucket] = response['aggregations'][agg_name]['buckets']
-        all_buckets.extend(buckets)
-
-        # Check if there are more results to fetch.
-        if 'after_key' in response['aggregations'][agg_name]:
-            after_key = response['aggregations'][agg_name]['after_key']
-            query['aggs'][agg_name]['composite']['after'] = after_key
-            try:
-                logger.debug("Running query %s on index '%s'.", query, index)
-                response = es.search(index=index, body=query) # Fetch the next page of results.
-            except elasticsearch.NotFoundError as e:
-                raise SystemExit(f"Error: {e.message} for index '{index}'.") from e
-        else:
-            break
-
-    return all_buckets
-
-
-def get_process_ids(es: elasticsearch.Elasticsearch, index: str, proc_field_name: str) \
-        -> typing.Set[str]:
-    """Return a set of process IDs."""
-    process_ids = set()
-
-    query = {
-        "size": 0,
-        "aggs": {
-            "get_proc_ids": {
-                "composite": {
-                    "sources": {"proc_id": {"terms": {"field": f"{proc_field_name}.keyword"}}},
-                    "size": 1000,
-                }
-            }
-        }
-    }
-
-    buckets: typing.List[Bucket] = run_composite_aggregation(es, index, query)
-
-    for bucket in buckets:
-        process_ids.add(bucket['key']['proc_id'])
-
-    return process_ids
-
-
-def add_verbose_control_args(parser: argparse.ArgumentParser) -> None:
-    """
-    Add verbose control arguments to the parser.
-    Arguments are added to the parser by using its reference.
-    """
-    parser.add_argument('-q', '--quiet', dest='verbose', action='store_const',
-                        const=0, default=1,
-                        help='Set verbosity to 0 (quiet mode).')
-    parser.add_argument('-v', '--verbose', dest='verbose', action='count',
-                        default=1,
-                        help=('Verbose level. -v for information, -vv for debug,' +
-                              ' -vvv for trace.'))
-
-
-def add_es_connection_args(parser: argparse.ArgumentParser) -> None:
-    """
-    Add arguments to the parser needed to gather ElasticSearch server connection parameters.
-    Arguments are added to the parser by using its reference.
-    """
-    parser.add_argument('--es-host', dest='es_host', default='localhost',
-                        help='Address of Elasticsearch host.')
-    parser.add_argument('--es-port', type=int, default=9200, dest='es_port',
-                        help='Elasticsearch port.')
-    parser.add_argument('--es-usr', dest='es_usr', default='elastic',
-                        help='Elasticsearch user.')
-    parser.add_argument('--es-pwd', dest='es_pwd', required=True,
-                        help='Elasticsearch password.')
-    parser.add_argument('--es-cert', dest='es_cert',
-                        help='Elasticsearch certificate file.')
-    parser.add_argument('--es-cert-fp', dest='es_cert_fp',
-                        help='Elasticsearch certificate fingerprint.')
-    parser.add_argument('--es-index-prefix', dest='es_index_prefix',
-                        help='Add the given prefix to each index created during import.')
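Judging by the new `genelastic/common/` entries in the 0.6.1 RECORD above, the helpers deleted here were split rather than dropped: presumably the argparse helpers into `cli.py`, the connection and composite-aggregation logic into `elastic.py` (as the `ElasticQueryConn` class used throughout this release), `DBIntegrityError` into `exceptions.py`, and the type aliases into `types.py`.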
genelastic-0.6.0.dist-info/RECORD DELETED
@@ -1,25 +0,0 @@
-genelastic/__init__.py,sha256=lMTq5VsAuRjNlf3eAEqGE-Yvht63IJ0nIf9z_1hwC00,486
-genelastic/analyses.py,sha256=UTvNIhZpK2zF77zg02ftyAdUNpWhTwQJeqb2scU2b_Y,1961
-genelastic/analysis.py,sha256=N8oo8uXoFbdLb7C1_67rTFEzV962G_CIqlaEE9IPjiM,7876
-genelastic/bi_process.py,sha256=CT4AFFv-pyJceKnYCHKS7SKGhVuSxOJUw5CXSbED15s,698
-genelastic/bi_processes.py,sha256=ciGQyoR4Tuxhoq4FPK6b0O92AzGLgijVGqS19QeMg6I,1405
-genelastic/common.py,sha256=22SDJJmED2bQygO7GjXFfnB-KH0UujoH867bpz2OAQ4,6065
-genelastic/constants.py,sha256=Y-3i8VlMFTcS158tsbBjvjsCvnWXVXE-Y3A2QOgcoOE,935
-genelastic/data_file.py,sha256=QzOOThuCRlWg_iiH3-6FnYZaVgDVfJI0YxZ0Eoz30kc,2709
-genelastic/filename_pattern.py,sha256=IDQ9ffXxISJ6VMineu-qxnxZgjyejhVVesWIyUhbriE,1980
-genelastic/gen_data.py,sha256=s8-wTh7O7tyuszcIQC4dP1_kVyWLFMhtQMhQLL2JlD8,6922
-genelastic/import_bundle.py,sha256=ZqiKi5BYBo4by2FWBsS5qGyDRn7xxLtSb3ks1SqySNc,4865
-genelastic/import_bundle_factory.py,sha256=nK-VlJATgCNnJSTQotOva89j9H5pdJqU58u1QtlqJkA,9080
-genelastic/import_data.py,sha256=SENK1_Khw88Jgs8EXvDwk9jhQidiETxmAVhn9ag6jNs,11489
-genelastic/info.py,sha256=3fk1fPrpfK8oRo1WnABNDSGdEpq1G6wvCW_D8meyHss,7789
-genelastic/integrity.py,sha256=ypXl9kAdnsxa7LgZ9nDgsklBqVlG9I61A5hqfGeGYgs,13090
-genelastic/logger.py,sha256=eV_LACPjkIg3G_D5g0oTcIRZL86E_iQ2UM_t0CwEkUI,1835
-genelastic/tags.py,sha256=xHCLWgnXcLUUKN3zthQXoJ7yjEhPoQi7JLvdMtB6T5c,3913
-genelastic/validate_data.py,sha256=V0f7fFTs5FkVU8NoBfDI7mQDwITzW_QXt3bj5OgsdzQ,1531
-genelastic/wet_process.py,sha256=uhsZrpDHUiP6-Y6f6_3xcsvqDl0ew_-9aY8vFr3kB3A,693
-genelastic/wet_processes.py,sha256=PtV0HFs6rGan_-3-BiXeab-VBX1JQGucktoXE4GuaAk,1355
-genelastic-0.6.0.dist-info/METADATA,sha256=Ad8wOo_mTY3l7RVy9WNdMAzVnWhTxEb2uacXue1CdUU,1335
-genelastic-0.6.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-genelastic-0.6.0.dist-info/entry_points.txt,sha256=ZYi1_Rmjl-9XRywzPdV-U7TxA7Z6yyLVt-W13fZtxsQ,204
-genelastic-0.6.0.dist-info/top_level.txt,sha256=ra4gCsuKH1d0sXygcnwD_u597ir6bYYxWTS7dkA6vdM,11
-genelastic-0.6.0.dist-info/RECORD,,
genelastic-0.6.0.dist-info/entry_points.txt DELETED
@@ -1,6 +0,0 @@
-[console_scripts]
-db_info = genelastic.info:main
-db_integrity = genelastic.integrity:main
-gen-data = genelastic.gen_data:main
-import = genelastic.import_data:main
-validate = genelastic.validate_data:main
File without changes
File without changes