genelastic 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genelastic/__init__.py +0 -13
- genelastic/api/__init__.py +0 -0
- genelastic/api/extends/__init__.py +0 -0
- genelastic/api/extends/example.py +7 -0
- genelastic/api/routes.py +84 -0
- genelastic/api/server.py +72 -0
- genelastic/api/settings.py +13 -0
- genelastic/common/__init__.py +12 -0
- genelastic/common/cli.py +35 -0
- genelastic/common/elastic.py +183 -0
- genelastic/common/exceptions.py +6 -0
- genelastic/common/types.py +20 -0
- genelastic/import_data/__init__.py +9 -0
- genelastic/{analyses.py → import_data/analyses.py} +3 -1
- genelastic/{analysis.py → import_data/analysis.py} +3 -2
- genelastic/{bi_process.py → import_data/bi_process.py} +1 -1
- genelastic/{bi_processes.py → import_data/bi_processes.py} +2 -1
- genelastic/{data_file.py → import_data/data_file.py} +3 -1
- genelastic/{filename_pattern.py → import_data/filename_pattern.py} +2 -1
- genelastic/{gen_data.py → import_data/gen_data.py} +3 -2
- genelastic/{import_bundle.py → import_data/import_bundle.py} +2 -1
- genelastic/{import_bundle_factory.py → import_data/import_bundle_factory.py} +3 -1
- genelastic/{import_data.py → import_data/import_data.py} +49 -51
- genelastic/{info.py → import_data/info.py} +29 -50
- genelastic/{integrity.py → import_data/integrity.py} +53 -87
- genelastic/{tags.py → import_data/tags.py} +2 -1
- genelastic/{validate_data.py → import_data/validate_data.py} +6 -4
- genelastic/{wet_processes.py → import_data/wet_processes.py} +2 -1
- {genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/METADATA +7 -2
- genelastic-0.6.1.dist-info/RECORD +36 -0
- {genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/WHEEL +1 -1
- genelastic-0.6.1.dist-info/entry_points.txt +6 -0
- genelastic/common.py +0 -151
- genelastic-0.6.0.dist-info/RECORD +0 -25
- genelastic-0.6.0.dist-info/entry_points.txt +0 -6
- /genelastic/{constants.py → import_data/constants.py} +0 -0
- /genelastic/{logger.py → import_data/logger.py} +0 -0
- /genelastic/{wet_process.py → import_data/wet_process.py} +0 -0
- {genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/top_level.txt +0 -0
|
@@ -2,24 +2,16 @@
|
|
|
2
2
|
import argparse
|
|
3
3
|
import logging
|
|
4
4
|
import typing
|
|
5
|
-
|
|
6
|
-
import elasticsearch
|
|
7
|
-
import urllib3
|
|
8
5
|
from elasticsearch import NotFoundError
|
|
9
6
|
|
|
10
|
-
from .common import (
|
|
11
|
-
|
|
12
|
-
from .logger import configure_logging
|
|
7
|
+
from genelastic.common import (ElasticQueryConn, DBIntegrityError, Bucket,
|
|
8
|
+
add_verbose_control_args, add_es_connection_args)
|
|
13
9
|
|
|
10
|
+
from .logger import configure_logging
|
|
14
11
|
|
|
15
12
|
logger = logging.getLogger('genelastic')
|
|
16
13
|
logging.getLogger('elastic_transport').setLevel(logging.WARNING) # Disable excessive logging
|
|
17
|
-
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
18
14
|
|
|
19
|
-
class DBIntegrityError(Exception):
|
|
20
|
-
"""Represents an integrity error,
|
|
21
|
-
raised when the database content does not match the expected data schema.
|
|
22
|
-
"""
|
|
23
15
|
|
|
24
16
|
def read_args() -> argparse.Namespace:
|
|
25
17
|
"""Read arguments from command line."""
|
|
@@ -32,49 +24,14 @@ def read_args() -> argparse.Namespace:
|
|
|
32
24
|
return parser.parse_args()
|
|
33
25
|
|
|
34
26
|
|
|
35
|
-
def
|
|
36
|
-
"""
|
|
37
|
-
Ensure that all values of a field in an index are all unique.
|
|
38
|
-
|
|
39
|
-
:param es: Elasticsearch database instance.
|
|
40
|
-
:param index: Name of the index.
|
|
41
|
-
:param field: Field name to check for value uniqueness.
|
|
42
|
-
:raises DBIntegrityError: Some values of the given field are duplicated in the index.
|
|
43
|
-
"""
|
|
44
|
-
|
|
45
|
-
logger.info("Ensuring that the field '%s' in the index '%s' only contains unique values...",
|
|
46
|
-
field, index)
|
|
47
|
-
query = {
|
|
48
|
-
"size": 0,
|
|
49
|
-
"aggs": {
|
|
50
|
-
"duplicate_proc_ids": {
|
|
51
|
-
"terms": {
|
|
52
|
-
"field": f"{field}.keyword",
|
|
53
|
-
"size": 10000,
|
|
54
|
-
"min_doc_count": 2
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
}
|
|
59
|
-
buckets: typing.List[Bucket] = run_composite_aggregation(es, index, query)
|
|
60
|
-
duplicated_processes: typing.Set[str] = set(map(lambda bucket: str(bucket["key"]), buckets))
|
|
61
|
-
|
|
62
|
-
if len(duplicated_processes) > 0:
|
|
63
|
-
raise DBIntegrityError(f"Found non-unique value for field {field} in index '{index}': "
|
|
64
|
-
f"{", ".join(duplicated_processes)}.")
|
|
65
|
-
|
|
66
|
-
logger.info("All values of field '%s' in index '%s' are unique.",
|
|
67
|
-
field, index)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def check_for_undefined_file_indices(es: elasticsearch.Elasticsearch, analyses_index: str) -> None:
|
|
27
|
+
def check_for_undefined_file_indices(es_query_conn: ElasticQueryConn, analyses_index: str) -> None:
|
|
71
28
|
"""
|
|
72
29
|
Check for potentially undefined files indices in the analyses index.
|
|
73
30
|
|
|
74
|
-
:param
|
|
31
|
+
:param es_query_conn: Elasticsearch database instance.
|
|
75
32
|
:param analyses_index: Name of the index where analyses are stored.
|
|
76
|
-
:raises DBIntegrityError:
|
|
77
|
-
are undefined.
|
|
33
|
+
:raises genelastic.common.DBIntegrityError:
|
|
34
|
+
Some files indices are used in the analyses index but are undefined.
|
|
78
35
|
"""
|
|
79
36
|
logger.info("Checking for references to undefined file indices in the index '%s'...",
|
|
80
37
|
analyses_index)
|
|
@@ -93,13 +50,13 @@ def check_for_undefined_file_indices(es: elasticsearch.Elasticsearch, analyses_i
|
|
|
93
50
|
}
|
|
94
51
|
}
|
|
95
52
|
|
|
96
|
-
buckets: typing.List[Bucket] = run_composite_aggregation(
|
|
53
|
+
buckets: typing.List[Bucket] = es_query_conn.run_composite_aggregation(analyses_index, query)
|
|
97
54
|
|
|
98
55
|
for bucket in buckets:
|
|
99
56
|
file_index = bucket['key']['file_index']
|
|
100
57
|
|
|
101
58
|
try:
|
|
102
|
-
|
|
59
|
+
es_query_conn.client.indices.get(index=file_index)
|
|
103
60
|
logger.debug("File index %s used in index '%s' is defined.",
|
|
104
61
|
file_index, analyses_index)
|
|
105
62
|
except NotFoundError:
|
|
@@ -115,12 +72,12 @@ def check_for_undefined_file_indices(es: elasticsearch.Elasticsearch, analyses_i
|
|
|
115
72
|
logger.info("All defined file indices are referenced.")
|
|
116
73
|
|
|
117
74
|
|
|
118
|
-
def get_undefined_processes(
|
|
75
|
+
def get_undefined_processes(es_query_conn: ElasticQueryConn, analyses_index: str,
|
|
119
76
|
process_index: str, field: str) -> typing.Set[str]:
|
|
120
77
|
"""
|
|
121
78
|
Return a set of undefined processes IDs in an index.
|
|
122
79
|
|
|
123
|
-
:param
|
|
80
|
+
:param es_query_conn: Elasticsearch database instance.
|
|
124
81
|
:param analyses_index: Name of the index where analyses are stored.
|
|
125
82
|
:param process_index: Name of the index to check for undefined processes.
|
|
126
83
|
:param field: Field name used to retrieve the process ID.
|
|
@@ -131,37 +88,39 @@ def get_undefined_processes(es: elasticsearch.Elasticsearch, analyses_index: str
|
|
|
131
88
|
"aggs": {
|
|
132
89
|
"get_analyses_processes": {
|
|
133
90
|
"composite": {
|
|
134
|
-
"sources": {
|
|
91
|
+
"sources": {"process": {"terms": {"field": f"{field}.keyword"}}},
|
|
135
92
|
"size": 1000,
|
|
136
93
|
}
|
|
137
94
|
}
|
|
138
95
|
}
|
|
139
96
|
}
|
|
140
97
|
|
|
141
|
-
buckets: typing.List[Bucket] = run_composite_aggregation(
|
|
98
|
+
buckets: typing.List[Bucket] = es_query_conn.run_composite_aggregation(analyses_index, query)
|
|
142
99
|
|
|
143
100
|
used_processes = set(map(lambda bucket: bucket["key"]["process"], buckets))
|
|
144
101
|
logger.debug("Used values for field '%s' in index '%s': %s",
|
|
145
102
|
field, analyses_index, used_processes)
|
|
146
103
|
|
|
147
|
-
defined_processes =
|
|
104
|
+
defined_processes = es_query_conn.get_field_values(process_index, "proc_id")
|
|
148
105
|
logger.debug("Defined values in index '%s': %s", process_index, defined_processes)
|
|
149
106
|
|
|
150
107
|
return used_processes.difference(defined_processes)
|
|
151
108
|
|
|
152
109
|
|
|
153
|
-
def check_for_undefined_wet_processes(
|
|
110
|
+
def check_for_undefined_wet_processes(es_query_conn: ElasticQueryConn,
|
|
154
111
|
analyses_index: str, wet_process_index: str) -> None:
|
|
155
112
|
"""
|
|
156
113
|
Check that each wet process used in the analyses index is defined.
|
|
157
114
|
|
|
158
|
-
:param
|
|
115
|
+
:param es_query_conn: Elasticsearch database instance.
|
|
159
116
|
:param analyses_index: Name of the index where analyses are stored.
|
|
160
117
|
:param wet_process_index: Name of the index where wet processes are stored.
|
|
161
|
-
:raises DBIntegrityError:
|
|
118
|
+
:raises genelastic.common.DBIntegrityError:
|
|
119
|
+
Some wet processes used in the analyses index are undefined.
|
|
162
120
|
"""
|
|
163
121
|
logger.info("Checking for undefined wet processes used in index '%s'...", analyses_index)
|
|
164
|
-
undefined_wet_processes = get_undefined_processes(
|
|
122
|
+
undefined_wet_processes = get_undefined_processes(es_query_conn,
|
|
123
|
+
analyses_index, wet_process_index,
|
|
165
124
|
"metadata.wet_process")
|
|
166
125
|
|
|
167
126
|
if len(undefined_wet_processes) > 0:
|
|
@@ -171,18 +130,21 @@ def check_for_undefined_wet_processes(es: elasticsearch.Elasticsearch,
|
|
|
171
130
|
logger.info("All wet processes used in index '%s' are defined.", wet_process_index)
|
|
172
131
|
|
|
173
132
|
|
|
174
|
-
def check_for_undefined_bi_processes(
|
|
133
|
+
def check_for_undefined_bi_processes(es_query_conn: ElasticQueryConn,
|
|
175
134
|
analyses_index: str, bi_process_index: str) -> None:
|
|
176
135
|
"""
|
|
177
136
|
Check that each bio info process used in the analyses index is defined.
|
|
178
137
|
|
|
179
|
-
:param
|
|
138
|
+
:param es_query_conn: Elasticsearch database instance.
|
|
180
139
|
:param analyses_index: Name of the index where analyses are stored.
|
|
181
140
|
:param bi_process_index: Name of the index where bio info processes are stored.
|
|
182
|
-
:raises DBIntegrityError:
|
|
141
|
+
:raises genelastic.common.DBIntegrityError:
|
|
142
|
+
Some bio info processes used in the analyses index are undefined.
|
|
183
143
|
"""
|
|
184
|
-
logger.info("Checking for undefined bio info processes used in index '%s'...",
|
|
185
|
-
|
|
144
|
+
logger.info("Checking for undefined bio info processes used in index '%s'...",
|
|
145
|
+
analyses_index)
|
|
146
|
+
undefined_bi_processes = get_undefined_processes(es_query_conn, analyses_index,
|
|
147
|
+
bi_process_index,
|
|
186
148
|
"metadata.bi_process")
|
|
187
149
|
|
|
188
150
|
if len(undefined_bi_processes) > 0:
|
|
@@ -193,18 +155,19 @@ def check_for_undefined_bi_processes(es: elasticsearch.Elasticsearch,
|
|
|
193
155
|
logger.info("All bio info processes used in index '%s' are defined.", bi_process_index)
|
|
194
156
|
|
|
195
157
|
|
|
196
|
-
def check_for_unused_file_indices(
|
|
158
|
+
def check_for_unused_file_indices(es_query_conn: ElasticQueryConn,
|
|
197
159
|
analyses_index: str, index_prefix: str) -> int:
|
|
198
160
|
"""
|
|
199
161
|
Check that each of the file indices are used in at least one analysis.
|
|
200
162
|
|
|
201
|
-
:param
|
|
163
|
+
:param es_query_conn: Elasticsearch database instance.
|
|
202
164
|
:param analyses_index: Name of the index where analyses are stored.
|
|
203
165
|
:param index_prefix: Prefix given to all the indices of the ElasticSearch database.
|
|
204
166
|
:returns: 1 if some file indices exists but are unused in the analyses index,
|
|
205
167
|
and 0 otherwise.
|
|
206
168
|
"""
|
|
207
|
-
json_indices =
|
|
169
|
+
json_indices = (es_query_conn.client.cat.
|
|
170
|
+
indices(index=f"{index_prefix}-file-*", format="json").body)
|
|
208
171
|
found_file_indices = set(map(lambda x: x["index"], json_indices))
|
|
209
172
|
|
|
210
173
|
query = {
|
|
@@ -219,7 +182,7 @@ def check_for_unused_file_indices(es: elasticsearch.Elasticsearch,
|
|
|
219
182
|
}
|
|
220
183
|
}
|
|
221
184
|
|
|
222
|
-
buckets: typing.List[Bucket] = run_composite_aggregation(
|
|
185
|
+
buckets: typing.List[Bucket] = es_query_conn.run_composite_aggregation(analyses_index, query)
|
|
223
186
|
|
|
224
187
|
used_files_indices = set(map(lambda bucket: bucket['key']['file_index'], buckets))
|
|
225
188
|
unused_files_indices = found_file_indices.difference(used_files_indices)
|
|
@@ -233,12 +196,12 @@ def check_for_unused_file_indices(es: elasticsearch.Elasticsearch,
|
|
|
233
196
|
return 0
|
|
234
197
|
|
|
235
198
|
|
|
236
|
-
def check_for_unused_wet_processes(
|
|
199
|
+
def check_for_unused_wet_processes(es_query_conn: ElasticQueryConn, analyses_index: str,
|
|
237
200
|
wet_proc_index: str) -> int:
|
|
238
201
|
"""
|
|
239
202
|
Check for defined wet processes that are not used in the analyses index.
|
|
240
203
|
|
|
241
|
-
:param
|
|
204
|
+
:param es_query_conn: Elasticsearch database instance.
|
|
242
205
|
:param analyses_index: Name of the index where analyses are stored.
|
|
243
206
|
:param wet_proc_index: Name of the index where wet processes are stored.
|
|
244
207
|
:returns: 1 if some wet process are defined but unused in the analyses index,
|
|
@@ -246,10 +209,10 @@ def check_for_unused_wet_processes(es: elasticsearch.Elasticsearch, analyses_ind
|
|
|
246
209
|
"""
|
|
247
210
|
logger.info("Checking for unused wet processes in the index '%s'...", wet_proc_index)
|
|
248
211
|
|
|
249
|
-
defined_wet_procs =
|
|
212
|
+
defined_wet_procs = es_query_conn.get_field_values(wet_proc_index, "proc_id")
|
|
250
213
|
logger.debug("Found the following defined wet processes: %s", defined_wet_procs)
|
|
251
214
|
|
|
252
|
-
used_wet_procs =
|
|
215
|
+
used_wet_procs = es_query_conn.get_field_values(analyses_index, "metadata.wet_process")
|
|
253
216
|
logger.debug("Following processes are used in the index '%s': %s",
|
|
254
217
|
analyses_index, used_wet_procs)
|
|
255
218
|
|
|
@@ -262,12 +225,12 @@ def check_for_unused_wet_processes(es: elasticsearch.Elasticsearch, analyses_ind
|
|
|
262
225
|
return 0
|
|
263
226
|
|
|
264
227
|
|
|
265
|
-
def check_for_unused_bi_processes(
|
|
228
|
+
def check_for_unused_bi_processes(es_query_conn: ElasticQueryConn, analyses_index: str,
|
|
266
229
|
bi_proc_index: str) -> int:
|
|
267
230
|
"""
|
|
268
231
|
Check for defined bio info processes that are not used in the analyses index.
|
|
269
232
|
|
|
270
|
-
:param
|
|
233
|
+
:param es_query_conn: Elasticsearch database instance.
|
|
271
234
|
:param analyses_index: Name of the index where analyses are stored.
|
|
272
235
|
:param bi_proc_index: Name of the index where bio info processes are stored.
|
|
273
236
|
:returns: 1 if some wet process are defined but unused in the analyses index,
|
|
@@ -275,10 +238,10 @@ def check_for_unused_bi_processes(es: elasticsearch.Elasticsearch, analyses_inde
|
|
|
275
238
|
"""
|
|
276
239
|
logger.info("Checking for unused bio info processes in the index '%s'...", bi_proc_index)
|
|
277
240
|
|
|
278
|
-
defined_bi_procs =
|
|
241
|
+
defined_bi_procs = es_query_conn.get_field_values(bi_proc_index, "proc_id")
|
|
279
242
|
logger.debug("Found the following defined bio info processes: %s", defined_bi_procs)
|
|
280
243
|
|
|
281
|
-
used_bi_procs =
|
|
244
|
+
used_bi_procs = es_query_conn.get_field_values(analyses_index, "metadata.bi_process")
|
|
282
245
|
logger.debug("Following processes are used in the index '%s': %s",
|
|
283
246
|
analyses_index, used_bi_procs)
|
|
284
247
|
|
|
@@ -302,22 +265,25 @@ def main() -> None:
|
|
|
302
265
|
wet_processes_index = f"{args.es_index_prefix}-wet_processes"
|
|
303
266
|
bi_processes_index = f"{args.es_index_prefix}-bi_processes"
|
|
304
267
|
|
|
305
|
-
|
|
268
|
+
addr = f"https://{args.es_host}:{args.es_port}"
|
|
269
|
+
logger.info("Trying to connect to Elasticsearch at %s...", addr)
|
|
270
|
+
es_query_conn = ElasticQueryConn(addr, args.es_cert_fp,
|
|
271
|
+
basic_auth=(args.es_usr, args.es_pwd))
|
|
306
272
|
|
|
307
273
|
# Fatal errors
|
|
308
274
|
try:
|
|
309
|
-
ensure_unique(
|
|
310
|
-
ensure_unique(
|
|
311
|
-
check_for_undefined_file_indices(
|
|
312
|
-
check_for_undefined_wet_processes(
|
|
313
|
-
check_for_undefined_bi_processes(
|
|
275
|
+
es_query_conn.ensure_unique(wet_processes_index, "proc_id")
|
|
276
|
+
es_query_conn.ensure_unique(bi_processes_index, "proc_id")
|
|
277
|
+
check_for_undefined_file_indices(es_query_conn, analyses_index)
|
|
278
|
+
check_for_undefined_wet_processes(es_query_conn, analyses_index, wet_processes_index)
|
|
279
|
+
check_for_undefined_bi_processes(es_query_conn, analyses_index, bi_processes_index)
|
|
314
280
|
except DBIntegrityError as e:
|
|
315
281
|
raise SystemExit(e) from e
|
|
316
282
|
|
|
317
283
|
# Warnings
|
|
318
|
-
check_for_unused_wet_processes(
|
|
319
|
-
check_for_unused_bi_processes(
|
|
320
|
-
check_for_unused_file_indices(
|
|
284
|
+
check_for_unused_wet_processes(es_query_conn, analyses_index, wet_processes_index)
|
|
285
|
+
check_for_unused_bi_processes(es_query_conn, analyses_index, bi_processes_index)
|
|
286
|
+
check_for_unused_file_indices(es_query_conn, analyses_index, args.es_index_prefix)
|
|
321
287
|
|
|
322
288
|
|
|
323
289
|
if __name__ == '__main__':
|
|
@@ -2,11 +2,12 @@
|
|
|
2
2
|
import argparse
|
|
3
3
|
import logging
|
|
4
4
|
|
|
5
|
-
from schema import SchemaError # type: ignore
|
|
5
|
+
from schema import SchemaError # type: ignore
|
|
6
|
+
|
|
7
|
+
from genelastic.common import add_verbose_control_args
|
|
6
8
|
|
|
7
|
-
from . import make_import_bundle_from_files
|
|
8
|
-
from .common import add_verbose_control_args
|
|
9
9
|
from .logger import configure_logging
|
|
10
|
+
from .import_bundle_factory import make_import_bundle_from_files
|
|
10
11
|
|
|
11
12
|
logger = logging.getLogger('genelastic')
|
|
12
13
|
|
|
@@ -15,7 +16,8 @@ def read_args() -> argparse.Namespace:
|
|
|
15
16
|
"""Read arguments from command line."""
|
|
16
17
|
parser = argparse.ArgumentParser(description="Ensure that YAML files "
|
|
17
18
|
"follow the genelastic YAML bundle schema.",
|
|
18
|
-
formatter_class=argparse.ArgumentDefaultsHelpFormatter
|
|
19
|
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
20
|
+
allow_abbrev=False)
|
|
19
21
|
add_verbose_control_args(parser)
|
|
20
22
|
parser.add_argument('files', type=str, nargs="+", default=None,
|
|
21
23
|
help="YAML files to validate.")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: genelastic
|
|
3
|
-
Version: 0.6.
|
|
3
|
+
Version: 0.6.1
|
|
4
4
|
Summary: Generate and store genetic data into an Elasticsearch database.
|
|
5
5
|
Author: CNRGH
|
|
6
6
|
Author-email: Pierrick ROGER <pierrick.roger@cnrgh.fr>, Maxime BLANCHON <maxime.blanchon@cnrgh.fr>
|
|
@@ -18,6 +18,11 @@ Requires-Dist: schema
|
|
|
18
18
|
Requires-Dist: PyYAML
|
|
19
19
|
Requires-Dist: biophony >=1.0.1
|
|
20
20
|
Requires-Dist: colorlog
|
|
21
|
+
Provides-Extra: api
|
|
22
|
+
Requires-Dist: flask ; extra == 'api'
|
|
23
|
+
Requires-Dist: elasticsearch ; extra == 'api'
|
|
24
|
+
Requires-Dist: environs ; extra == 'api'
|
|
25
|
+
Requires-Dist: connexion[flask,swagger-ui,uvicorn] ; extra == 'api'
|
|
21
26
|
Provides-Extra: docs
|
|
22
27
|
Requires-Dist: sphinx ; extra == 'docs'
|
|
23
28
|
Requires-Dist: sphinx-autoapi ; extra == 'docs'
|
|
@@ -25,7 +30,7 @@ Requires-Dist: furo ; extra == 'docs'
|
|
|
25
30
|
Provides-Extra: tests
|
|
26
31
|
Requires-Dist: pytest ; extra == 'tests'
|
|
27
32
|
Requires-Dist: mypy ; extra == 'tests'
|
|
28
|
-
Requires-Dist: pylint
|
|
33
|
+
Requires-Dist: pylint ; extra == 'tests'
|
|
29
34
|
Requires-Dist: bandit ; extra == 'tests'
|
|
30
35
|
Requires-Dist: coverage ; extra == 'tests'
|
|
31
36
|
Requires-Dist: yamllint ; extra == 'tests'
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
genelastic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
genelastic/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
genelastic/api/routes.py,sha256=FicBE_HStV6u8-Q9k6ABNLJNBwRFPsSTjAoTc0JnocU,2882
|
|
4
|
+
genelastic/api/server.py,sha256=oJREb8LfPM9O3vd8grTnZhQptYcIYXY-qFlFH1Z7G-8,2271
|
|
5
|
+
genelastic/api/settings.py,sha256=A6idvtaaT5Q-v78S8EKiE1LjYdLOvaXyxx7KrREq_9c,479
|
|
6
|
+
genelastic/api/extends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
genelastic/api/extends/example.py,sha256=mVOG6HzkxZ2uzAHIlG6OqCJstED6Ie421O6SIBFmU2w,207
|
|
8
|
+
genelastic/common/__init__.py,sha256=cK_dmQbI4pf7GQqTwdqbUqUMQcJuF9tBGpE0JR1EUp0,747
|
|
9
|
+
genelastic/common/cli.py,sha256=t2Lk8I1ZyF5LlLnZu03JT4Z58_Yl5l2UMdKIyDuhqg8,1738
|
|
10
|
+
genelastic/common/elastic.py,sha256=uDnc03jqvflBeUiAkevJq_oZnsKDxXOul6x0pF4d_wg,6956
|
|
11
|
+
genelastic/common/exceptions.py,sha256=YSXqF2f29x9rKZYRT-5wko0ySGgggvNBnUV-8n2hoc4,203
|
|
12
|
+
genelastic/common/types.py,sha256=RBHZwW3wNYIM4KG9APWUuvXp1oztjlMFpuFhzoi26UI,1061
|
|
13
|
+
genelastic/import_data/__init__.py,sha256=uczwevd0ikG6GsA9Lkjei19TPCk0hny6iacKFje1w7w,413
|
|
14
|
+
genelastic/import_data/analyses.py,sha256=jS9dRJveWiE06eRQT0tcra_UWwTVfjK1lDliWnv9nNA,1974
|
|
15
|
+
genelastic/import_data/analysis.py,sha256=qHPi7iAiMxhy9Ljjv1qndmzUX9G3yFfcH4Mu8LX_ujQ,7918
|
|
16
|
+
genelastic/import_data/bi_process.py,sha256=WZ4cqLnD0wyzCQdTpb6Zj11BN9B7ytaX7MBF2CFqlhg,708
|
|
17
|
+
genelastic/import_data/bi_processes.py,sha256=scZgak_Ihp5UYgYBQcdPxVt7bnh7cESt-PJ4xx_pnWw,1416
|
|
18
|
+
genelastic/import_data/constants.py,sha256=Y-3i8VlMFTcS158tsbBjvjsCvnWXVXE-Y3A2QOgcoOE,935
|
|
19
|
+
genelastic/import_data/data_file.py,sha256=e7iEqltECVDTMxBc7JcUHOfv434_thFQlcLlYIEiD_I,2721
|
|
20
|
+
genelastic/import_data/filename_pattern.py,sha256=w4sX9lCcTLcA2zgXE6lMQOHQSMEi5FgW_nVUzlmjpvE,1991
|
|
21
|
+
genelastic/import_data/gen_data.py,sha256=tVms8CsKvxtxXlS1o6jEKpy1AJi1waI5MerZgQQitrc,6979
|
|
22
|
+
genelastic/import_data/import_bundle.py,sha256=FMfw-ZwywWEXkRwaRdsj_E1VmoXiEbPrx5Wf8MUpx1Y,4876
|
|
23
|
+
genelastic/import_data/import_bundle_factory.py,sha256=otaWF8NqimfAf9-1fenDAeU63e_6oR6-Ugdj0JsBt8w,9092
|
|
24
|
+
genelastic/import_data/import_data.py,sha256=86YDW06XcqGCfKuizolDHwGnOjeN_i6x7NnfU6lAENQ,11788
|
|
25
|
+
genelastic/import_data/info.py,sha256=naUAqMqIwo6L36KsLVGcE-d1G35PDvkaV-qrIUcSBQ0,7328
|
|
26
|
+
genelastic/import_data/integrity.py,sha256=7mN-py67k2wVWOxnYCv0orabeL6TZ3O8wn3yk1Rw3vA,12207
|
|
27
|
+
genelastic/import_data/logger.py,sha256=eV_LACPjkIg3G_D5g0oTcIRZL86E_iQ2UM_t0CwEkUI,1835
|
|
28
|
+
genelastic/import_data/tags.py,sha256=815hsW-cpqX09vG3a4W9uWhRCMNMtpedJMrHQxJw6zg,3924
|
|
29
|
+
genelastic/import_data/validate_data.py,sha256=u8-FNcofP0crx_jKdM8NRjfm8WK7_WwkBX_y0pM1TBc,1604
|
|
30
|
+
genelastic/import_data/wet_process.py,sha256=uhsZrpDHUiP6-Y6f6_3xcsvqDl0ew_-9aY8vFr3kB3A,693
|
|
31
|
+
genelastic/import_data/wet_processes.py,sha256=rWHX3RY4_mQd5JXHrzPCno6-uKVx8MmYxAQl_n9xftM,1366
|
|
32
|
+
genelastic-0.6.1.dist-info/METADATA,sha256=uocOr4DpI4aJvimcMdBTt5DBZpBP4o14J9S_vHWMVZw,1537
|
|
33
|
+
genelastic-0.6.1.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
|
|
34
|
+
genelastic-0.6.1.dist-info/entry_points.txt,sha256=tPM55ca4ft8XNNFqRFJFtoQ0gTYmFi4Yww4R4qiVbjw,264
|
|
35
|
+
genelastic-0.6.1.dist-info/top_level.txt,sha256=ra4gCsuKH1d0sXygcnwD_u597ir6bYYxWTS7dkA6vdM,11
|
|
36
|
+
genelastic-0.6.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
[console_scripts]
|
|
2
|
+
db_info = genelastic.import_data.info:main
|
|
3
|
+
db_integrity = genelastic.import_data.integrity:main
|
|
4
|
+
gen-data = genelastic.import_data.gen_data:main
|
|
5
|
+
import = genelastic.import_data.import_data:main
|
|
6
|
+
validate = genelastic.import_data.validate_data:main
|
genelastic/common.py
DELETED
|
@@ -1,151 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Module: common
|
|
3
|
-
|
|
4
|
-
This module contains custom types and functions shared by multiple genelastic scripts.
|
|
5
|
-
"""
|
|
6
|
-
import argparse
|
|
7
|
-
import sys
|
|
8
|
-
import typing
|
|
9
|
-
import logging
|
|
10
|
-
|
|
11
|
-
import elastic_transport
|
|
12
|
-
import elasticsearch
|
|
13
|
-
|
|
14
|
-
logger = logging.getLogger('genelastic')
|
|
15
|
-
|
|
16
|
-
AnalysisMetaData: typing.TypeAlias = typing.Dict[str, str | int]
|
|
17
|
-
WetProcessesData: typing.TypeAlias = typing.Dict[str, str | int | float]
|
|
18
|
-
BioInfoProcessData: typing.TypeAlias = typing.Dict[str, str | typing.List[str]]
|
|
19
|
-
BundleDict: typing.TypeAlias = typing.Dict[str, typing.Any]
|
|
20
|
-
|
|
21
|
-
AnalysisDocument: typing.TypeAlias = typing.Dict[str, str | None | AnalysisMetaData]
|
|
22
|
-
MetadataDocument: typing.TypeAlias = typing.Dict[str, int | str | typing.List[typing.Any | None]]
|
|
23
|
-
ProcessDocument: typing.TypeAlias = (typing.Dict[str, str] |
|
|
24
|
-
WetProcessesData |
|
|
25
|
-
BioInfoProcessData)
|
|
26
|
-
BulkItems: typing.TypeAlias = typing.List[typing.Dict[str, str |
|
|
27
|
-
MetadataDocument |
|
|
28
|
-
AnalysisDocument |
|
|
29
|
-
ProcessDocument]]
|
|
30
|
-
Bucket: typing.TypeAlias = typing.Dict[str, typing.Dict[typing.Any, typing.Any]]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def connect_to_es(host: str, port: int, usr: str, pwd: str) -> elasticsearch.Elasticsearch:
|
|
34
|
-
"""Connect to a remote Elasticsearch database."""
|
|
35
|
-
addr = f"https://{host}:{port}"
|
|
36
|
-
logger.info("Trying to connect to Elasticsearch at %s.", addr)
|
|
37
|
-
|
|
38
|
-
try:
|
|
39
|
-
es = elasticsearch.Elasticsearch(
|
|
40
|
-
addr,
|
|
41
|
-
# ssl_assert_fingerprint=args.es_cert_fp,
|
|
42
|
-
# ca_certs=args.es_cert,
|
|
43
|
-
verify_certs=False,
|
|
44
|
-
basic_auth=(usr, pwd)
|
|
45
|
-
)
|
|
46
|
-
logger.info(es.info())
|
|
47
|
-
except elastic_transport.TransportError as e:
|
|
48
|
-
logger.error(e.message)
|
|
49
|
-
sys.exit(1)
|
|
50
|
-
return es
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def run_composite_aggregation(es: elasticsearch.Elasticsearch,
|
|
54
|
-
index: str, query: typing.Dict[str, typing.Any]) \
|
|
55
|
-
-> typing.List[Bucket]:
|
|
56
|
-
"""
|
|
57
|
-
Executes a composite aggregation on an Elasticsearch index and returns all paginated results.
|
|
58
|
-
|
|
59
|
-
:param es: Elasticsearch client instance.
|
|
60
|
-
:param index: Name of the index to query.
|
|
61
|
-
:param query: Aggregation query to run.
|
|
62
|
-
:return: List of aggregation results.
|
|
63
|
-
"""
|
|
64
|
-
# Extract the aggregation name from the query dict.
|
|
65
|
-
agg_name = next(iter(query["aggs"]))
|
|
66
|
-
all_buckets: typing.List[Bucket] = []
|
|
67
|
-
|
|
68
|
-
try:
|
|
69
|
-
logger.debug("Running composite aggregation query %s on index '%s'.", query, index)
|
|
70
|
-
response = es.search(index=index, body=query)
|
|
71
|
-
except elasticsearch.NotFoundError as e:
|
|
72
|
-
raise SystemExit(f"Error: {e.message} for index '{index}'.") from e
|
|
73
|
-
|
|
74
|
-
while True:
|
|
75
|
-
# Extract buckets from the response.
|
|
76
|
-
buckets: typing.List[Bucket] = response['aggregations'][agg_name]['buckets']
|
|
77
|
-
all_buckets.extend(buckets)
|
|
78
|
-
|
|
79
|
-
# Check if there are more results to fetch.
|
|
80
|
-
if 'after_key' in response['aggregations'][agg_name]:
|
|
81
|
-
after_key = response['aggregations'][agg_name]['after_key']
|
|
82
|
-
query['aggs'][agg_name]['composite']['after'] = after_key
|
|
83
|
-
try:
|
|
84
|
-
logger.debug("Running query %s on index '%s'.", query, index)
|
|
85
|
-
response = es.search(index=index, body=query) # Fetch the next page of results.
|
|
86
|
-
except elasticsearch.NotFoundError as e:
|
|
87
|
-
raise SystemExit(f"Error: {e.message} for index '{index}'.") from e
|
|
88
|
-
else:
|
|
89
|
-
break
|
|
90
|
-
|
|
91
|
-
return all_buckets
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def get_process_ids(es: elasticsearch.Elasticsearch, index: str, proc_field_name: str) \
|
|
95
|
-
-> typing.Set[str]:
|
|
96
|
-
"""Return a set of process IDs."""
|
|
97
|
-
process_ids = set()
|
|
98
|
-
|
|
99
|
-
query = {
|
|
100
|
-
"size": 0,
|
|
101
|
-
"aggs": {
|
|
102
|
-
"get_proc_ids": {
|
|
103
|
-
"composite": {
|
|
104
|
-
"sources": {"proc_id": {"terms": {"field": f"{proc_field_name}.keyword"}}},
|
|
105
|
-
"size": 1000,
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
buckets: typing.List[Bucket] = run_composite_aggregation(es, index, query)
|
|
112
|
-
|
|
113
|
-
for bucket in buckets:
|
|
114
|
-
process_ids.add(bucket['key']['proc_id'])
|
|
115
|
-
|
|
116
|
-
return process_ids
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
def add_verbose_control_args(parser: argparse.ArgumentParser) -> None:
|
|
120
|
-
"""
|
|
121
|
-
Add verbose control arguments to the parser.
|
|
122
|
-
Arguments are added to the parser by using its reference.
|
|
123
|
-
"""
|
|
124
|
-
parser.add_argument('-q', '--quiet', dest='verbose', action='store_const',
|
|
125
|
-
const=0, default=1,
|
|
126
|
-
help='Set verbosity to 0 (quiet mode).')
|
|
127
|
-
parser.add_argument('-v', '--verbose', dest='verbose', action='count',
|
|
128
|
-
default=1,
|
|
129
|
-
help=('Verbose level. -v for information, -vv for debug,' +
|
|
130
|
-
' -vvv for trace.'))
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def add_es_connection_args(parser: argparse.ArgumentParser) -> None:
|
|
134
|
-
"""
|
|
135
|
-
Add arguments to the parser needed to gather ElasticSearch server connection parameters.
|
|
136
|
-
Arguments are added to the parser by using its reference.
|
|
137
|
-
"""
|
|
138
|
-
parser.add_argument('--es-host', dest='es_host', default='localhost',
|
|
139
|
-
help='Address of Elasticsearch host.')
|
|
140
|
-
parser.add_argument('--es-port', type=int, default=9200, dest='es_port',
|
|
141
|
-
help='Elasticsearch port.')
|
|
142
|
-
parser.add_argument('--es-usr', dest='es_usr', default='elastic',
|
|
143
|
-
help='Elasticsearch user.')
|
|
144
|
-
parser.add_argument('--es-pwd', dest='es_pwd', required=True,
|
|
145
|
-
help='Elasticsearch password.')
|
|
146
|
-
parser.add_argument('--es-cert', dest='es_cert',
|
|
147
|
-
help='Elasticsearch certificate file.')
|
|
148
|
-
parser.add_argument('--es-cert-fp', dest='es_cert_fp',
|
|
149
|
-
help='Elasticsearch certificate fingerprint.')
|
|
150
|
-
parser.add_argument('--es-index-prefix', dest='es_index_prefix',
|
|
151
|
-
help='Add the given prefix to each index created during import.')
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
genelastic/__init__.py,sha256=lMTq5VsAuRjNlf3eAEqGE-Yvht63IJ0nIf9z_1hwC00,486
|
|
2
|
-
genelastic/analyses.py,sha256=UTvNIhZpK2zF77zg02ftyAdUNpWhTwQJeqb2scU2b_Y,1961
|
|
3
|
-
genelastic/analysis.py,sha256=N8oo8uXoFbdLb7C1_67rTFEzV962G_CIqlaEE9IPjiM,7876
|
|
4
|
-
genelastic/bi_process.py,sha256=CT4AFFv-pyJceKnYCHKS7SKGhVuSxOJUw5CXSbED15s,698
|
|
5
|
-
genelastic/bi_processes.py,sha256=ciGQyoR4Tuxhoq4FPK6b0O92AzGLgijVGqS19QeMg6I,1405
|
|
6
|
-
genelastic/common.py,sha256=22SDJJmED2bQygO7GjXFfnB-KH0UujoH867bpz2OAQ4,6065
|
|
7
|
-
genelastic/constants.py,sha256=Y-3i8VlMFTcS158tsbBjvjsCvnWXVXE-Y3A2QOgcoOE,935
|
|
8
|
-
genelastic/data_file.py,sha256=QzOOThuCRlWg_iiH3-6FnYZaVgDVfJI0YxZ0Eoz30kc,2709
|
|
9
|
-
genelastic/filename_pattern.py,sha256=IDQ9ffXxISJ6VMineu-qxnxZgjyejhVVesWIyUhbriE,1980
|
|
10
|
-
genelastic/gen_data.py,sha256=s8-wTh7O7tyuszcIQC4dP1_kVyWLFMhtQMhQLL2JlD8,6922
|
|
11
|
-
genelastic/import_bundle.py,sha256=ZqiKi5BYBo4by2FWBsS5qGyDRn7xxLtSb3ks1SqySNc,4865
|
|
12
|
-
genelastic/import_bundle_factory.py,sha256=nK-VlJATgCNnJSTQotOva89j9H5pdJqU58u1QtlqJkA,9080
|
|
13
|
-
genelastic/import_data.py,sha256=SENK1_Khw88Jgs8EXvDwk9jhQidiETxmAVhn9ag6jNs,11489
|
|
14
|
-
genelastic/info.py,sha256=3fk1fPrpfK8oRo1WnABNDSGdEpq1G6wvCW_D8meyHss,7789
|
|
15
|
-
genelastic/integrity.py,sha256=ypXl9kAdnsxa7LgZ9nDgsklBqVlG9I61A5hqfGeGYgs,13090
|
|
16
|
-
genelastic/logger.py,sha256=eV_LACPjkIg3G_D5g0oTcIRZL86E_iQ2UM_t0CwEkUI,1835
|
|
17
|
-
genelastic/tags.py,sha256=xHCLWgnXcLUUKN3zthQXoJ7yjEhPoQi7JLvdMtB6T5c,3913
|
|
18
|
-
genelastic/validate_data.py,sha256=V0f7fFTs5FkVU8NoBfDI7mQDwITzW_QXt3bj5OgsdzQ,1531
|
|
19
|
-
genelastic/wet_process.py,sha256=uhsZrpDHUiP6-Y6f6_3xcsvqDl0ew_-9aY8vFr3kB3A,693
|
|
20
|
-
genelastic/wet_processes.py,sha256=PtV0HFs6rGan_-3-BiXeab-VBX1JQGucktoXE4GuaAk,1355
|
|
21
|
-
genelastic-0.6.0.dist-info/METADATA,sha256=Ad8wOo_mTY3l7RVy9WNdMAzVnWhTxEb2uacXue1CdUU,1335
|
|
22
|
-
genelastic-0.6.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
23
|
-
genelastic-0.6.0.dist-info/entry_points.txt,sha256=ZYi1_Rmjl-9XRywzPdV-U7TxA7Z6yyLVt-W13fZtxsQ,204
|
|
24
|
-
genelastic-0.6.0.dist-info/top_level.txt,sha256=ra4gCsuKH1d0sXygcnwD_u597ir6bYYxWTS7dkA6vdM,11
|
|
25
|
-
genelastic-0.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|