genelastic-0.7.0-py3-none-any.whl → genelastic-0.9.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (90)
  1. genelastic/api/.env +4 -0
  2. genelastic/api/cli_start_api.py +18 -0
  3. genelastic/api/errors.py +52 -0
  4. genelastic/api/extends/example.py +0 -6
  5. genelastic/api/extends/example.yml +0 -0
  6. genelastic/api/routes.py +313 -181
  7. genelastic/api/server.py +34 -26
  8. genelastic/api/settings.py +5 -9
  9. genelastic/api/specification.yml +512 -0
  10. genelastic/common/__init__.py +0 -39
  11. genelastic/common/cli.py +100 -0
  12. genelastic/common/elastic.py +374 -46
  13. genelastic/common/exceptions.py +34 -2
  14. genelastic/common/server.py +59 -0
  15. genelastic/common/types.py +1 -14
  16. genelastic/import_data/__init__.py +0 -27
  17. genelastic/import_data/checker.py +99 -0
  18. genelastic/import_data/checker_observer.py +13 -0
  19. genelastic/import_data/cli/__init__.py +0 -0
  20. genelastic/import_data/cli/cli_check.py +136 -0
  21. genelastic/import_data/cli/gen_data.py +143 -0
  22. genelastic/import_data/cli/import_data.py +346 -0
  23. genelastic/import_data/cli/info.py +247 -0
  24. genelastic/import_data/{cli_integrity.py → cli/integrity.py} +29 -7
  25. genelastic/import_data/cli/validate.py +146 -0
  26. genelastic/import_data/collect.py +185 -0
  27. genelastic/import_data/constants.py +136 -11
  28. genelastic/import_data/import_bundle.py +102 -59
  29. genelastic/import_data/import_bundle_factory.py +70 -149
  30. genelastic/import_data/importers/__init__.py +0 -0
  31. genelastic/import_data/importers/importer_base.py +131 -0
  32. genelastic/import_data/importers/importer_factory.py +85 -0
  33. genelastic/import_data/importers/importer_types.py +223 -0
  34. genelastic/import_data/logger.py +2 -1
  35. genelastic/import_data/models/__init__.py +0 -0
  36. genelastic/import_data/models/analyses.py +178 -0
  37. genelastic/import_data/models/analysis.py +144 -0
  38. genelastic/import_data/models/data_file.py +110 -0
  39. genelastic/import_data/models/process.py +45 -0
  40. genelastic/import_data/models/processes.py +84 -0
  41. genelastic/import_data/models/tags.py +170 -0
  42. genelastic/import_data/models/unique_list.py +109 -0
  43. genelastic/import_data/models/validate.py +26 -0
  44. genelastic/import_data/patterns.py +90 -0
  45. genelastic/import_data/random_bundle.py +79 -54
  46. genelastic/import_data/resolve.py +157 -0
  47. genelastic/ui/.env +1 -0
  48. genelastic/ui/cli_start_ui.py +20 -0
  49. genelastic/ui/routes.py +333 -0
  50. genelastic/ui/server.py +9 -82
  51. genelastic/ui/settings.py +2 -6
  52. genelastic/ui/static/cea-cnrgh.ico +0 -0
  53. genelastic/ui/static/cea.ico +0 -0
  54. genelastic/ui/static/layout.ico +0 -0
  55. genelastic/ui/static/novaseq6000.png +0 -0
  56. genelastic/ui/static/style.css +430 -0
  57. genelastic/ui/static/ui.js +458 -0
  58. genelastic/ui/templates/analyses.html +98 -0
  59. genelastic/ui/templates/analysis_detail.html +44 -0
  60. genelastic/ui/templates/bi_process_detail.html +129 -0
  61. genelastic/ui/templates/bi_processes.html +116 -0
  62. genelastic/ui/templates/explorer.html +356 -0
  63. genelastic/ui/templates/home.html +207 -0
  64. genelastic/ui/templates/layout.html +153 -0
  65. genelastic/ui/templates/version.html +21 -0
  66. genelastic/ui/templates/wet_process_detail.html +131 -0
  67. genelastic/ui/templates/wet_processes.html +116 -0
  68. genelastic-0.9.0.dist-info/METADATA +686 -0
  69. genelastic-0.9.0.dist-info/RECORD +76 -0
  70. genelastic-0.9.0.dist-info/WHEEL +4 -0
  71. genelastic-0.9.0.dist-info/entry_points.txt +10 -0
  72. genelastic-0.9.0.dist-info/licenses/LICENSE +519 -0
  73. genelastic/import_data/analyses.py +0 -69
  74. genelastic/import_data/analysis.py +0 -205
  75. genelastic/import_data/bi_process.py +0 -27
  76. genelastic/import_data/bi_processes.py +0 -49
  77. genelastic/import_data/cli_gen_data.py +0 -116
  78. genelastic/import_data/cli_import.py +0 -379
  79. genelastic/import_data/cli_info.py +0 -256
  80. genelastic/import_data/cli_validate.py +0 -54
  81. genelastic/import_data/data_file.py +0 -87
  82. genelastic/import_data/filename_pattern.py +0 -57
  83. genelastic/import_data/tags.py +0 -123
  84. genelastic/import_data/wet_process.py +0 -28
  85. genelastic/import_data/wet_processes.py +0 -53
  86. genelastic-0.7.0.dist-info/METADATA +0 -105
  87. genelastic-0.7.0.dist-info/RECORD +0 -40
  88. genelastic-0.7.0.dist-info/WHEEL +0 -5
  89. genelastic-0.7.0.dist-info/entry_points.txt +0 -6
  90. genelastic-0.7.0.dist-info/top_level.txt +0 -1
genelastic/import_data/cli/import_data.py
@@ -0,0 +1,346 @@
+ # vi: se tw=80
+
+ # Elasticsearch Python API:
+ # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/overview.html
+ # https://elasticsearch-py.readthedocs.io/en/latest/api.html
+
+ import argparse
+ import logging
+ import sys
+ from datetime import UTC, datetime
+ from pathlib import Path
+
+ from genelastic.common.cli import (
+     add_es_connection_args,
+     add_verbose_control_args,
+     add_version_arg,
+     log_item,
+     log_section,
+     log_subsection,
+     positive_int,
+ )
+ from genelastic.common.elastic import ElasticImportConn
+ from genelastic.import_data.import_bundle_factory import (
+     make_import_bundle_from_files,
+ )
+ from genelastic.import_data.importers.importer_base import ImporterError
+ from genelastic.import_data.importers.importer_factory import ImporterFactory
+ from genelastic.import_data.logger import configure_logging
+ from genelastic.import_data.models.analysis import Analysis
+ from genelastic.import_data.models.data_file import DataFile
+ from genelastic.import_data.models.processes import Processes
+
+ logger = logging.getLogger("genelastic")
+ logging.getLogger("elastic_transport").setLevel(
+     logging.WARNING
+ ) # Disable excessive logging
+ logging.getLogger("urllib3").setLevel(
+     logging.WARNING
+ ) # Disable excessive logging
+
+
+ def read_args() -> argparse.Namespace:
+     """Read arguments from command line."""
+     parser = argparse.ArgumentParser(
+         description="Genetics data importer.",
+         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+         allow_abbrev=False,
+     )
+     add_version_arg(parser)
+     add_verbose_control_args(parser)
+     add_es_connection_args(parser)
+     parser.add_argument(
+         "-D",
+         "--dry-run",
+         dest="dryrun",
+         action="count",
+         default=0,
+         help=(
+             "Dry-run level. -D for data files loading (VCF, coverage, etc) "
+             "without connecting or importing to database. "
+             "-DD for metadata YAML files loading only (no loading of data files)."
+         ),
+     )
+     parser.add_argument(
+         "--log-file", dest="log_file", help="Path to a log file."
+     )
+     parser.add_argument(
+         "--no-list",
+         dest="no_list",
+         action="store_true",
+         help="Do not print list of files to be imported.",
+     )
+     parser.add_argument(
+         "--no-confirm",
+         dest="no_confirm",
+         action="store_true",
+         help="Do not ask confirmation before importing.",
+     )
+     parser.add_argument(
+         "-t",
+         "--threads",
+         dest="thread_count",
+         type=positive_int,
+         default=4,
+         help="Number of threads to use for parallel data files import.",
+     )
+     parser.add_argument(
+         "--multi-match",
+         dest="multi_match",
+         action="store_true",
+         help=(
+             "Enable grouping of files from the same 'data_path' into multiple "
+             "analyses by extracting variable metadata fields directly from "
+             "filenames using the file prefix. If some metadata fields (e.g., "
+             "sample_name, wet_process, bi_process) are not defined in the YAML "
+             "bundle, the importer detects all analyses sharing the same "
+             "defined metadata, but differing by the undefined fields. This "
+             "allows importing and filtering several analyses at once from a "
+             "single directory, based on the metadata present in filenames. "
+             "When disabled (default), only files matching the fixed filename "
+             "pattern (where all metadata fields are defined in the YAML) are "
+             "grouped into a single analysis; other files are ignored."
+         ),
+     )
+     parser.add_argument(
+         "files",
+         type=Path,
+         nargs="+",
+         default=None,
+         help="Data files that describe what to import.",
+     )
+     return parser.parse_args()
+
+
+ def import_analysis(
+     es_import_conn: ElasticImportConn,
+     analysis: Analysis,
+ ) -> None:
+     """Import analysis into a dedicated index."""
+     logger.info(
+         " -> Importing analysis '%s' metadata into index '%s'...",
+         analysis.id,
+         es_import_conn.analyses_index,
+     )
+
+     documents = [
+         {
+             "_index": es_import_conn.analyses_index,
+             "_source": {
+                 "created_at": datetime.now(UTC).isoformat(),
+                 "analysis_id": analysis.id,
+                 "bundle_file": str(analysis.bundle_file),
+                 "data_path": str(analysis.data_path),
+                 "metadata": analysis.metadata,
+             },
+         }
+     ]
+
+     es_import_conn.bulk_import(documents)
+
+
+ def import_data_file(
+     es_import_conn: ElasticImportConn,
+     data_file: DataFile,
+ ) -> None:
+     """Import data files into a dedicated index."""
+     logger.info(
+         " -> Importing metadata into index '%s'...",
+         es_import_conn.data_files_index,
+     )
+
+     documents = [
+         {
+             "_index": es_import_conn.data_files_index,
+             "_source": {
+                 "created_at": datetime.now(UTC).isoformat(),
+                 "analysis_id": data_file.analysis_id,
+                 "path": str(data_file.path),
+                 "bundle_file": str(data_file.bundle_file),
+                 "metadata": data_file.metadata,
+                 "metrics": data_file.metrics,
+             },
+         }
+     ]
+
+     es_import_conn.bulk_import(documents)
+
+
+ def import_data_file_content(
+     es_import_conn: ElasticImportConn,
+     data_file: DataFile,
+     thread_count: int,
+     dry_run: int,
+ ) -> None:
+     """Import data file content into a dedicated index,
+     based on their extension and type.
+     """
+     # -DD: no file processing, no import.
+     if dry_run > 1:
+         logger.info("[Dryrun] Data file neither processed nor imported.")
+         return
+
+     try:
+         logger.info(
+             " -> Processing file content for import...",
+         )
+         importer = ImporterFactory.get_importer(
+             data_file, es_import_conn, thread_count
+         )
+
+         # -D: only process files, no import.
+         if dry_run == 1:
+             logger.info("[Dryrun] Data file processed but not imported.")
+             return
+
+         logger.info(
+             " -> Importing file content into index '%s'...",
+             importer.target_index,
+         )
+         importer.import_docs()
+     except ImporterError as e:
+         logger.error(e)
+
+
+ def import_processes(
+     es_import_conn: ElasticImportConn,
+     index: str,
+     processes: Processes,
+ ) -> None:
+     """Import processes into a dedicated index, based on their type."""
+     documents = [
+         {
+             "_index": index,
+             "_source": {
+                 "proc_id": process.id,
+                 "type": process.type,
+                 "metadata": process.data,
+             },
+         }
+         for process in processes.values()
+     ]
+
+     es_import_conn.bulk_import(documents)
+
+
+ def main() -> None:
+     """Entry point of the import script."""
+     # Read command line arguments
+     args = read_args()
+
+     # Configure logging
+     configure_logging(args.verbose, log_file=args.log_file)
+     logger.debug("Arguments: %s", args)
+     logger.debug("LOGGERS: %s", logging.root.manager.loggerDict)
+
+     # Open connection to ES
+     addr = f"https://{args.es_host}:{args.es_port}"
+     logger.info("Connecting to Elasticsearch at %s...", addr)
+     es_import_conn = ElasticImportConn(
+         addr,
+         args.es_cert_fp,
+         args.es_index_prefix,
+         args.dryrun,
+         basic_auth=(args.es_usr, args.es_pwd),
+     )
+
+     log_section("LOAD DATA")
+     logger.info("")
+     import_bundle = make_import_bundle_from_files(
+         args.files, multi_match=args.multi_match, check=True
+     )
+     all_bundled_files = import_bundle.analyses.get_data_files()
+
+     if not all_bundled_files:
+         logger.warning("No matching data files found from import bundle(s) !")
+
+     log_section("IMPORT DATA")
+     # List files before importing.
+     if not args.no_list:
+         logger.info("")
+         logger.info(
+             "The following %s file(s) will be imported:", len(all_bundled_files)
+         )
+
+         for data_file in all_bundled_files:
+             logger.info("- '%s'", data_file.path)
+     else:
+         logger.debug(
+             "'--no-list' argument provided: "
+             "not listing files about to be imported."
+         )
+
+     # Ask confirmation for importing
+     if not args.no_confirm:
+         answer: str = "maybe"
+         while answer not in ["", "n", "y"]:
+             answer = input("Import (y/N)? ").lower()
+         if answer != "y":
+             logger.info("Import canceled.")
+             sys.exit(0)
+     else:
+         logger.debug(
+             "'--no-confirm' argument provided: "
+             "not asking for confirmation before importing files."
+         )
+
+     # Start import.
+     log_subsection("Importing wet processes...")
+     logger.info(
+         "-> Importing %s wet process(es) into index '%s': %s.",
+         len(import_bundle.wet_processes),
+         es_import_conn.wet_processes_index,
+         ", ".join(import_bundle.wet_processes.keys()),
+     )
+     import_processes(
+         es_import_conn,
+         es_import_conn.wet_processes_index,
+         import_bundle.wet_processes,
+     )
+     log_subsection("Importing bioinformatics processes...")
+     logger.info(
+         "-> Importing %s bioinformatics process(es) into index '%s': %s.",
+         len(import_bundle.bi_processes),
+         es_import_conn.bi_processes_index,
+         ", ".join(import_bundle.bi_processes.keys()),
+     )
+     import_processes(
+         es_import_conn,
+         es_import_conn.bi_processes_index,
+         import_bundle.bi_processes,
+     )
+
+     log_subsection("Importing analysis metadata...")
+     for i, analysis in enumerate(sorted(import_bundle.analyses)):
+         log_item(
+             "Analysis",
+             i + 1,
+             len(import_bundle.analyses),
+         )
+         import_analysis(es_import_conn, analysis)
+
+     log_subsection("Importing data files...")
+     counter = 1
+     for ext in sorted(import_bundle.analyses.extensions):
+         data_files = import_bundle.analyses.get_data_files(ext)
+         logger.info("[ %s data files ]", ext.upper())
+
+         for data_file in data_files:
+             logger.info(
+                 " -> Processing data file #%s/%s: '%s'...",
+                 counter,
+                 len(import_bundle.analyses.get_data_files()),
+                 data_file.path.name,
+             )
+             import_data_file(es_import_conn, data_file)
+             import_data_file_content(
+                 es_import_conn, data_file, args.thread_count, args.dryrun
+             )
+             logger.info("")
+             counter += 1
+
+     logger.info("=> Done.")
+
+
+ if __name__ == "__main__":
+     main()
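
Note that the importer never calls the bulk API directly: each function above builds a list of action dicts and hands it to es_import_conn.bulk_import(). That method lives in genelastic/common/elastic.py and is not part of this diff, so the following is only a minimal sketch of how such a document list could map onto the standard elasticsearch-py bulk helper; the free-function form and its error handling are assumptions, not the package's actual code.

from elasticsearch import Elasticsearch, helpers


def bulk_import(client: Elasticsearch, documents: list[dict]) -> None:
    # Each document already carries "_index" and "_source", which is exactly
    # the action format that helpers.bulk() expects.
    ok_count, errors = helpers.bulk(client, documents, raise_on_error=False)
    if errors:
        # Hypothetical error handling; the real method may log or retry.
        raise RuntimeError(f"{len(errors)} document(s) failed to index")

The "_index" key routes each document to the per-type index (analyses, data files, processes) exposed as attributes on the connection object.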
genelastic/import_data/cli/info.py
@@ -0,0 +1,247 @@
+ import argparse
+ import logging
+ from datetime import datetime
+
+ from genelastic.common.cli import (
+     add_es_connection_args,
+     add_verbose_control_args,
+     add_version_arg,
+ )
+ from genelastic.common.elastic import ElasticQueryConn
+ from genelastic.import_data.logger import configure_logging
+
+ logger = logging.getLogger("genelastic")
+ logging.getLogger("elastic_transport").setLevel(
+     logging.WARNING
+ ) # Disable excessive logging
+
+
+ def read_args() -> argparse.Namespace:
+     """Read arguments from the command line."""
+     parser = argparse.ArgumentParser(
+         description="ElasticSearch database info.",
+         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+         allow_abbrev=False,
+     )
+     add_version_arg(parser)
+     add_verbose_control_args(parser)
+     add_es_connection_args(parser)
+     parser.add_argument(
+         "-a",
+         "--list-analyses",
+         action="store_true",
+         help="List all imported analyses.",
+     )
+     parser.add_argument(
+         "-w",
+         "--list-wet-processes",
+         action="store_true",
+         help="List all imported wet processes.",
+     )
+     parser.add_argument(
+         "-b",
+         "--list-bi-processes",
+         action="store_true",
+         help="List all imported bio info processes.",
+     )
+     parser.add_argument(
+         "-B",
+         "--list-bundles",
+         action="store_true",
+         help="List YAML bundles and associated analyses.",
+     )
+
+     return parser.parse_args()
+
+
+ def list_processes(es_query_conn: ElasticQueryConn, index: str) -> None:
+     """List all processes."""
+     process_ids = es_query_conn.get_field_values(index, "proc_id")
+
+     if len(process_ids) == 0:
+         logger.info("Empty response.")
+         return
+
+     for process_id in process_ids:
+         logger.info("- %s", process_id)
+
+
+ def list_wet_processes(es_query_conn: ElasticQueryConn) -> None:
+     """List all wet processes."""
+     logger.info("Imported wet processes")
+     logger.info("======================")
+     list_processes(es_query_conn, es_query_conn.wet_processes_index)
+
+
+ def list_bi_processes(es_query_conn: ElasticQueryConn) -> None:
+     """List all bio info processes."""
+     logger.info("Imported bi processes")
+     logger.info("=====================")
+     list_processes(es_query_conn, es_query_conn.bi_processes_index)
+
+
+ def list_analyses(es_query_conn: ElasticQueryConn) -> None:
+     """List all imported analyses and their associated data files."""
+     query = {
+         "size": 0,
+         "aggs": {
+             "by_analysis": {
+                 "composite": {
+                     "size": 1000,
+                     "sources": [
+                         {
+                             "analysis_id": {
+                                 "terms": {"field": "analysis_id.keyword"}
+                             }
+                         }
+                     ],
+                 },
+                 "aggs": {
+                     "data_files": {
+                         "top_hits": {
+                             "size": 100,
+                         }
+                     }
+                 },
+             }
+         },
+     }
+
+     buckets = es_query_conn.run_composite_aggregation(
+         es_query_conn.data_files_index, query
+     )
+
+     if not buckets:
+         logger.info("No data files found.")
+         return
+
+     logger.info("Data files per YAML bundle")
+     logger.info("=" * 80)
+
+     for i, bucket in enumerate(buckets):
+         analysis_id = bucket["key"]["analysis_id"]
+         hits = bucket["data_files"]["hits"]["hits"]
+         doc_count = len(hits)
+
+         logger.info(
+             "[%d] Analysis ID: %s (%d file%s)",
+             i + 1,
+             analysis_id,
+             doc_count,
+             "s" if doc_count > 1 else "",
+         )
+         logger.info("-" * 80)
+
+         for j, hit in enumerate(hits):
+             source = hit["_source"]
+
+             created_at = datetime.fromisoformat(source["created_at"])
+             created_at_formatted = created_at.strftime("%Y-%m-%d")
+
+             logger.info(" File %d of %d:", j + 1, doc_count)
+             logger.info(" created_at : %s", created_at_formatted)
+             logger.info(" bundle_file : %s", source["bundle_file"])
+             logger.info(" path : %s", source["path"])
+
+
+ def list_bundles(es_query_conn: ElasticQueryConn) -> None:
+     """List bundle_file → associated analysis_id (clean visual CLI output)."""
+     query = {
+         "size": 0,
+         "aggs": {
+             "by_bundle": {
+                 "composite": {
+                     "size": 2000,
+                     "sources": [
+                         {
+                             "bundle_file": {
+                                 "terms": {"field": "bundle_file.keyword"}
+                             }
+                         }
+                     ],
+                 },
+                 "aggs": {
+                     "analyses": {
+                         "terms": {
+                             "field": "analysis_id.keyword",
+                             "size": 2000,
+                         }
+                     }
+                 },
+             }
+         },
+     }
+
+     buckets = es_query_conn.run_composite_aggregation(
+         es_query_conn.data_files_index, query
+     )
+
+     if not buckets:
+         logger.info("No bundles found.")
+         return
+
+     # Sort bundles by bundle_file path
+     buckets = sorted(buckets, key=lambda b: b["key"]["bundle_file"])
+
+     logger.info("========================================")
+     logger.info(" BUNDLES AND ASSOCIATED ANALYSES")
+     logger.info("========================================")
+     logger.info("")
+
+     for idx, bucket in enumerate(buckets, start=1):
+         bundle = bucket["key"]["bundle_file"]
+         analyses = bucket["analyses"]["buckets"]
+
+         logger.info("#%d %s", idx, bundle)
+         if not analyses:
+             logger.info(" (no analyses)")
+         else:
+             for a in analyses:
+                 logger.info(" • %s", a["key"])
+
+         logger.info("----------------------------------------")
+
+
+ def main() -> None:
+     """Entry point of the info script."""
+     args = read_args()
+
+     configure_logging(args.verbose)
+     logger.debug("Arguments: %s", args)
+
+     addr = f"https://{args.es_host}:{args.es_port}"
+     logger.info("Connecting to Elasticsearch at %s...", addr)
+     es_query_conn = ElasticQueryConn(
+         addr,
+         args.es_cert_fp,
+         args.es_index_prefix,
+         basic_auth=(args.es_usr, args.es_pwd),
+     )
+
+     list_call_count = 0
+
+     if args.list_bundles:
+         list_bundles(es_query_conn)
+         list_call_count += 1
+
+     if args.list_analyses:
+         list_analyses(es_query_conn)
+         list_call_count += 1
+
+     if args.list_wet_processes:
+         list_wet_processes(es_query_conn)
+         list_call_count += 1
+
+     if args.list_bi_processes:
+         list_bi_processes(es_query_conn)
+         list_call_count += 1
+
+     if list_call_count == 0:
+         logger.debug("No list option specified, listing everything.")
+         list_analyses(es_query_conn)
+         list_wet_processes(es_query_conn)
+         list_bi_processes(es_query_conn)
+
+
+ if __name__ == "__main__":
+     main()
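
Both listing commands delegate pagination to ElasticQueryConn.run_composite_aggregation(), which this diff does not show. The sketch below is the standard composite-aggregation "after_key" loop from the Elasticsearch API, written directly against an elasticsearch-py client; the function name and return shape mirror the call sites above, but everything else is an assumption about the real method.

from typing import Any

from elasticsearch import Elasticsearch


def run_composite_aggregation(
    client: Elasticsearch, index: str, query: dict[str, Any]
) -> list[dict[str, Any]]:
    """Collect every bucket of the single composite aggregation in `query`."""
    agg_name = next(iter(query["aggs"]))  # e.g. "by_analysis" or "by_bundle"
    buckets: list[dict[str, Any]] = []
    while True:
        resp = client.search(index=index, body=query)
        agg = resp["aggregations"][agg_name]
        buckets.extend(agg["buckets"])
        # Composite aggregations page via "after_key"; absent means last page.
        if "after_key" not in agg:
            break
        query["aggs"][agg_name]["composite"]["after"] = agg["after_key"]
    return buckets

The composite "size" values in the queries above (1000 and 2000) only bound a single page; a loop like this one is what makes the listings complete regardless of bucket count.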
genelastic/import_data/{cli_integrity.py → cli/integrity.py}
@@ -1,17 +1,20 @@
  import argparse
  import logging
+ import typing

  from elasticsearch import NotFoundError

- from genelastic.common import (
-     Bucket,
-     DBIntegrityError,
-     ElasticQueryConn,
+ from genelastic.common.cli import (
      add_es_connection_args,
      add_verbose_control_args,
+     add_version_arg,
  )
+ from genelastic.common.elastic import ElasticQueryConn
+ from genelastic.common.exceptions import DBIntegrityError
+ from genelastic.import_data.logger import configure_logging

- from .logger import configure_logging
+ if typing.TYPE_CHECKING:
+     from genelastic.common.types import Bucket

  logger = logging.getLogger("genelastic")
  logging.getLogger("elastic_transport").setLevel(
@@ -27,6 +30,7 @@ def read_args() -> argparse.Namespace:
          formatter_class=argparse.ArgumentDefaultsHelpFormatter,
          allow_abbrev=False,
      )
+     add_version_arg(parser)
      add_verbose_control_args(parser)
      add_es_connection_args(parser)
      return parser.parse_args()
@@ -42,6 +46,13 @@ def check_for_undefined_file_indices(
      :raises genelastic.common.DBIntegrityError:
          Some files indices are used in the analyses index but are undefined.
      """
+     if not es_query_conn.client:
+         logger.info(
+             "[Dryrun] check_for_undefined_file_indices: "
+             "no Elasticsearch client."
+         )
+         return
+
      logger.info(
          "Checking for references to undefined file indices in the index '%s'...",
          analyses_index,
@@ -217,6 +228,13 @@ def check_for_unused_file_indices(
      :returns: 1 if some file indices exists but are unused in the analyses index,
          and 0 otherwise.
      """
+     if not es_query_conn.client:
+         logger.info(
+             "[Dryrun] check_for_unused_file_indices: "
+             "no Elasticsearch client."
+         )
+         return -1
+
      json_indices = es_query_conn.client.cat.indices(
          index=f"{index_prefix}-file-*", format="json"
      ).body
@@ -349,9 +367,13 @@ def main() -> None:
      bi_processes_index = f"{args.es_index_prefix}-bi_processes"

      addr = f"https://{args.es_host}:{args.es_port}"
-     logger.info("Trying to connect to Elasticsearch at %s...", addr)
+     logger.info("Connecting to Elasticsearch at %s...", addr)
      es_query_conn = ElasticQueryConn(
-         addr, args.es_cert_fp, basic_auth=(args.es_usr, args.es_pwd)
+         addr,
+         args.es_cert_fp,
+         args.es_index_prefix,
+         args.dryrun,
+         basic_auth=(args.es_usr, args.es_pwd),
      )

      # Fatal errors
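
The two new guards above rely on ElasticQueryConn.client being None when a dry-run level is passed to the constructor. The connection class lives in genelastic/common/elastic.py and is not part of this diff, so the constructor below is only a hedged reconstruction from the call sites in this section; the attribute names follow the "<prefix>-<name>" index pattern visible in main(), and the TLS wiring is an assumption.

from elasticsearch import Elasticsearch


class ElasticQueryConn:
    """Hypothetical sketch: only the dry-run wiring is of interest here."""

    def __init__(
        self,
        addr: str,
        es_cert_fp: str,
        index_prefix: str,
        dry_run: int = 0,
        basic_auth: tuple[str, str] | None = None,
    ) -> None:
        # Index names follow the "<prefix>-<name>" pattern seen in main().
        self.bi_processes_index = f"{index_prefix}-bi_processes"
        self.wet_processes_index = f"{index_prefix}-wet_processes"
        self.client: Elasticsearch | None = None
        if dry_run == 0:
            # ssl_assert_fingerprint pins the server certificate by its
            # SHA-256 fingerprint, matching the cert-fingerprint argument.
            self.client = Elasticsearch(
                addr,
                ssl_assert_fingerprint=es_cert_fp,
                basic_auth=basic_auth,
            )

With this shape, every integrity check must test es_query_conn.client before touching the cluster, which is exactly what the added guards do.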