genelastic-0.7.0-py3-none-any.whl → genelastic-0.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. genelastic/api/.env +4 -0
  2. genelastic/api/cli_start_api.py +18 -0
  3. genelastic/api/errors.py +52 -0
  4. genelastic/api/extends/example.py +0 -6
  5. genelastic/api/extends/example.yml +0 -0
  6. genelastic/api/routes.py +313 -181
  7. genelastic/api/server.py +34 -26
  8. genelastic/api/settings.py +5 -9
  9. genelastic/api/specification.yml +512 -0
  10. genelastic/common/__init__.py +0 -39
  11. genelastic/common/cli.py +100 -0
  12. genelastic/common/elastic.py +374 -46
  13. genelastic/common/exceptions.py +34 -2
  14. genelastic/common/server.py +59 -0
  15. genelastic/common/types.py +1 -14
  16. genelastic/import_data/__init__.py +0 -27
  17. genelastic/import_data/checker.py +99 -0
  18. genelastic/import_data/checker_observer.py +13 -0
  19. genelastic/import_data/cli/__init__.py +0 -0
  20. genelastic/import_data/cli/cli_check.py +136 -0
  21. genelastic/import_data/cli/gen_data.py +143 -0
  22. genelastic/import_data/cli/import_data.py +346 -0
  23. genelastic/import_data/cli/info.py +247 -0
  24. genelastic/import_data/{cli_integrity.py → cli/integrity.py} +29 -7
  25. genelastic/import_data/cli/validate.py +146 -0
  26. genelastic/import_data/collect.py +185 -0
  27. genelastic/import_data/constants.py +136 -11
  28. genelastic/import_data/import_bundle.py +102 -59
  29. genelastic/import_data/import_bundle_factory.py +70 -149
  30. genelastic/import_data/importers/__init__.py +0 -0
  31. genelastic/import_data/importers/importer_base.py +131 -0
  32. genelastic/import_data/importers/importer_factory.py +85 -0
  33. genelastic/import_data/importers/importer_types.py +223 -0
  34. genelastic/import_data/logger.py +2 -1
  35. genelastic/import_data/models/__init__.py +0 -0
  36. genelastic/import_data/models/analyses.py +178 -0
  37. genelastic/import_data/models/analysis.py +144 -0
  38. genelastic/import_data/models/data_file.py +110 -0
  39. genelastic/import_data/models/process.py +45 -0
  40. genelastic/import_data/models/processes.py +84 -0
  41. genelastic/import_data/models/tags.py +170 -0
  42. genelastic/import_data/models/unique_list.py +109 -0
  43. genelastic/import_data/models/validate.py +26 -0
  44. genelastic/import_data/patterns.py +90 -0
  45. genelastic/import_data/random_bundle.py +79 -54
  46. genelastic/import_data/resolve.py +157 -0
  47. genelastic/ui/.env +1 -0
  48. genelastic/ui/cli_start_ui.py +20 -0
  49. genelastic/ui/routes.py +333 -0
  50. genelastic/ui/server.py +9 -82
  51. genelastic/ui/settings.py +2 -6
  52. genelastic/ui/static/cea-cnrgh.ico +0 -0
  53. genelastic/ui/static/cea.ico +0 -0
  54. genelastic/ui/static/layout.ico +0 -0
  55. genelastic/ui/static/novaseq6000.png +0 -0
  56. genelastic/ui/static/style.css +430 -0
  57. genelastic/ui/static/ui.js +458 -0
  58. genelastic/ui/templates/analyses.html +98 -0
  59. genelastic/ui/templates/analysis_detail.html +44 -0
  60. genelastic/ui/templates/bi_process_detail.html +129 -0
  61. genelastic/ui/templates/bi_processes.html +116 -0
  62. genelastic/ui/templates/explorer.html +356 -0
  63. genelastic/ui/templates/home.html +207 -0
  64. genelastic/ui/templates/layout.html +153 -0
  65. genelastic/ui/templates/version.html +21 -0
  66. genelastic/ui/templates/wet_process_detail.html +131 -0
  67. genelastic/ui/templates/wet_processes.html +116 -0
  68. genelastic-0.9.0.dist-info/METADATA +686 -0
  69. genelastic-0.9.0.dist-info/RECORD +76 -0
  70. genelastic-0.9.0.dist-info/WHEEL +4 -0
  71. genelastic-0.9.0.dist-info/entry_points.txt +10 -0
  72. genelastic-0.9.0.dist-info/licenses/LICENSE +519 -0
  73. genelastic/import_data/analyses.py +0 -69
  74. genelastic/import_data/analysis.py +0 -205
  75. genelastic/import_data/bi_process.py +0 -27
  76. genelastic/import_data/bi_processes.py +0 -49
  77. genelastic/import_data/cli_gen_data.py +0 -116
  78. genelastic/import_data/cli_import.py +0 -379
  79. genelastic/import_data/cli_info.py +0 -256
  80. genelastic/import_data/cli_validate.py +0 -54
  81. genelastic/import_data/data_file.py +0 -87
  82. genelastic/import_data/filename_pattern.py +0 -57
  83. genelastic/import_data/tags.py +0 -123
  84. genelastic/import_data/wet_process.py +0 -28
  85. genelastic/import_data/wet_processes.py +0 -53
  86. genelastic-0.7.0.dist-info/METADATA +0 -105
  87. genelastic-0.7.0.dist-info/RECORD +0 -40
  88. genelastic-0.7.0.dist-info/WHEEL +0 -5
  89. genelastic-0.7.0.dist-info/entry_points.txt +0 -6
  90. genelastic-0.7.0.dist-info/top_level.txt +0 -1
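
The listing shows the bulk of the 0.9.0 change: the flat genelastic/import_data package is split into cli/, models/, and importers/ subpackages (entries 19 to 25, 30 to 33 and 35 to 43), paired with deletions of the flat 0.7.0 modules (entries 73 to 85); only cli_integrity.py → cli/integrity.py is tracked as a rename. The three hunks reproduced below are the largest deletions: cli_import.py, cli_info.py and cli_validate.py. As a minimal, hypothetical sketch of what the move means for anything importing these modules directly, assuming the 0.9.0 module still exposes a main() callable as its 0.7.0 counterpart did (the new module's contents are not shown in this diff):

# Hypothetical compatibility shim. The module paths come from the file
# list above; the presence of a `main` callable in the 0.9.0 module is
# an assumption carried over from the 0.7.0 code shown below.
import importlib
from typing import Callable

def resolve_import_cli() -> Callable[[], None]:
    """Return the data-import entry point under either package layout."""
    for module_name in (
        "genelastic.import_data.cli.import_data",  # 0.9.0 layout
        "genelastic.import_data.cli_import",  # 0.7.0 layout
    ):
        try:
            return importlib.import_module(module_name).main
        except ModuleNotFoundError:
            continue
    msg = "no genelastic import CLI module found"
    raise ImportError(msg)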
--- a/genelastic/import_data/cli_import.py
+++ /dev/null
@@ -1,379 +0,0 @@
-# vi: se tw=80
-
-# Elasticsearch Python API:
-# https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/overview.html
-# https://elasticsearch-py.readthedocs.io/en/latest/api.html
-
-import argparse
-import csv
-import datetime
-import hashlib
-import logging
-import sys
-import time
-from pathlib import Path
-
-import vcf
-
-from genelastic.common import (
-    AnalysisDocument,
-    BulkItems,
-    ElasticImportConn,
-    MetadataDocument,
-    ProcessDocument,
-    add_es_connection_args,
-    add_verbose_control_args,
-)
-
-from .bi_processes import BioInfoProcesses
-from .data_file import DataFile
-from .import_bundle_factory import make_import_bundle_from_files
-from .logger import configure_logging
-from .wet_processes import WetProcesses
-
-logger = logging.getLogger("genelastic")
-logging.getLogger("elastic_transport").setLevel(
-    logging.WARNING
-)  # Disable excessive logging
-logging.getLogger("urllib3").setLevel(
-    logging.WARNING
-)  # Disable excessive logging
-
-
-def read_args() -> argparse.Namespace:
-    """Read arguments from command line."""
-    parser = argparse.ArgumentParser(
-        description="Genetics data importer.",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-        allow_abbrev=False,
-    )
-    add_verbose_control_args(parser)
-    add_es_connection_args(parser)
-    parser.add_argument(
-        "-D",
-        "--dry-run",
-        dest="dryrun",
-        action="count",
-        default=0,
-        help=(
-            "Dry-run level. -D for data files loading (VCF, coverage, etc) "
-            "without connecting or importing to database. "
-            "-DD for metadata YAML files loading only (no loading of data files)."
-        ),
-    )
-    parser.add_argument(
-        "--log-file", dest="log_file", help="Path to a log file."
-    )
-    parser.add_argument(
-        "--no-list",
-        dest="no_list",
-        action="store_true",
-        help="Do not print list of files to be imported.",
-    )
-    parser.add_argument(
-        "--no-confirm",
-        dest="no_confirm",
-        action="store_true",
-        help="Do not ask confirmation before importing.",
-    )
-    parser.add_argument(
-        "files",
-        type=Path,
-        nargs="+",
-        default=None,
-        help="Data files that describe what to import.",
-    )
-    return parser.parse_args()
-
-
-def import_cov_file(
-    es_import_conn: ElasticImportConn | None,
-    file_index: str,
-    file: Path,
-    dryrun: int = 0,
-) -> None:
-    """Import a coverage file to the Elasticsearch database."""
-    # Set field types
-    if dryrun == 0 and es_import_conn:
-        es_import_conn.client.indices.put_mapping(
-            index=file_index,
-            body={
-                "properties": {
-                    "pos": {"type": "integer"},
-                    "depth": {"type": "byte"},
-                }
-            },
-        )
-
-    # Open file
-    if dryrun > 1:
-        logger.info(
-            "Would load and import Coverage file %s " "into index %s.",
-            file,
-            file_index,
-        )
-    else:
-        logger.info("Load Coverage file %s.", file)
-        if dryrun == 1:
-            logger.info(
-                "Would import Coverage file %s into index %s.", file, file_index
-            )
-        else:
-            logger.info(
-                "Import Coverage file %s into index %s.", file, file_index
-            )
-        with file.open(newline="", encoding="utf-8") as f:
-            # Read file as CSV
-            reader = csv.reader(f, delimiter="\t", quotechar='"')
-
-            # Loop on all lines
-            for row in reader:
-                # Build document
-                # Position starts at 0 inside coverage file
-                doc: MetadataDocument = {
-                    "type": "coverage",
-                    "chr": row[0],
-                    "pos": int(row[1]) + 1,
-                    "depth": int(row[2]),
-                }
-
-                # Insert document
-                if dryrun == 0 and es_import_conn:
-                    es_import_conn.client.index(index=file_index, document=doc)
-
-
-def import_analysis_metadata(  # noqa: PLR0913
-    es_import_conn: ElasticImportConn | None,
-    index_prefix: str,
-    file_index: str,
-    file: DataFile,
-    analysis_type: str,
-    dryrun: int = 0,
-) -> None:
-    """Import analysis metadata into a dedicated index."""
-    doc: AnalysisDocument = {
-        "path": str(file.path.resolve()),
-        "bundle_path": str(file.bundle_path.resolve())
-        if file.bundle_path
-        else None,
-        "metadata": file.metadata,
-        "file_index": file_index,
-        "type": analysis_type,
-    }
-
-    bulk_items: BulkItems = [
-        {"_index": f"{index_prefix}-analyses", "_source": doc}
-    ]
-
-    if dryrun == 0 and es_import_conn:
-        es_import_conn.import_items(
-            bulk_items,
-            start_time=time.perf_counter(),
-            total_items=len(bulk_items),
-        )
-
-
-def import_vcf_file(
-    es_import_conn: ElasticImportConn | None,
-    file_index: str,
-    file: DataFile,
-    dryrun: int = 0,
-) -> None:
-    """Import a VCF file to the Elasticsearch database."""
-    logger.info('Import VCF file "%s".', file)
-
-    if dryrun > 1:
-        logger.info(
-            "Would load and import VCF file %s " "into index %s.",
-            file.path,
-            file_index,
-        )
-    else:
-        logger.info("Load VCF file %s.", file.path)
-        if dryrun == 1:
-            logger.info(
-                "Would import VCF file %s into index %s.", file.path, file_index
-            )
-        else:
-            logger.info(
-                "Importing VCF file %s into index %s...", file.path, file_index
-            )
-
-        try:
-            vcf_reader = vcf.Reader(filename=str(file.path))
-            n = 0
-            start = time.perf_counter()
-            bulk_sz = 256  # Bulk size
-            bulk_items: BulkItems = []
-            for record in vcf_reader:
-                # Correct values
-                if not record.CHROM.startswith("chr"):
-                    if record.CHROM.lower().startswith("chr"):
-                        record.CHROM = "chr" + record.CHROM[3:]
-                    else:
-                        record.CHROM = "chr" + record.CHROM
-
-                # Build document
-                alt = [x if x is None else x.type for x in record.ALT]
-                doc: MetadataDocument = {
-                    "type": "vcf",
-                    "chr": record.CHROM,
-                    "pos": record.POS,
-                    "alt": alt,
-                    "info": record.INFO,
-                }
-
-                if dryrun == 0:
-                    # Append item to bulk
-                    bulk_items.append({"_index": file_index, "_source": doc})
-                    n += 1
-
-                    # Insert bulk of items
-                    if len(bulk_items) >= bulk_sz and es_import_conn:
-                        es_import_conn.import_items(
-                            bulk_items, start_time=start, total_items=n
-                        )
-                        bulk_items = []
-
-            # Insert remaining items
-            if dryrun == 0 and es_import_conn:
-                es_import_conn.import_items(
-                    bulk_items, start_time=start, total_items=n
-                )
-
-        except StopIteration:
-            logger.error("Skipping empty file : %s.", file.path)
-
-
-def import_processes(
-    es_import_conn: ElasticImportConn | None,
-    index: str,
-    processes: WetProcesses | BioInfoProcesses,
-    dryrun: int = 0,
-) -> None:
-    """Import processes into their own index."""
-    bulk_items: BulkItems = []
-
-    for proc_id in processes.get_process_ids():
-        process = processes[proc_id]
-        process_type = process.__class__.__name__
-        doc: ProcessDocument = process.data | {
-            "proc_id": proc_id,
-            "type": process_type,
-        }
-        bulk_items.append({"_index": index, "_source": doc})
-
-    if dryrun == 0 and es_import_conn:
-        es_import_conn.import_items(
-            bulk_items,
-            start_time=time.perf_counter(),
-            total_items=len(bulk_items),
-        )
-
-
-def generate_unique_index(index_prefix: str, filepath: Path) -> str:
-    """Generate a unique index with the following format:
-    <index_prefix>_<current_date>_<md5_hashed_filepath>
-    """
-    current_date = datetime.datetime.now(tz=datetime.UTC).strftime("%Y%m%d")
-    hashed_filepath = hashlib.md5(
-        str(filepath).encode("utf-8"), usedforsecurity=False
-    ).hexdigest()
-    return f"{index_prefix}-file-{current_date}-{hashed_filepath}"
-
-
-def main() -> None:  # noqa: C901
-    """Entry point of the import script."""
-    # Read command line arguments
-    args = read_args()
-
-    # Configure logging
-    configure_logging(args.verbose, log_file=args.log_file)
-    logger.debug("Arguments: %s", args)
-    logger.debug("LOGGERS: %s", logging.root.manager.loggerDict)
-
-    # Open connection to ES
-    if args.dryrun == 0:
-        addr = f"https://{args.es_host}:{args.es_port}"
-        logger.info("Trying to connect to Elasticsearch at %s...", addr)
-        es_import_conn = ElasticImportConn(
-            addr, args.es_cert_fp, basic_auth=(args.es_usr, args.es_pwd)
-        )
-    else:
-        es_import_conn = None
-
-    # Load YAML import bundle
-    import_bundle = make_import_bundle_from_files(args.files, check=True)
-    all_bundled_files = import_bundle.get_files()
-
-    # CHECK
-    for f in all_bundled_files:
-        if not f.exists():
-            msg = f"Path {f.path} does not point to a valid file."
-            raise RuntimeError(msg)
-
-    # LIST
-    if not args.no_list:
-        for f in all_bundled_files:
-            logger.info("Will import %s.", f.path)
-
-    # Ask confirmation for importing
-    if not args.no_confirm:
-        answer: str = "maybe"
-        while answer not in ["", "n", "y"]:
-            answer = input("Import (y/N)? ").lower()
-        if answer != "y":
-            logger.info("Import canceled.")
-            sys.exit(0)
-
-    # IMPORT
-    # Loop on file categories
-    for cat in import_bundle.analyses.get_all_categories():
-        # Import all files in this category.
-        for f in import_bundle.get_files(cat):
-            logger.info("Import %s files from %s.", cat, f.path)
-            # First, generate a unique index name for each file.
-            file_index = generate_unique_index(args.es_index_prefix, f.path)
-            # Then, import the analysis metadata into a dedicated index.
-            import_analysis_metadata(
-                es_import_conn,
-                args.es_index_prefix,
-                file_index,
-                f,
-                cat,
-                args.dryrun,
-            )
-            # Finally, import the file in its own index.
-            globals()[f"import_{cat}_file"](
-                es_import_conn=es_import_conn,
-                file_index=file_index,
-                file=f,
-                dryrun=args.dryrun,
-            )
-
-    # Import processes
-    logger.info("Importing wet processes.")
-    logger.info(
-        "Wet processes IDs = %s",
-        str(import_bundle.wet_processes.get_process_ids()),
-    )
-    import_processes(
-        es_import_conn,
-        f"{args.es_index_prefix}-wet_processes",
-        import_bundle.wet_processes,
-    )
-
-    logger.info("Importing bio info processes.")
-    logger.info(
-        "Bio info processes IDs = %s",
-        str(import_bundle.bi_processes.get_process_ids()),
-    )
-    import_processes(
-        es_import_conn,
-        f"{args.es_index_prefix}-bi_processes",
-        import_bundle.bi_processes,
-    )
-
-
-if __name__ == "__main__":
-    main()
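
Worth noting in the removed main() above: the per-category import is dispatched by name, globals()[f"import_{cat}_file"](...), so a category "vcf" silently requires a module-level import_vcf_file function, and an unknown category fails with a bare KeyError. The 0.9.0 file list (entries 30 to 33) suggests this was replaced by an importer factory; below is a hedged sketch of the explicit-registry pattern that naming implies, not the actual importer_factory.py API:

# Illustrative registry replacing the globals() lookup; all names here
# are hypothetical, not the real genelastic 0.9.0 API.
from typing import Callable

ImporterFunc = Callable[..., None]
IMPORTERS: dict[str, ImporterFunc] = {}

def register_importer(category: str) -> Callable[[ImporterFunc], ImporterFunc]:
    """Register an import function under a file-category name."""
    def wrap(func: ImporterFunc) -> ImporterFunc:
        IMPORTERS[category] = func
        return func
    return wrap

@register_importer("vcf")
def import_vcf_file(**kwargs: object) -> None:
    """Stand-in for the real VCF importer."""

@register_importer("cov")
def import_cov_file(**kwargs: object) -> None:
    """Stand-in for the real coverage importer."""

def get_importer(category: str) -> ImporterFunc:
    """Resolve a category, failing with a clear message instead of a KeyError."""
    try:
        return IMPORTERS[category]
    except KeyError:
        msg = f"no importer registered for category {category!r}"
        raise ValueError(msg) from None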
--- a/genelastic/import_data/cli_info.py
+++ /dev/null
@@ -1,256 +0,0 @@
-import argparse
-import logging
-
-from genelastic.common import (
-    Bucket,
-    ElasticQueryConn,
-    add_es_connection_args,
-    add_verbose_control_args,
-)
-
-from .logger import configure_logging
-
-logger = logging.getLogger("genelastic")
-logging.getLogger("elastic_transport").setLevel(
-    logging.WARNING
-)  # Disable excessive logging
-
-
-def read_args() -> argparse.Namespace:
-    """Read arguments from command line."""
-    parser = argparse.ArgumentParser(
-        description="ElasticSearch database info.",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-        allow_abbrev=False,
-    )
-    add_verbose_control_args(parser)
-    add_es_connection_args(parser)
-    parser.add_argument(
-        "-y",
-        "--list-bundles",
-        action="store_true",
-        help="List all imported YAML bundles.",
-    )
-    parser.add_argument(
-        "-f",
-        "--list-data-files",
-        action="store_true",
-        help="List all imported data files.",
-    )
-    parser.add_argument(
-        "-w",
-        "--list-wet-processes",
-        action="store_true",
-        help="List all imported wet processes.",
-    )
-    parser.add_argument(
-        "-b",
-        "--list-bi-processes",
-        action="store_true",
-        help="List all imported bio info processes.",
-    )
-    parser.add_argument(
-        "-Y",
-        "--list-data-files-per-bundle",
-        action="store_true",
-        help="For each imported YAML bundle, "
-        "display some info and list its data files.",
-    )
-    return parser.parse_args()
-
-
-def list_bundles(es_query_conn: ElasticQueryConn, index: str) -> None:
-    """List all imported YAML bundles."""
-    query = {
-        "size": 0,
-        "aggs": {
-            "get_bundle_paths": {
-                "composite": {
-                    "sources": {
-                        "bundle_path": {
-                            "terms": {"field": "bundle_path.keyword"}
-                        }
-                    },
-                    "size": 1000,
-                }
-            }
-        },
-    }
-
-    buckets: list[Bucket] = es_query_conn.run_composite_aggregation(
-        index, query
-    )
-
-    logger.info("Imported YAML files")
-    logger.info("===================")
-
-    if len(buckets) == 0:
-        logger.info("Empty response.")
-        return
-
-    for bucket in buckets:
-        bundle_path = bucket["key"]["bundle_path"]
-        logger.info("- %s", bundle_path)
-
-
-def list_data_files(es_query_conn: ElasticQueryConn, index: str) -> None:
-    """List all imported data files."""
-    query = {
-        "size": 0,
-        "aggs": {
-            "get_paths": {
-                "composite": {
-                    "sources": {"path": {"terms": {"field": "path.keyword"}}},
-                    "size": 1000,
-                }
-            }
-        },
-    }
-
-    buckets: list[Bucket] = es_query_conn.run_composite_aggregation(
-        index, query
-    )
-
-    logger.info("Imported data files")
-    logger.info("===================")
-
-    if len(buckets) == 0:
-        logger.info("Empty response.")
-        return
-
-    for bucket in buckets:
-        bundle_path = bucket["key"]["path"]
-        logger.info("- %s", bundle_path)
-
-
-def list_processes(es_query_conn: ElasticQueryConn, index: str) -> None:
-    """List all processes."""
-    process_ids = es_query_conn.get_field_values(index, "proc_id")
-
-    if len(process_ids) == 0:
-        logger.info("Empty response.")
-        return
-
-    for process_id in process_ids:
-        logger.info("- %s", process_id)
-
-
-def list_wet_processes(es_query_conn: ElasticQueryConn, index: str) -> None:
-    """List all wet processes."""
-    logger.info("Imported wet processes")
-    logger.info("======================")
-    list_processes(es_query_conn, index)
-
-
-def list_bi_processes(es_query_conn: ElasticQueryConn, index: str) -> None:
-    """List all bio info processes."""
-    logger.info("Imported bi processes")
-    logger.info("=====================")
-    list_processes(es_query_conn, index)
-
-
-def list_data_files_per_bundle(
-    es_query_conn: ElasticQueryConn, index: str
-) -> None:
-    """For each imported YAML bundle, display some info and list its data files."""
-    query = {
-        "size": 0,
-        "aggs": {
-            "data_files": {
-                "composite": {
-                    "sources": [
-                        {
-                            "bundle_path": {
-                                "terms": {"field": "bundle_path.keyword"}
-                            }
-                        }
-                    ],
-                    "size": 100,
-                },
-                "aggs": {"docs": {"top_hits": {"size": 100}}},
-            }
-        },
-    }
-
-    buckets: list[Bucket] = es_query_conn.run_composite_aggregation(
-        index, query
-    )
-
-    logger.info("Data files per YAML bundle")
-    logger.info("==========================")
-
-    if len(buckets) == 0:
-        logger.info("Empty response.")
-        return
-
-    for bucket in buckets:
-        documents = bucket["docs"]["hits"]["hits"]
-        if len(documents) == 0:
-            continue
-
-        logger.info("- Bundle Path: %s", bucket["key"]["bundle_path"])
-        logger.info(
-            " -> Wet process: %s",
-            documents[0]["_source"]["metadata"]["wet_process"],
-        )
-        logger.info(
-            " -> Bio info process: %s",
-            documents[0]["_source"]["metadata"]["bi_process"],
-        )
-        logger.info(" -> Data files:")
-
-        for doc in documents:
-            logger.info(" - Index: %s", doc["_source"]["file_index"])
-            logger.info(" Path: %s", doc["_source"]["path"])
-
-
-def main() -> None:
-    """Entry point of the info script."""
-    args = read_args()
-
-    configure_logging(args.verbose)
-    logger.debug("Arguments: %s", args)
-
-    addr = f"https://{args.es_host}:{args.es_port}"
-    logger.info("Trying to connect to Elasticsearch at %s...", addr)
-    es_query_conn = ElasticQueryConn(
-        addr, args.es_cert_fp, basic_auth=(args.es_usr, args.es_pwd)
-    )
-
-    analysis_index = f"{args.es_index_prefix}-analyses"
-    wet_processes_index = f"{args.es_index_prefix}-wet_processes"
-    bi_processes_index = f"{args.es_index_prefix}-bi_processes"
-
-    list_call_count = 0
-
-    if args.list_bundles:
-        list_bundles(es_query_conn, analysis_index)
-        list_call_count += 1
-
-    if args.list_data_files:
-        list_data_files(es_query_conn, analysis_index)
-        list_call_count += 1
-
-    if args.list_wet_processes:
-        list_wet_processes(es_query_conn, wet_processes_index)
-        list_call_count += 1
-
-    if args.list_bi_processes:
-        list_bi_processes(es_query_conn, bi_processes_index)
-        list_call_count += 1
-
-    if args.list_data_files_per_bundle:
-        list_data_files_per_bundle(es_query_conn, analysis_index)
-        list_call_count += 1
-
-    if list_call_count == 0:
-        logger.debug("No list option specified, listing everything.")
-        list_bundles(es_query_conn, analysis_index)
-        list_data_files(es_query_conn, analysis_index)
-        list_wet_processes(es_query_conn, wet_processes_index)
-        list_bi_processes(es_query_conn, bi_processes_index)
-        list_data_files_per_bundle(es_query_conn, analysis_index)
-
-
-if __name__ == "__main__":
-    main()
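
All the listing helpers above cap their composite aggregations at a fixed page ("size": 1000 or "size": 100) and make a single call to run_composite_aggregation. That helper lives in genelastic/common/elastic.py and is not shown in this diff; below is a minimal sketch of how a composite aggregation is typically paged to completion with after_key, assuming a raw elasticsearch-py client, so the real helper may well differ:

# Hedged sketch only; not the actual genelastic.common.elastic code.
from typing import Any

from elasticsearch import Elasticsearch

def run_composite_aggregation(
    es: Elasticsearch, index: str, query: dict[str, Any]
) -> list[dict[str, Any]]:
    """Collect every bucket of a single composite aggregation, page by page."""
    agg_name = next(iter(query["aggs"]))  # e.g. "get_bundle_paths"
    buckets: list[dict[str, Any]] = []
    while True:
        resp = es.search(index=index, body=query)
        agg = resp["aggregations"][agg_name]
        buckets.extend(agg["buckets"])
        if "after_key" not in agg:  # no more pages
            return buckets
        # Resume the next page after the last composite key seen.
        query["aggs"][agg_name]["composite"]["after"] = agg["after_key"]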
--- a/genelastic/import_data/cli_validate.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import argparse
-import logging
-from pathlib import Path
-
-from schema import SchemaError
-
-from genelastic.common import add_verbose_control_args
-
-from .import_bundle_factory import make_import_bundle_from_files
-from .logger import configure_logging
-
-logger = logging.getLogger("genelastic")
-
-
-def read_args() -> argparse.Namespace:
-    """Read arguments from command line."""
-    parser = argparse.ArgumentParser(
-        description="Ensure that YAML files "
-        "follow the genelastic YAML bundle schema.",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-        allow_abbrev=False,
-    )
-    add_verbose_control_args(parser)
-    parser.add_argument(
-        "files",
-        type=Path,
-        nargs="+",
-        default=None,
-        help="YAML files to validate.",
-    )
-    parser.add_argument(
-        "-c",
-        "--check",
-        action="store_true",
-        help="In addition to validating the schema, "
-        "check for undefined referenced processes.",
-    )
-    return parser.parse_args()
-
-
-def main() -> int:
-    """Entry point of the validate script."""
-    args = read_args()
-    configure_logging(args.verbose)
-
-    try:
-        make_import_bundle_from_files(args.files, check=args.check)
-    except (ValueError, RuntimeError, TypeError, SchemaError) as e:
-        # Catch any exception that can be raised by 'make_import_bundle_from_files'.
-        logger.error(e)
-        return 1
-
-    logger.info("All YAML files respect the genelastic YAML bundle format.")
-    return 0
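
Unlike the other two removed scripts, cli_validate.py has main() return an exit code instead of calling sys.exit() itself, and it ships no if __name__ == "__main__" block, so converting the return value into a process exit status is left to whatever wraps it. A sketch of the conventional wrapper follows; the actual console-script names live in entry_points.txt, whose contents this diff omits:

# Hypothetical wrapper; the real entry point is declared in
# genelastic's entry_points.txt, which is not shown in this diff.
import sys

from genelastic.import_data.cli_validate import main

if __name__ == "__main__":
    # main() returns 0 when every file validates, 1 on the first error.
    sys.exit(main())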