gammasimtools 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. {gammasimtools-0.23.0.dist-info → gammasimtools-0.25.0.dist-info}/METADATA +1 -1
  2. {gammasimtools-0.23.0.dist-info → gammasimtools-0.25.0.dist-info}/RECORD +89 -85
  3. {gammasimtools-0.23.0.dist-info → gammasimtools-0.25.0.dist-info}/entry_points.txt +1 -0
  4. simtools/_version.py +2 -2
  5. simtools/application_control.py +54 -4
  6. simtools/applications/convert_geo_coordinates_of_array_elements.py +1 -1
  7. simtools/applications/db_add_file_to_db.py +2 -2
  8. simtools/applications/db_add_simulation_model_from_repository_to_db.py +1 -1
  9. simtools/applications/db_add_value_from_json_to_db.py +2 -2
  10. simtools/applications/db_development_tools/write_array_elements_positions_to_repository.py +1 -1
  11. simtools/applications/db_generate_compound_indexes.py +1 -1
  12. simtools/applications/db_get_array_layouts_from_db.py +2 -2
  13. simtools/applications/db_get_file_from_db.py +1 -1
  14. simtools/applications/db_get_parameter_from_db.py +1 -1
  15. simtools/applications/db_inspect_databases.py +4 -2
  16. simtools/applications/db_upload_model_repository.py +1 -1
  17. simtools/applications/derive_ctao_array_layouts.py +1 -1
  18. simtools/applications/derive_psf_parameters.py +5 -0
  19. simtools/applications/derive_pulse_shape_parameters.py +195 -0
  20. simtools/applications/generate_array_config.py +1 -1
  21. simtools/applications/maintain_simulation_model_add_production.py +11 -21
  22. simtools/applications/plot_array_layout.py +63 -1
  23. simtools/applications/production_generate_grid.py +1 -1
  24. simtools/applications/simulate_flasher.py +3 -2
  25. simtools/applications/simulate_pedestals.py +1 -1
  26. simtools/applications/simulate_prod.py +8 -23
  27. simtools/applications/simulate_prod_htcondor_generator.py +7 -0
  28. simtools/applications/submit_array_layouts.py +7 -5
  29. simtools/applications/validate_camera_fov.py +1 -1
  30. simtools/applications/validate_cumulative_psf.py +2 -2
  31. simtools/applications/validate_file_using_schema.py +49 -123
  32. simtools/applications/validate_optics.py +1 -1
  33. simtools/configuration/commandline_parser.py +15 -15
  34. simtools/configuration/configurator.py +1 -1
  35. simtools/corsika/corsika_config.py +199 -91
  36. simtools/data_model/model_data_writer.py +15 -3
  37. simtools/data_model/schema.py +145 -36
  38. simtools/data_model/validate_data.py +82 -48
  39. simtools/db/db_handler.py +61 -294
  40. simtools/db/db_model_upload.py +3 -2
  41. simtools/db/mongo_db.py +626 -0
  42. simtools/dependencies.py +38 -17
  43. simtools/io/eventio_handler.py +128 -0
  44. simtools/job_execution/htcondor_script_generator.py +0 -2
  45. simtools/layout/array_layout.py +7 -7
  46. simtools/layout/array_layout_utils.py +4 -4
  47. simtools/model/array_model.py +72 -72
  48. simtools/model/calibration_model.py +12 -9
  49. simtools/model/model_parameter.py +196 -160
  50. simtools/model/model_repository.py +176 -39
  51. simtools/model/model_utils.py +3 -3
  52. simtools/model/site_model.py +59 -27
  53. simtools/model/telescope_model.py +21 -13
  54. simtools/ray_tracing/mirror_panel_psf.py +4 -4
  55. simtools/ray_tracing/psf_analysis.py +11 -8
  56. simtools/ray_tracing/psf_parameter_optimisation.py +823 -680
  57. simtools/reporting/docs_auto_report_generator.py +1 -1
  58. simtools/reporting/docs_read_parameters.py +72 -11
  59. simtools/runners/corsika_runner.py +12 -3
  60. simtools/runners/corsika_simtel_runner.py +6 -0
  61. simtools/runners/runner_services.py +17 -7
  62. simtools/runners/simtel_runner.py +12 -54
  63. simtools/schemas/model_parameters/flasher_pulse_exp_decay.schema.yml +2 -0
  64. simtools/schemas/model_parameters/flasher_pulse_shape.schema.yml +50 -0
  65. simtools/schemas/model_parameters/flasher_pulse_width.schema.yml +2 -0
  66. simtools/schemas/simulation_models_info.schema.yml +4 -1
  67. simtools/simtel/pulse_shapes.py +268 -0
  68. simtools/simtel/simtel_config_writer.py +179 -21
  69. simtools/simtel/simtel_io_event_writer.py +2 -2
  70. simtools/simtel/simulator_array.py +58 -12
  71. simtools/simtel/simulator_light_emission.py +45 -8
  72. simtools/simulator.py +361 -346
  73. simtools/testing/assertions.py +110 -10
  74. simtools/testing/configuration.py +1 -1
  75. simtools/testing/log_inspector.py +4 -1
  76. simtools/testing/sim_telarray_metadata.py +1 -1
  77. simtools/testing/validate_output.py +46 -15
  78. simtools/utils/names.py +2 -4
  79. simtools/utils/value_conversion.py +10 -5
  80. simtools/version.py +61 -0
  81. simtools/visualization/legend_handlers.py +14 -4
  82. simtools/visualization/plot_array_layout.py +229 -33
  83. simtools/visualization/plot_mirrors.py +837 -0
  84. simtools/visualization/plot_pixels.py +1 -1
  85. simtools/visualization/plot_psf.py +1 -1
  86. simtools/visualization/plot_tables.py +1 -1
  87. simtools/simtel/simtel_io_file_info.py +0 -62
  88. {gammasimtools-0.23.0.dist-info → gammasimtools-0.25.0.dist-info}/WHEEL +0 -0
  89. {gammasimtools-0.23.0.dist-info → gammasimtools-0.25.0.dist-info}/licenses/LICENSE +0 -0
  90. {gammasimtools-0.23.0.dist-info → gammasimtools-0.25.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,626 @@
1
+ """MongoDB database handler for direct database operations."""
2
+
3
+ import io
4
+ import logging
5
+ import re
6
+ from pathlib import Path
7
+ from threading import Lock
8
+
9
+ import gridfs
10
+ import jsonschema
11
+ from astropy.table import Table
12
+ from bson.objectid import ObjectId
13
+ from pymongo import MongoClient, monitoring
14
+
15
+ from simtools.io import ascii_handler
16
+
17
# Keep pymongo's own loggers at WARNING level.
logging.getLogger("pymongo").setLevel(logging.WARNING)


# Definitions of the individual configuration entries accepted for a MongoDB connection.
_db_config_properties = {
    "db_server": {"type": "string", "description": "DB server address"},
    "db_api_port": {
        "type": "integer",
        "minimum": 1,
        "maximum": 65535,
        "default": 27017,
        "description": "Port to use",
    },
    "db_api_user": {"type": "string", "description": "API username"},
    "db_api_pw": {"type": "string", "description": "Password for the API user"},
    "db_api_authentication_database": {
        "type": ["string", "null"],
        "default": "admin",
        "description": "DB with user info (optional)",
    },
    "db_simulation_model": {
        "type": "string",
        "description": "Name of simulation model database",
    },
    "db_simulation_model_version": {
        "type": "string",
        "description": "Version of simulation model database",
    },
}

# Entries that must be present; the authentication database is the only optional one.
_db_config_required = [
    "db_server",
    "db_api_port",
    "db_api_user",
    "db_api_pw",
    "db_simulation_model",
    "db_simulation_model_version",
]

# JSON schema used to validate the MongoDB configuration dictionary.
jsonschema_db_dict = {
    "$schema": "https://json-schema.org/draft/2020-12/schema#",
    "type": "object",
    "description": "MongoDB configuration",
    "properties": _db_config_properties,
    "required": _db_config_required,
}
58
+
59
+
60
class IdleConnectionMonitor(monitoring.ConnectionPoolListener):
    """
    Connection pool listener keeping a running count of open MongoDB connections.

    The counter goes up when a connection is created and down when one is closed;
    both events are reported as debug log messages. All remaining pool and
    connection events are accepted and ignored.
    """

    def __init__(self):
        self.open_connections = 0
        self._logger = logging.getLogger("IdleConnectionMonitor")

    # -- events that change the connection count ---------------------------------

    def connection_created(self, event):
        """Count a newly created connection and log the new total."""
        self.open_connections = self.open_connections + 1
        message = (
            f"MongoDB connection Created: {event.address}. Total in Pool: {self.open_connections}"
        )
        self._logger.debug(message)

    def connection_closed(self, event):
        """Remove a closed connection from the count and log the reason and new total."""
        self.open_connections = self.open_connections - 1
        message = (
            f"MongoDB connection Closed: {event.address}. Reason: {event.reason}. "
            f"Total in Pool: {self.open_connections}"
        )
        self._logger.debug(message)

    # -- events that are deliberately ignored --------------------------------------

    def connection_check_out_started(self, event):
        """Ignore the start of a connection check out."""

    def connection_check_out_failed(self, event):
        """Ignore a failed connection check out."""

    def connection_checked_out(self, event):
        """Ignore a completed connection check out."""

    def connection_checked_in(self, event):
        """Ignore a connection being checked in."""

    def connection_ready(self, event):
        """Ignore a connection becoming ready."""

    def pool_created(self, event):
        """Ignore the creation of a connection pool."""

    def pool_ready(self, event):
        """Ignore a connection pool becoming ready."""

    def pool_cleared(self, event):
        """Ignore a connection pool being cleared."""

    def pool_closed(self, event):
        """Ignore the closure of a connection pool."""
113
+
114
+
115
class MongoDBHandler:  # pylint: disable=unsubscriptable-object
    """
    MongoDBHandler provides low-level interface to MongoDB operations.

    This class handles direct MongoDB operations including connection management,
    database queries, file operations via GridFS, and index generation.

    The MongoDB client is stored on the class (``db_client``) and is created at most
    once; every instance uses this shared client.

    Parameters
    ----------
    db_config: dict
        Dictionary with the MongoDB configuration (see jsonschema_db_dict for details).
    """

    # Shared client; remains None until an instance with a valid configuration is created.
    db_client: "MongoClient | None" = None
    # Serializes creation of the shared client.
    _lock = Lock()
    _logger = logging.getLogger(__name__)

    def __init__(self, db_config=None):
        """Initialize the MongoDBHandler class."""
        self.db_config = MongoDBHandler.validate_db_config(db_config)
        # Per-instance cache of collection names, keyed by database name.
        self.list_of_collections = {}

        if self.db_config:
            self._initialize_client(self.db_config)

    @classmethod
    def _initialize_client(cls, db_config):
        """
        Initialize the MongoDB client in a thread-safe manner.

        Only initializes if it hasn't been done yet. Uses double-checked locking
        to ensure thread safety. If a client already exists, ``db_config`` is ignored.

        Parameters
        ----------
        db_config: dict
            Dictionary with the MongoDB configuration.

        Raises
        ------
        Exception
            Any error raised while building the URI or creating the client is logged
            and re-raised unchanged.
        """
        if cls.db_client is not None:
            return
        with cls._lock:
            # Re-check under the lock: another thread may have created the client meanwhile.
            if cls.db_client is None:
                try:
                    uri = cls._build_uri(db_config)
                    client_kwargs = {"maxIdleTimeMS": 10000}

                    # Connection pool events are only tracked when debug logging is enabled.
                    if cls._logger.isEnabledFor(logging.DEBUG):
                        client_kwargs["event_listeners"] = [IdleConnectionMonitor()]

                    cls.db_client = MongoClient(uri, **client_kwargs)
                    cls._logger.debug("MongoDB client initialized successfully.")
                except Exception as e:
                    cls._logger.error(f"Failed to initialize MongoDB client: {e}")
                    raise

    @staticmethod
    def _build_uri(db_config):
        """
        Build MongoDB URI from configuration.

        User name and password are percent-escaped, so that credentials containing
        reserved characters (e.g. '@', ':', '/', '%') result in a valid URI.

        Parameters
        ----------
        db_config: dict
            Dictionary with the MongoDB configuration.

        Returns
        -------
        str
            MongoDB connection URI.
        """
        from urllib.parse import quote_plus  # pylint: disable=import-outside-toplevel

        server = db_config["db_server"]
        port = db_config["db_api_port"]
        username = quote_plus(str(db_config["db_api_user"]))
        password = quote_plus(str(db_config["db_api_pw"]))
        # A missing, None or empty authentication database falls back to "admin".
        auth_source = db_config.get("db_api_authentication_database") or "admin"

        params = [f"authSource={auth_source}"]
        if server in ("localhost", "simtools-mongodb", "mongodb"):
            params.append("directConnection=true")
        else:
            # NOTE(review): these options appear to switch off host name and certificate
            # verification for all non-local servers - confirm that this is intended.
            params.append("ssl=true")
            params.append("tlsAllowInvalidHostnames=true")
            params.append("tlsAllowInvalidCertificates=true")

        return f"mongodb://{username}:{password}@{server}:{port}/?{'&'.join(params)}"

    @staticmethod
    def validate_db_config(db_config):
        """
        Validate the MongoDB configuration.

        Parameters
        ----------
        db_config: dict
            Dictionary with the MongoDB configuration.

        Returns
        -------
        dict or None
            Validated MongoDB configuration or None if no valid config provided
            (``db_config`` is None or all of its values are None).

        Raises
        ------
        ValueError
            If the MongoDB configuration is invalid.
        """
        if db_config is None or all(value is None for value in db_config.values()):
            return None
        try:
            jsonschema.validate(instance=db_config, schema=jsonschema_db_dict)
        except jsonschema.exceptions.ValidationError as err:
            raise ValueError("Invalid MongoDB configuration") from err
        return db_config

    @staticmethod
    def get_db_name(db_name=None, db_simulation_model_version=None, model_name=None):
        """
        Build DB name from configuration.

        Parameters
        ----------
        db_name: str
            Direct database name (if provided, returns this).
        db_simulation_model_version: str
            Version of the simulation model.
        model_name: str
            Name of the simulation model.

        Returns
        -------
        str or None
            ``db_name`` if given; otherwise the model name joined by '-' with the version
            (dots replaced by '-'); None if neither is available.
        """
        if db_name:
            return db_name
        if db_simulation_model_version and model_name:
            return f"{model_name}-{db_simulation_model_version.replace('.', '-')}"
        return None

    def print_connection_info(self, db_name):
        """
        Print the connection information.

        Parameters
        ----------
        db_name: str
            Name of the database.
        """
        if self.db_config:
            self._logger.info(
                f"Connected to MongoDB at {self.db_config['db_server']}:"
                f"{self.db_config['db_api_port']} "
                f"using database: {db_name}"
            )
        else:
            self._logger.info("No MongoDB configuration provided.")

    def is_remote_database(self):
        """
        Check if the database is remote.

        Check for domain pattern like "cta-simpipe-protodb.zeuthen.desy.de"

        Returns
        -------
        bool
            True if the database is remote, False otherwise (including the case
            that no configuration is available).
        """
        if self.db_config:
            db_server = self.db_config["db_server"]
            domain_pattern = r"^([a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}$"
            return bool(re.match(domain_pattern, db_server))
        return False

    @staticmethod
    def get_entry_date_from_document(document):
        """
        Extract entry date from a MongoDB document's ObjectId.

        Parameters
        ----------
        document: dict
            MongoDB document with '_id' field.

        Returns
        -------
        datetime.datetime
            The generation time of the document's ObjectId.
        """
        return ObjectId(document["_id"]).generation_time

    def get_collection(self, collection_name, db_name):
        """
        Get a collection from the DB.

        Parameters
        ----------
        collection_name: str
            Name of the collection.
        db_name: str
            Name of the DB.

        Returns
        -------
        pymongo.collection.Collection
            The collection from the DB.
        """
        return MongoDBHandler.db_client[db_name][collection_name]

    def get_collections(self, db_name, model_collections_only=False):
        """
        List of collections in the DB.

        The collection names are read from the database on the first call for a given
        ``db_name`` and cached on this instance for subsequent calls.

        Parameters
        ----------
        db_name: str
            Database name.
        model_collections_only: bool
            If True, only return model collections (i.e. exclude fs.files, fs.chunks)

        Returns
        -------
        list
            List of collection names (a new list; modifying it does not affect the cache).
        """
        if db_name not in self.list_of_collections:
            self.list_of_collections[db_name] = MongoDBHandler.db_client[
                db_name
            ].list_collection_names()
        collections = self.list_of_collections[db_name]
        if model_collections_only:
            return [collection for collection in collections if not collection.startswith("fs.")]
        # Hand out a copy so that callers cannot alter the cached list.
        return list(collections)

    def list_database_names(self):
        """
        Get list of database names.

        Returns
        -------
        list
            List of database names.
        """
        return MongoDBHandler.db_client.list_database_names()

    def generate_compound_indexes_for_databases(
        self, db_name, db_simulation_model, db_simulation_model_version
    ):
        """
        Generate compound indexes for several databases.

        The database name is resolved with get_db_name(). The special name "all" selects
        every database except "config", "admin" and "local".

        Parameters
        ----------
        db_name: str
            Name of the database.
        db_simulation_model: str
            Name of the simulation model.
        db_simulation_model_version: str
            Version of the simulation model.

        Raises
        ------
        ValueError
            If the requested database is not found.
        """
        databases = [
            d
            for d in MongoDBHandler.db_client.list_database_names()
            if d not in ("config", "admin", "local")
        ]
        requested = self.get_db_name(
            db_name=db_name,
            db_simulation_model_version=db_simulation_model_version,
            model_name=db_simulation_model,
        )
        if requested != "all" and requested not in databases:
            raise ValueError(
                f"Requested database '{requested}' not found. "
                f"Following databases are available: {', '.join(databases)}"
            )

        databases = databases if requested == "all" else [requested]
        for dbs in databases:
            self._logger.info(f"Generating compound indexes for database: {dbs}")
            self.generate_compound_indexes(db_name=dbs)

    def generate_compound_indexes(self, db_name):
        """
        Generate compound indexes for the MongoDB collections.

        Indexes based on the typical query patterns.

        Parameters
        ----------
        db_name: str
            Name of the database.
        """
        collection_names = [
            "telescopes",
            "sites",
            "configuration_sim_telarray",
            "configuration_corsika",
            "calibration_devices",
        ]
        for collection_name in collection_names:
            db_collection = self.get_collection(collection_name, db_name=db_name)
            db_collection.create_index(
                [("instrument", 1), ("site", 1), ("parameter", 1), ("parameter_version", 1)]
            )
        # Production tables are indexed by a different set of keys.
        db_collection = self.get_collection("production_tables", db_name=db_name)
        db_collection.create_index([("collection", 1), ("model_version", 1)])

    def query_db(self, query, collection_name, db_name):
        """
        Query MongoDB and return results as list.

        Parameters
        ----------
        query: dict
            Query to execute.
        collection_name: str
            Collection name.
        db_name: str
            Database name.

        Returns
        -------
        list
            List of documents matching the query.

        Raises
        ------
        ValueError
            if query returned no results.
        """
        collection = self.get_collection(collection_name, db_name=db_name)
        posts = list(collection.find(query))
        if not posts:
            raise ValueError(
                f"The following query for {collection_name} returned zero results: {query} "
            )
        return posts

    def find_one(self, query, collection_name, db_name):
        """
        Query MongoDB and return first result.

        Parameters
        ----------
        query: dict
            Query to execute.
        collection_name: str
            Collection name.
        db_name: str
            Database name.

        Returns
        -------
        dict or None
            First document matching the query or None.
        """
        collection = self.get_collection(collection_name, db_name=db_name)
        return collection.find_one(query)

    def insert_one(self, document, collection_name, db_name):
        """
        Insert a document into a collection.

        Parameters
        ----------
        document: dict
            Document to insert.
        collection_name: str
            Collection name.
        db_name: str
            Database name.

        Returns
        -------
        InsertOneResult
            Result of the insert operation.
        """
        collection = self.get_collection(collection_name, db_name=db_name)
        return collection.insert_one(document)

    def get_file_from_db(self, db_name, file_name):
        """
        Extract a file from MongoDB and return GridFS file instance.

        Parameters
        ----------
        db_name: str
            The name of the DB with files of tabulated data
        file_name: str
            The name of the file requested

        Returns
        -------
        GridOut
            A file instance returned by GridFS find_one

        Raises
        ------
        FileNotFoundError
            If the desired file is not found.
        """
        file_system = gridfs.GridFS(MongoDBHandler.db_client[db_name])
        # A single lookup; find_one yields None when no file of that name exists.
        grid_out = file_system.find_one({"filename": file_name})
        if grid_out is None:
            raise FileNotFoundError(
                f"The file {file_name} does not exist in the database {db_name}"
            )
        return grid_out

    def write_file_from_db_to_disk(self, db_name, path, file):
        """
        Extract a file from MongoDB and write it to disk.

        The file is written into the directory ``path`` using the name stored in the DB.

        Parameters
        ----------
        db_name: str
            The name of the DB with files of tabulated data
        path: str or Path
            The path to write the file to
        file: GridOut
            A file instance returned by GridFS find_one
        """
        db = MongoDBHandler.db_client[db_name]
        fs_output = gridfs.GridFSBucket(db)
        with open(Path(path).joinpath(file.filename), "wb") as output_file:
            fs_output.download_to_stream_by_name(file.filename, output_file)

    def get_ecsv_file_as_astropy_table(self, file_name, db_name):
        """
        Read contents of an ECSV file from the database and return it as an Astropy Table.

        Files are not written to disk.

        Parameters
        ----------
        file_name: str
            The name of the ECSV file.
        db_name: str
            The name of the database.

        Returns
        -------
        astropy.table.Table
            The contents of the ECSV file as an Astropy Table.

        Raises
        ------
        FileNotFoundError
            If the file is not found in the database.
        """
        db = MongoDBHandler.db_client[db_name]
        fs = gridfs.GridFSBucket(db)

        buf = io.BytesIO()
        try:
            fs.download_to_stream_by_name(file_name, buf)
        except gridfs.errors.NoFile as exc:
            raise FileNotFoundError(f"ECSV file '{file_name}' not found in DB.") from exc
        # getvalue() returns the complete buffer independent of the stream position.
        return Table.read(buf.getvalue().decode("utf-8"), format="ascii.ecsv")

    def insert_file_to_db(self, file_name, db_name, **kwargs):
        """
        Insert a file to the DB.

        Parameters
        ----------
        file_name: str or Path
            The name of the file to insert (full path).
        db_name: str
            The name of the DB
        **kwargs (optional): keyword arguments for file creation.
            The full list of arguments can be found in
            https://www.mongodb.com/docs/manual/core/gridfs/
            'content_type' defaults to "ascii/dat" and 'filename' to the base name
            of ``file_name``.

        Returns
        -------
        file_id: GridOut._id
            If the file exists, return its GridOut._id, otherwise insert the file and return
            its newly created DB GridOut._id.

        Raises
        ------
        ValueError
            If the file to be inserted is not UTF-8 encoded.
        """
        db = MongoDBHandler.db_client[db_name]
        file_system = gridfs.GridFS(db)

        kwargs.setdefault("content_type", "ascii/dat")
        kwargs.setdefault("filename", Path(file_name).name)

        # A single lookup serves both as existence check and to obtain the ID.
        existing_file = file_system.find_one({"filename": kwargs["filename"]})
        if existing_file is not None:
            self._logger.warning(
                f"The file {kwargs['filename']} exists in the DB. Returning its ID"
            )
            # _id is a public attribute in GridFS GridOut objects
            # pylint: disable=protected-access
            return existing_file._id

        if not ascii_handler.is_utf8_file(file_name):
            raise ValueError(f"File is not UTF-8 encoded: {file_name}")

        self._logger.debug(f"Writing file to DB: {file_name}")
        with open(file_name, "rb") as data_file:
            return file_system.put(data_file, **kwargs)