eegdash 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of eegdash has been flagged as potentially problematic.

Files changed (72)
  1. eegdash/SignalStore/__init__.py +0 -0
  2. eegdash/SignalStore/signalstore/__init__.py +3 -0
  3. eegdash/SignalStore/signalstore/adapters/read_adapters/abstract_read_adapter.py +13 -0
  4. eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/schema_read_adapter.py +16 -0
  5. eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/vocabulary_read_adapter.py +19 -0
  6. eegdash/SignalStore/signalstore/adapters/read_adapters/handmade_records/excel_study_organizer_read_adapter.py +114 -0
  7. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/axona/axona_read_adapter.py +912 -0
  8. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/ReadIntanSpikeFile.py +140 -0
  9. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/intan_read_adapter.py +29 -0
  10. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/__init__.py +0 -0
  11. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/data_to_result.py +62 -0
  12. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/get_bytes_per_data_block.py +36 -0
  13. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/notch_filter.py +50 -0
  14. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/qstring.py +41 -0
  15. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_header.py +135 -0
  16. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_one_data_block.py +45 -0
  17. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/load_intan_rhd_format.py +204 -0
  18. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/__init__.py +0 -0
  19. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/data_to_result.py +60 -0
  20. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/get_bytes_per_data_block.py +37 -0
  21. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/notch_filter.py +50 -0
  22. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/qstring.py +41 -0
  23. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_header.py +153 -0
  24. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_one_data_block.py +47 -0
  25. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/load_intan_rhs_format.py +213 -0
  26. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/neurodata_without_borders/neurodata_without_borders_read_adapter.py +14 -0
  27. eegdash/SignalStore/signalstore/operations/__init__.py +4 -0
  28. eegdash/SignalStore/signalstore/operations/handler_executor.py +22 -0
  29. eegdash/SignalStore/signalstore/operations/handler_factory.py +41 -0
  30. eegdash/SignalStore/signalstore/operations/handlers/base_handler.py +44 -0
  31. eegdash/SignalStore/signalstore/operations/handlers/domain/property_model_handlers.py +79 -0
  32. eegdash/SignalStore/signalstore/operations/handlers/domain/schema_handlers.py +3 -0
  33. eegdash/SignalStore/signalstore/operations/helpers/abstract_helper.py +17 -0
  34. eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_extractor.py +33 -0
  35. eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_rawio.py +165 -0
  36. eegdash/SignalStore/signalstore/operations/helpers/spikeinterface_helper.py +100 -0
  37. eegdash/SignalStore/signalstore/operations/helpers/wrappers/neo_wrappers.py +21 -0
  38. eegdash/SignalStore/signalstore/operations/helpers/wrappers/nwb_wrappers.py +27 -0
  39. eegdash/SignalStore/signalstore/store/__init__.py +8 -0
  40. eegdash/SignalStore/signalstore/store/data_access_objects.py +1181 -0
  41. eegdash/SignalStore/signalstore/store/datafile_adapters.py +131 -0
  42. eegdash/SignalStore/signalstore/store/repositories.py +928 -0
  43. eegdash/SignalStore/signalstore/store/store_errors.py +68 -0
  44. eegdash/SignalStore/signalstore/store/unit_of_work.py +97 -0
  45. eegdash/SignalStore/signalstore/store/unit_of_work_provider.py +67 -0
  46. eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_recording.py +1 -0
  47. eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_sorter.py +1 -0
  48. eegdash/SignalStore/signalstore/utilities/testing/data_mocks.py +513 -0
  49. eegdash/SignalStore/signalstore/utilities/tools/dataarrays.py +49 -0
  50. eegdash/SignalStore/signalstore/utilities/tools/mongo_records.py +25 -0
  51. eegdash/SignalStore/signalstore/utilities/tools/operation_response.py +78 -0
  52. eegdash/SignalStore/signalstore/utilities/tools/purge_orchestration_response.py +21 -0
  53. eegdash/SignalStore/signalstore/utilities/tools/quantities.py +15 -0
  54. eegdash/SignalStore/signalstore/utilities/tools/strings.py +38 -0
  55. eegdash/SignalStore/signalstore/utilities/tools/time.py +17 -0
  56. eegdash/SignalStore/tests/conftest.py +799 -0
  57. eegdash/SignalStore/tests/data/valid_data/data_arrays/make_fake_data.py +59 -0
  58. eegdash/SignalStore/tests/unit/store/conftest.py +0 -0
  59. eegdash/SignalStore/tests/unit/store/test_data_access_objects.py +1235 -0
  60. eegdash/SignalStore/tests/unit/store/test_repositories.py +1309 -0
  61. eegdash/SignalStore/tests/unit/store/test_unit_of_work.py +7 -0
  62. eegdash/SignalStore/tests/unit/test_ci_cd.py +8 -0
  63. eegdash/__init__.py +1 -0
  64. eegdash/aws_ingest.py +29 -0
  65. eegdash/data_utils.py +213 -0
  66. eegdash/main.py +17 -0
  67. eegdash/signalstore_data_utils.py +280 -0
  68. eegdash-0.0.1.dist-info/LICENSE +20 -0
  69. eegdash-0.0.1.dist-info/METADATA +72 -0
  70. eegdash-0.0.1.dist-info/RECORD +72 -0
  71. eegdash-0.0.1.dist-info/WHEEL +5 -0
  72. eegdash-0.0.1.dist-info/top_level.txt +1 -0
eegdash/SignalStore/signalstore/store/data_access_objects.py
@@ -0,0 +1,1181 @@
+ from abc import ABC, abstractmethod
+ from datetime import datetime, timezone
+ import os
+ import re
+ import numpy as np
+ import json
+ import traceback
+ from time import sleep
+ import xarray as xr
+ import pymongo
+
+ from eegdash.SignalStore.signalstore.store.store_errors import *
+
+ from eegdash.SignalStore.signalstore.store.datafile_adapters import AbstractDataFileAdapter, XarrayDataArrayNetCDFAdapter
+ from concurrent.futures import ThreadPoolExecutor
+
+ class AbstractDataAccessObject(ABC):
+
+     @abstractmethod
+     def get(self):
+         """Get a single object."""
+         pass
+
+     @abstractmethod
+     def exists(self):
+         """Check if an object exists."""
+         pass
+
+     @abstractmethod
+     def add(self):
+         """Add a single object."""
+         pass
+
+     @abstractmethod
+     def mark_for_deletion(self):
+         """Mark a single object for deletion."""
+         pass
+
+     @abstractmethod
+     def restore(self):
+         """Restore the most recent version of a single object."""
+         pass
+
+     @abstractmethod
+     def list_marked_for_deletion(self):
+         """List objects marked for deletion."""
+         pass
+
+     @abstractmethod
+     def purge(self):
+         """Purge (permanently delete) objects marked for deletion."""
+         pass
+
+ class AbstractQueriableDataAccessObject(AbstractDataAccessObject):
+     @abstractmethod
+     def find(self):
+         """Apply filtering to get multiple objects fitting a description."""
+         pass
+
+
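Together these seven methods define the soft-delete lifecycle that every DAO in this module implements: `add` stores an object, `mark_for_deletion` hides it without destroying it, `restore` brings it back, and `purge` permanently drops whatever is still marked. A toy sketch of that contract (the `DictDAO` class below is hypothetical, for illustration only, and not part of the package):

```python
from datetime import datetime, timezone

from eegdash.SignalStore.signalstore.store.data_access_objects import AbstractDataAccessObject

class DictDAO(AbstractDataAccessObject):
    """Hypothetical minimal subclass: live objects in one dict, soft-deleted ones in another."""

    def __init__(self):
        self._live, self._trash = {}, {}

    def get(self, key):
        return self._live.get(key)

    def exists(self, key):
        return key in self._live

    def add(self, key, obj):
        self._live[key] = obj

    def mark_for_deletion(self, key):
        # soft delete: move aside with a removal timestamp instead of destroying
        self._trash[key] = (self._live.pop(key), datetime.now(timezone.utc))

    def restore(self, key):
        self._live[key] = self._trash.pop(key)[0]

    def list_marked_for_deletion(self):
        return list(self._trash)

    def purge(self):
        count = len(self._trash)
        self._trash.clear()
        return count
```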
+ # ===================
+ # Data Access Objects
+ # ===================
+ class MongoDAOTypeError(TypeError):
+     pass
+
+ class MongoDAODocumentNotFoundError(NotFoundError):
+     pass
+
+ class MongoDAODocumentAlreadyExistsError(AlreadyExistsError):
+     pass
+
+ class MongoDAORangeError(RangeError):
+     pass
+
+ class MongoDAOArgumentNameError(ArgumentNameError):
+     pass
+
+ class MongoDAOUncaughtError(UncaughtError):
+     pass
+
+ class MongoDAO(AbstractQueriableDataAccessObject):
+     """Base class for accessing MongoDB documents.
+     It implements the basic CRUD operations in a way that simplifies the
+     implementation of specific data access objects for different types of
+     documents.
+     """
+     def __init__(self,
+                  client,
+                  database_name: str,
+                  collection_name: str,
+                  index_fields: list
+                  ):
+         """Initializes the base MongoDB Data Access Object."""
+         self._client = client  # MongoDB client
+         self._db = client[database_name]  # MongoDB database
+         self._collection = self._db[collection_name]  # MongoDB collection
+         self._collection_name = collection_name
+         # index by index_fields, plus the version/removal bookkeeping fields
+         self._index_args = set(index_fields)
+         index_field_tuples = [(field, 1) for field in index_fields]
+         index_field_tuples.append(('version_timestamp', 1))
+         index_field_tuples.append(('time_of_removal', 1))
+         try:
+             self._collection.create_index(index_field_tuples, unique=True)  # create index
+         except pymongo.errors.OperationFailure as e:
+             if "user is not allowed to do action" in str(e).lower():
+                 # Skip index creation for users without write permissions
+                 pass
+             else:
+                 # Re-raise operation failures that aren't permission related
+                 raise
+         self._set_argument_types(index_fields)
+
+     def get(self, version_timestamp=0, **kwargs):
+         """Gets a document from the repository.
+         Arguments:
+             **kwargs {dict} -- Only the index fields are allowed as keyword arguments.
+         Returns:
+             dict or None -- The document, or None if it is not found.
+         """
+         self._check_kwargs_are_only_index_args(**kwargs)
+         self._check_args(version_timestamp=version_timestamp, **kwargs)
+         if isinstance(version_timestamp, datetime):
+             version_timestamp = version_timestamp.astimezone(timezone.utc)
+         document = self._collection.find_one(
+             {
+                 'time_of_removal': None,
+                 'version_timestamp': version_timestamp,
+                 **kwargs
+             },
+             {'_id': 0})
+         if document is None:
+             return None
+         else:
+             return self._deserialize(document)
+
+     def find(self, filter=None, projection=None, **kwargs):
+         """Returns a filtered list of documents from the repository.
+         Arguments:
+             filter {dict} -- The filter to apply to the query.
+             projection {dict} -- The fields to include or exclude in the results.
+         Returns:
+             list[dict] -- The list of documents.
+         """
+         self._check_args(filter=filter, projection=projection)
+         if filter is None:
+             filter = {"time_of_removal": None}
+         else:
+             filter = filter.copy()  # avoid mutations to the input dict
+             filter["time_of_removal"] = None
+         if projection is None:
+             projection = {'_id': 0}
+         else:
+             projection = projection.copy()  # avoid mutations to the input dict
+             projection['_id'] = 0
+
+         with ThreadPoolExecutor() as executor:
+             documents = list(executor.map(self._deserialize, self._collection.find(filter, projection, **kwargs)))
+         return documents
+
+     def exists(self, version_timestamp=0, **kwargs):
+         self._check_kwargs_are_only_index_args(**kwargs)
+         self._check_args(**kwargs)
+         if isinstance(version_timestamp, datetime):
+             version_timestamp = version_timestamp.astimezone(timezone.utc)
+         return self._collection.find_one(
+             {
+                 'time_of_removal': None,
+                 'version_timestamp': version_timestamp,
+                 **kwargs
+             }
+         ) is not None
+
+     def add(self, document, timestamp, versioning_on=False):
+         """Adds a document to the repository.
+         Arguments:
+             document {dict} -- The document to add.
+             timestamp {datetime} -- The timestamp to add the document with.
+         Raises:
+             MongoDAODocumentAlreadyExistsError -- If the document already exists.
+         Returns:
+             None
+         """
+         self._check_args(document=document, timestamp=timestamp)
+         document = document.copy()  # copy before mutating so the caller's dict is untouched
+         if document.get('version_timestamp') is None or document.get('version_timestamp') == 0:
+             if versioning_on:
+                 document['version_timestamp'] = timestamp
+             else:
+                 document['version_timestamp'] = 0
+         # get the index fields from the document
+         document_index_args = {key: value for key, value in document.items() if key in self._index_args}
+         if self.exists(**document_index_args):
+             raise MongoDAODocumentAlreadyExistsError(
+                 f'Cannot add document with index fields {document_index_args} because it already exists in repository.'
+             )
+         document['time_of_save'] = timestamp
+         document['time_of_removal'] = None
+         self._collection.insert_one(self._serialize(document))
+         return None
+
+     def mark_for_deletion(self, timestamp, version_timestamp=0, **kwargs):
+         """Marks a document for deletion.
+         Arguments:
+             timestamp {datetime} -- The timestamp to mark the document for deletion with.
+             **kwargs {dict} -- Only the index fields are allowed as keyword arguments.
+         Raises:
+             MongoDAODocumentNotFoundError -- If the document does not exist.
+         Returns:
+             None
+         """
+         sleep(0.0000001)  # sleep for a very short time to avoid non-unique time_of_removal values
+         self._check_args(timestamp=timestamp, version_timestamp=version_timestamp, **kwargs)
+         self._check_kwargs_are_only_index_args(**kwargs)
+         document = self.get(**kwargs, version_timestamp=version_timestamp)
+         if document is None:
+             raise MongoDAODocumentNotFoundError(
+                 f'Cannot delete document with index fields {kwargs} and version_timestamp {version_timestamp} because it does not exist in repository.'
+             )
+         else:
+             if isinstance(version_timestamp, datetime):
+                 version_timestamp = version_timestamp.astimezone(timezone.utc)
+             # filter on version_timestamp as well, so only the requested version is marked
+             self._collection.update_one(
+                 {'time_of_removal': None, 'version_timestamp': version_timestamp, **kwargs},
+                 {'$set': {"time_of_removal": datetime_to_microseconds(timestamp)}})
+         return None
+
+     def list_marked_for_deletion(self, time_threshold=None):
+         """Returns a list of all deleted documents from the repository."""
+         self._check_args(time_threshold=time_threshold)
+         if time_threshold is None:
+             query = {"time_of_removal": {"$ne": None}}
+         else:
+             query = {"time_of_removal": {"$lt": datetime_to_microseconds(time_threshold)}}
+         documents = [self._deserialize(document)
+                      for document in self._collection.find(query, {'_id': 0}, sort=[('time_of_removal', -1)])]
+         return documents
+
+     def restore(self, nth_most_recent=1, **kwargs):
+         """Restores a deleted document.
+         Arguments:
+             nth_most_recent {int} -- The nth most recent deleted version of the document to restore.
+             **kwargs {dict} -- Only the index fields are allowed as keyword arguments.
+         Raises:
+             MongoDAODocumentAlreadyExistsError -- If the document already exists.
+             MongoDAORangeError -- If no deleted instances are found or nth_most_recent is out of range.
+         Returns:
+             None
+         """
+         self._check_args(nth_most_recent=nth_most_recent, **kwargs)
+         self._check_kwargs_are_only_index_args(**kwargs)
+         # get the deleted versions of the document, most recently removed first
+         # (materialized as a list so the cursor can be counted and indexed)
+         documents = list(self._collection.find({'time_of_removal': {'$ne': None}, **kwargs}, {'_id': 0}, sort=[('time_of_removal', -1)]))
+         if len(documents) == 0:
+             raise MongoDAORangeError(
+                 f'Cannot restore document with index fields {kwargs}: no deleted instances of {kwargs} were found in repository.'
+             )
+         # check for an existing document with the same index fields and no time_of_removal value
+         document_exists = self.exists(**kwargs)
+         if document_exists:
+             raise MongoDAODocumentAlreadyExistsError(
+                 f'Cannot restore document with index fields {kwargs} because it already exists in repository.'
+             )
+         try:
+             nth_document = documents[nth_most_recent - 1]
+             nth_doc_kwargs = {key: value for key, value in nth_document.items() if key in self._index_args}
+         except IndexError:
+             raise MongoDAORangeError(
+                 f'Arg nth_most_recent={nth_most_recent} out of range. The record of deleted documents only contains {len(documents)} entries.'
+             )
+         # restore the document by updating its time_of_removal field to None
+         self._collection.update_one({'time_of_removal': nth_document['time_of_removal'], **nth_doc_kwargs}, {'$set': {"time_of_removal": None}})
+         return None
+
+     def purge(self, time_threshold=None):
+         """Purges deleted documents from the repository older than the time threshold.
+         Returns:
+             int -- The number of documents purged.
+         """
+         self._check_args(time_threshold=time_threshold)
+         if time_threshold is None:
+             result = self._collection.delete_many({"time_of_removal": {"$ne": None}})
+         else:
+             result = self._collection.delete_many({"time_of_removal": {"$lt": datetime_to_microseconds(time_threshold)}})
+         return result.deleted_count
+
+     def _check_args(self, **kwargs):
+         for key, value in kwargs.items():
+             try:
+                 arg_types = self._argument_types[key]
+             except KeyError:
+                 raise MongoDAOArgumentNameError(
+                     f'Invalid keyword argument name {key}.'
+                 )
+             if not isinstance(value, arg_types) and value is not None:
+                 raise MongoDAOTypeError(
+                     f'Invalid type {type(value)} for argument {key}. Must be one of {arg_types}.'
+                 )
+
+     def _check_kwargs_are_only_index_args(self, **kwargs):
+         keys = set(kwargs.keys())
+         index_2 = self._index_args - {'version_timestamp'}  # sometimes we don't use version_timestamp
+         if not (self._index_args == keys or index_2 == keys):
+             raise MongoDAOArgumentNameError(
+                 f"Invalid keyword arguments: {keys}.\nRequired arguments: {self._index_args - {'version_timestamp'}}.\nOptional arguments: 'version_timestamp'."
+             )
+
+     def _serialize(self, document):
+         """Serializes a document object.
+         Arguments:
+             document {dict} -- The document object to serialize.
+         Returns:
+             dict -- The serialized document.
+         """
+         result = document.copy()
+         for key, value in document.items():
+             serializer = self.property_serializers.get(key)
+             if serializer is not None:
+                 try:
+                     result[key] = serializer(value)
+                 except Exception as e:
+                     raise MongoDAOUncaughtError(
+                         f'An error occurred while serializing property {key}\n\nof document {document}.\n\nThe error was: {e}'
+                     )
+         return result
+
+     def _deserialize(self, document):
+         """Deserializes a document object.
+         Arguments:
+             document {dict} -- The document object to deserialize.
+         Returns:
+             dict -- The deserialized document.
+         """
+         result = document.copy()
+         for key, value in result.items():
+             deserializer = self.property_deserializers.get(key)
+             if deserializer is not None:
+                 try:
+                     result[key] = deserializer(value)
+                 except Exception as e:
+                     raise MongoDAOUncaughtError(
+                         f'An uncaught error occurred while deserializing property: "{key}" from document: \n\n{document}.\n\nThe error was: {e}'
+                     )
+         return result
+
+     @property
+     def property_serializers(self):
+         return {
+             'time_of_save': datetime_to_microseconds,
+             'time_of_removal': datetime_to_microseconds,
+             'version_timestamp': self._serialize_version_timestamp,
+             'json_schema': dict_to_json_bytes,
+         }
+
+     @property
+     def property_deserializers(self):
+         return {
+             'time_of_save': microseconds_to_datetime,
+             'time_of_removal': microseconds_to_datetime,
+             'version_timestamp': self._deserialize_version_timestamp,
+             'json_schema': json_bytes_to_dict,
+         }
+
+     @property
+     def collection_name(self):
+         return self._collection_name
+
+     def _set_argument_types(self, index_fields):
+         # set argument types dictionary for checking argument types
+         nowtype = type(datetime.now(timezone.utc))
+         self._argument_types = {
+             'version_timestamp': (nowtype, int),
+             'filter': (dict, type(None)),
+             'projection': (dict, type(None)),
+             'timestamp': nowtype,
+             'time_threshold': (nowtype, type(None)),
+             'nth_most_recent': int,
+             'override_existing_document': bool,
+             'not_exist_ok': bool,
+             'document': dict,
+         }
+         # set all the index names as argument options with string type
+         for field in index_fields:
+             if field not in self._argument_types:
+                 self._argument_types[field] = str  # only string type because they can never be None
+
+     def _serialize_version_timestamp(self, value):
+         if value == 0 or value is None:
+             return 0
+         else:
+             try:
+                 return value.astimezone(timezone.utc)
+             except AttributeError:
+                 raise MongoDAOTypeError(
+                     f"Invalid type {type(value)} for argument version_timestamp. Must be of type {type(datetime.now(timezone.utc))}."
+                 )
+
+     def _deserialize_version_timestamp(self, value):
+         if value == 0 or value is None:
+             return 0
+         else:
+             try:
+                 return value.replace(tzinfo=timezone.utc)  # don't shift the time twice; MongoDB only stores UTC time
+             except AttributeError:
+                 raise MongoDAOTypeError(
+                     f"Invalid type {type(value)} for argument version_timestamp. Must be of type {type(datetime.now(timezone.utc))}."
+                 )
+
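`MongoDAO` layers soft deletion and optional versioning over a single collection: live documents have `time_of_removal == None`, deletion merely stamps that field, and the unique index over the index fields plus `version_timestamp` and `time_of_removal` enforces one live copy per version. A minimal usage sketch, assuming a reachable local MongoDB; the database, collection, and index field below are made-up examples, not names used by eegdash:

```python
from datetime import datetime, timezone

import pymongo

from eegdash.SignalStore.signalstore.store.data_access_objects import MongoDAO

client = pymongo.MongoClient("mongodb://localhost:27017")  # assumed local instance
dao = MongoDAO(client, database_name="eegdash_demo",
               collection_name="recordings", index_fields=["data_name"])

dao.add({"data_name": "subject01", "sampling_rate": 256},
        timestamp=datetime.now(timezone.utc))
doc = dao.get(data_name="subject01")           # only index fields as kwargs
dao.mark_for_deletion(datetime.now(timezone.utc), data_name="subject01")
print(dao.list_marked_for_deletion())          # the soft-deleted document
dao.restore(data_name="subject01")             # clears time_of_removal again
print(dao.purge())                             # 0: nothing is still marked
```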
+ # ===================
+
+ class FileSystemDAOConfigError(ConfigError):
+     pass
+
+ class FileSystemDAOTypeError(ArgumentTypeError):
+     pass
+
+ class FileSystemDAOFileNotFoundError(NotFoundError):
+     pass
+
+ class FileSystemDAOFileAlreadyExistsError(AlreadyExistsError):
+     pass
+
+ class FileSystemDAORangeError(RangeError):
+     pass
+
+ class FileSystemDAOArgumentNameError(ArgumentNameError):
+     pass
+
+ class FileSystemDAOUncaughtError(UncaughtError):
+     pass
+
+ class FileSystemDAO(AbstractDataAccessObject):
+     def __init__(self, filesystem, project_dir, default_data_adapter=XarrayDataArrayNetCDFAdapter()):
+         self._fs = filesystem
+         # make sure the project directory exists
+         if not self._fs.exists(project_dir):
+             self._fs.mkdir(project_dir)
+         self._directory = project_dir
+         default_data_adapter.set_filesystem(self._fs)
+         self._default_data_adapter = default_data_adapter
+
+     def get(self, schema_ref, data_name, version_timestamp=0, nth_most_recent=1, data_adapter=None):
+         """Gets an object from the repository.
+         Arguments:
+             schema_ref {str} -- The type of object to get.
+             data_name {str} -- The name of the object to get.
+             version_timestamp {datetime|int} -- The version_timestamp of the object to get.
+         Raises:
+             FileSystemDAOFileNotFoundError -- If the object is not found.
+         Returns:
+             The object, or None if it is not found.
+         """
+         self._check_args(
+             schema_ref=schema_ref,
+             data_name=data_name,
+             nth_most_recent=nth_most_recent,
+             version_timestamp=version_timestamp,
+             data_adapter=data_adapter
+         )
+         if data_adapter is None:
+             data_adapter = self._default_data_adapter
+         else:
+             data_adapter.set_filesystem(self._fs)
+         path = self._get_file_path(schema_ref, data_name, version_timestamp, nth_most_recent, data_adapter)
+         if path is None:
+             return None
+         data_object = data_adapter.read_file(path)
+         data_object = self._deserialize(data_object)
+         return data_object
+
+     def _get_file_path(self, schema_ref, data_name, version_timestamp, nth_most_recent, data_adapter):
+         if data_adapter is None:
+             data_adapter = self._default_data_adapter
+         else:
+             data_adapter.set_filesystem(self._fs)
+         path = self.make_filepath(schema_ref, data_name, version_timestamp, data_adapter)
+         if not self._fs.exists(path):
+             # if the version_timestamp was specified, that's the only version we want to get;
+             # if it doesn't exist, check whether the precision was too high
+             if isinstance(version_timestamp, datetime):
+                 # try searching for the most recent version that matches up to millisecond precision
+                 ms_vts = str(datetime_to_microseconds(version_timestamp))[:-3]
+                 # find any file matching __version_{ms_vts} in the filename
+                 pattern = self._directory + '/' + self.make_base_filename(schema_ref, data_name) + f'__version_{ms_vts}[0-9][0-9][0-9]' + data_adapter.file_extension
+                 glob = self._fs.glob(pattern)
+                 # filter out the paths that have a time_of_removal value, then sort by version
+                 paths = list(filter(lambda path: '_time_of_removal_' not in path, glob))
+                 paths.sort()
+                 if len(paths) == 0:
+                     wrong_file_pattern = self._directory + '/' + self.make_base_filename(schema_ref, data_name) + f'__version_{ms_vts}[0-9][0-9][0-9]*'
+                     wrong_file_glob = self._fs.glob(wrong_file_pattern)
+                     bad_paths = list(filter(lambda path: '_time_of_removal_' not in path, wrong_file_glob))
+                     if len(bad_paths) == 0:
+                         return None
+                     else:
+                         raise FileSystemDAOFileNotFoundError(
+                             f"Cannot find a file fitting any pattern like {pattern}. However, there may be a file with a different file extension matching the pattern. The following files were found: {bad_paths}."
+                         )
+                 else:
+                     path = paths[0]
+             # if the version_timestamp was 0 (not specified) then get the nth most recent non-deleted one
+             else:
+                 basename = self.make_base_filename(schema_ref, data_name)
+                 pattern = self._directory + '/' + basename + '*_version_*' + data_adapter.file_extension
+                 glob = self._fs.glob(pattern)
+                 # filter out the paths that have a time_of_removal value
+                 paths = list(filter(lambda path: '_time_of_removal_' not in path, glob))
+                 if len(paths) == 0:
+                     return None
+                 else:
+                     # get the most recent version
+                     paths.sort()
+                     try:
+                         path = paths[-nth_most_recent]
+                     except IndexError:
+                         return None
+         return path
+
+     def exists(self, schema_ref, data_name, version_timestamp=0, data_adapter=None):
+         """Checks if an object exists in the repository.
+         Arguments:
+             schema_ref {str} -- The type of object to check.
+             data_name {str} -- The name of the object to check.
+             version_timestamp {datetime|int} -- The version of the object to check.
+         Returns:
+             bool -- True if the object exists, else False.
+         """
+         self._check_args(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp, data_adapter=data_adapter)
+         # try getting the object
+         try:
+             return self.get(schema_ref, data_name, version_timestamp, data_adapter=data_adapter) is not None
+         except FileSystemDAOFileNotFoundError:
+             raise FileSystemDAOFileNotFoundError(
+                 f"An error occurred while checking if the object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} exists in the repository. Traceback was: {traceback.format_exc()}"
+             )
+
+     def n_versions(self, schema_ref, data_name):
+         """Returns the number of versions of an object in the repository."""
+         self._check_args(schema_ref=schema_ref, data_name=data_name)
+         pattern = self.make_base_filename(schema_ref, data_name) + '*'
+         globstring = self._directory + '/' + pattern
+         glob = self._fs.glob(globstring)
+         # filter out the paths that have a time_of_removal value
+         paths = filter(lambda path: '_time_of_removal_' not in path, glob)
+         return len(list(paths))
+
+     def add(self, data_object, data_adapter=None):
+         """Adds an object to the repository.
+         Arguments:
+             data_object -- The object to add.
+             data_adapter -- The data adapter to use.
+                 If None, the default data adapter is used.
+                 The default data adapter is given to the constructor.
+         Raises:
+             FileSystemDAOFileAlreadyExistsError -- If the object already exists.
+         Returns:
+             None
+         """
+         self._check_args(data_adapter=data_adapter)
+         if data_adapter is None:
+             data_adapter = self._default_data_adapter
+         else:
+             data_adapter.set_filesystem(self._fs)
+         # separately check the object type using the data adapter
+         if not isinstance(data_object, data_adapter.data_object_type):
+             raise FileSystemDAOTypeError(
+                 f"Type mismatch: Received {type(data_object).__name__}, but expected {data_adapter.data_object_type.__name__}. Ensure 'data_object' matches the type required by the current 'data_adapter'. If you're using the default data adapter, it may not be compatible with 'data_object'. Consider specifying a different data adapter that accepts {type(data_object).__name__}. Current data_adapter type: {type(data_adapter).__name__}."
+             )
+         if data_object.attrs.get('version_timestamp') is None:
+             data_object.attrs['version_timestamp'] = 0
+         idkwargs = data_adapter.get_id_kwargs(data_object)  # (schema_ref, data_name, version_timestamp)
+         path = self.make_filepath(**idkwargs, data_adapter=data_adapter)
+
+         if self.exists(**idkwargs, data_adapter=data_adapter):
+             raise FileSystemDAOFileAlreadyExistsError(
+                 f'Cannot add object with path "{path}" because it already exists in repository.'
+             )
+         data_object = self._serialize(data_object)
+         data_adapter.write_file(path=path, data_object=data_object)
+         self._deserialize(data_object)  # undo the serialization in case the object is mutated
+         return None
+
+     def mark_for_deletion(self, schema_ref, data_name, time_of_removal, version_timestamp=0, data_adapter=None):
+         """Marks an object for deletion.
+         Arguments:
+             schema_ref {str} -- The type of object to mark for deletion.
+             data_name {str} -- The name of the object to mark for deletion.
+             time_of_removal {datetime} -- The timestamp to mark the object for deletion with.
+             version_timestamp {datetime|int} -- The version_timestamp of the object to mark for deletion.
+             data_adapter {AbstractDataFileAdapter} -- The data adapter to use.
+                 If None, the default data adapter is used.
+                 The default data adapter is given to the constructor.
+         Raises:
+             FileSystemDAOFileNotFoundError -- If the object does not exist.
+         Returns:
+             None
+         """
+         sleep(0.0000001)  # to prevent non-unique time_of_removal values
+         self._check_args(time_of_removal=time_of_removal,
+                          schema_ref=schema_ref,
+                          data_name=data_name,
+                          version_timestamp=version_timestamp,
+                          data_adapter=data_adapter)
+         if data_adapter is None:
+             data_adapter = self._default_data_adapter
+         else:
+             data_adapter.set_filesystem(self._fs)
+         path = self._get_file_path(schema_ref, data_name, version_timestamp, 1, data_adapter)
+         if path is None:
+             raise FileSystemDAOFileNotFoundError(
+                 f'Cannot remove object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} because it does not exist in repository. The path would have been {self.make_filepath(schema_ref, data_name, version_timestamp, data_adapter)} if this error had not occurred.'
+             )
+         # insert __time_of_removal_{time_of_removal} into the filename before the file extension
+         if not data_adapter.file_extension == '':
+             new_path = path.replace(data_adapter.file_extension, f'__time_of_removal_{datetime_to_microseconds(time_of_removal)}{data_adapter.file_extension}')
+         else:
+             new_path = path + f'__time_of_removal_{datetime_to_microseconds(time_of_removal)}'
+         if self._fs.exists(new_path):
+             raise FileSystemDAOFileAlreadyExistsError(
+                 f'Cannot mark object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} as marked for deletion because the path {new_path} already exists in repository. The time_of_removal may need to be updated to a more recent time.'
+             )
+         # move the file (or directory) to its trash name
+         try:
+             self._fs.mv(path1=str(path), path2=str(new_path), recursive=True)
+         except Exception as e:
+             trace = traceback.format_exc()
+             raise FileSystemDAOUncaughtError(
+                 f'An error occurred while renaming the object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} as marked for deletion. The old path was {path} and the new (trash) path was going to be {new_path}. The error was: {e} and the traceback was \n\n{trace}'
+             )
+         return None
+
+     def list_marked_for_deletion(self, time_threshold=None):
+         """Returns a list of all deleted objects from the repository.
+         Arguments:
+             time_threshold {datetime} -- The time threshold.
+         Returns:
+             list[str] -- The paths of the deleted objects.
+         """
+         self._check_args(
+             time_threshold=time_threshold,
+         )
+         glob_pattern = self._directory + '/*_time_of_removal_*'
+         paths = self._fs.glob(glob_pattern)
+         if time_threshold is None:
+             result = list(paths)
+         else:
+             result = []
+             for path in paths:
+                 tor = self._get_time_of_removal_from_path(path)
+                 if tor <= time_threshold:
+                     result.append(path)
+         return result
+
+     def restore(self, schema_ref, data_name, version_timestamp=0, nth_most_recent=1, data_adapter=None):
+         """Restores an object.
+         Arguments:
+             schema_ref {str} -- The type of object to restore.
+             data_name {str} -- The name of the object to restore.
+             version_timestamp {datetime|int} -- The version of the object to restore.
+             nth_most_recent {int} -- The nth most recent deleted version of the object to restore.
+             data_adapter {AbstractDataFileAdapter} -- The data adapter to use.
+         Raises:
+             FileSystemDAOFileAlreadyExistsError -- If the object already exists.
+             FileSystemDAORangeError -- If no deleted instances are found or nth_most_recent is out of range.
+         Returns:
+             None
+         """
+         self._check_args(schema_ref=schema_ref,
+                          data_name=data_name,
+                          version_timestamp=version_timestamp,
+                          nth_most_recent=nth_most_recent,
+                          data_adapter=data_adapter)
+         if not nth_most_recent > 0:
+             raise FileSystemDAORangeError(
+                 f'Arg nth_most_recent={nth_most_recent} out of range. Must be greater than 0.'
+             )
+         if data_adapter is None:
+             data_adapter = self._default_data_adapter
+         else:
+             data_adapter.set_filesystem(self._fs)
+         # get the deleted versions of the object, sorted by time_of_removal
+         basefilename = self.make_base_filename(schema_ref, data_name, version_timestamp)
+         pattern = self._directory + "/" + basefilename + '__time_of_removal_*' + data_adapter.file_extension
+         glob = self._fs.glob(pattern)
+         paths = list(sorted(glob))
+         if len(paths) == 0:
+             raise FileSystemDAORangeError(
+                 f'Cannot restore object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp}: no deleted instances of {schema_ref}, {data_name}, and {version_timestamp} were found in repository.'
+             )
+         # check for an existing object with the same data_name and no time_of_removal value
+         object_exists = self.exists(schema_ref, data_name, version_timestamp, data_adapter)
+         if object_exists:
+             raise FileSystemDAOFileAlreadyExistsError(
+                 f'Cannot restore object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} because it already exists in repository.'
+             )
+         try:
+             nth_path = paths[-nth_most_recent]
+         except IndexError:
+             raise FileSystemDAORangeError(
+                 f'Arg nth_most_recent={nth_most_recent} out of range. The record of deleted objects only contains {len(paths)} entries.'
+             )
+         # restore the object by moving it back to its original (non-trash) path
+         new_path = self.make_filepath(schema_ref, data_name, version_timestamp, data_adapter)
+         try:
+             self._fs.mv(str(nth_path), str(new_path), recursive=True)
+         except Exception as e:
+             raise FileSystemDAOUncaughtError(
+                 f'An error occurred while restoring object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} from the trash. The error was: {e}'
+             )
+         return None
+
+     def purge(self, time_threshold=None):
+         """Purges deleted objects from the repository older than the time threshold.
+         Arguments:
+             time_threshold {datetime} -- The time threshold.
+         Returns:
+             int -- The number of objects purged.
+         """
+         self._check_args(time_threshold=time_threshold)
+         paths = self.list_marked_for_deletion(time_threshold)
+         count = len(paths)
+         for path in paths:
+             self._fs.rm(path, recursive=True)
+         return count
+
+     def make_filepath(self, schema_ref, data_name, version_timestamp=0, data_adapter=None, time_of_removal=None):
+         """Returns the filepath for a data array."""
+         if data_adapter is None:
+             data_adapter = self._default_data_adapter
+         else:
+             data_adapter.set_filesystem(self._fs)
+         basename = self.make_base_filename(schema_ref, data_name, version_timestamp)
+         if time_of_removal is not None:
+             basename += f"__time_of_removal_{datetime_to_microseconds(time_of_removal)}"
+         filename = f"/{basename}{data_adapter.file_extension}"
+         return self._directory + filename
+
+     def make_base_filename(self, schema_ref, data_name, version_timestamp=0):
+         """Returns the base filename for a data array."""
+         basename = f"{schema_ref}__{data_name}"
+         if version_timestamp != 0:
+             basename += f"__version_{datetime_to_microseconds(version_timestamp)}"
+         return basename
+
+     def _serialize(self, data_object):
+         """Serializes a data object's attrs into file-safe values.
+         Arguments:
+             data_object -- The data object to serialize.
+         Returns:
+             The serialized data object.
+         """
+         attrs = data_object.attrs.copy()
+         for key, value in attrs.items():
+             if isinstance(value, bool):
+                 attrs[key] = str(value)
+             if value is None:
+                 attrs[key] = 'None'
+             if isinstance(value, dict):
+                 attrs[key] = json.dumps(value)
+             if isinstance(value, list):
+                 attrs[key] = json.dumps(value)
+             if key == 'version_timestamp' and value is None:
+                 attrs[key] = 0
+         data_object.attrs = attrs
+         return data_object
+
+     def _deserialize(self, data_object):
+         """Deserializes a data object's attrs.
+         Arguments:
+             data_object -- The data object to deserialize.
+         Returns:
+             The deserialized data object.
+         """
+         attrs = data_object.attrs.copy()
+         for key, value in attrs.items():
+             if isinstance(value, str):
+                 if value == 'True':
+                     attrs[key] = True
+                 elif value == 'False':
+                     attrs[key] = False
+                 elif value == 'None':
+                     attrs[key] = None
+                 elif value.startswith('{'):
+                     attrs[key] = json.loads(value)
+                 # check if value is a list by looking for brackets and commas
+                 elif value.startswith('[') and value.endswith(']') and ',' in value:
+                     attrs[key] = json.loads(value)
+                 else:
+                     # fall back to integer conversion for numeric strings
+                     try:
+                         attrs[key] = int(value)
+                     except ValueError:
+                         pass
+             elif isinstance(value, np.ndarray):
+                 attrs[key] = value.tolist()
+         data_object.attrs = attrs
+         return data_object
+
+     def _get_time_of_removal_from_path(self, path):
+         """Returns the time of removal from a path."""
+         filename = os.path.basename(path)
+         match = re.search(r'__time_of_removal_(\d+)', filename)
+         if match is None:
+             raise FileSystemDAOUncaughtError(
+                 f'An error occurred while parsing the time_of_removal from path {path}.'
+             )
+         return microseconds_to_datetime(int(match.group(1)))
+
+     def _check_args(self, **kwargs):
+         for key, value in kwargs.items():
+             try:
+                 arg_types = self._argument_types[key]
+             except KeyError:
+                 raise FileSystemDAOArgumentNameError(
+                     f'Invalid keyword argument name {key}.'
+                 )
+             if not isinstance(value, arg_types):
+                 raise FileSystemDAOTypeError(
+                     f'Invalid type {type(value)} for argument {key}. Must be one of {arg_types}.'
+                 )
+
+     @property
+     def _argument_types(self):
+         nonetype = type(None)
+         nowtype = type(datetime.now(timezone.utc))
+         return {
+             'schema_ref': str,
+             'data_name': str,
+             'version_timestamp': (nowtype, int),
+             'time_of_removal': (nowtype, nonetype),
+             'nth_most_recent': int,
+             'time_threshold': (nowtype, nonetype),
+             'data_adapter': (AbstractDataFileAdapter, nonetype),
+         }
+
+
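`FileSystemDAO` applies the same lifecycle to files: versions and removals are encoded into the filename (`__version_...`, `__time_of_removal_...`), so deletion is just a rename. A usage sketch under stated assumptions: it uses fsspec's local filesystem, and it assumes the default `XarrayDataArrayNetCDFAdapter` derives `schema_ref` and `data_name` from the DataArray attrs (inferred from the `get_id_kwargs` call in `add` above); the path and attribute values are made up:

```python
from datetime import datetime, timezone

import fsspec
import xarray as xr

from eegdash.SignalStore.signalstore.store.data_access_objects import FileSystemDAO

fs = fsspec.filesystem("file")  # any fsspec-compatible filesystem should work
dao = FileSystemDAO(filesystem=fs, project_dir="/tmp/signalstore_demo")

arr = xr.DataArray(
    [1.0, 2.0, 3.0],
    dims=["time"],
    # assumption: the default adapter reads these attrs to build the filename
    attrs={"schema_ref": "eeg_recording", "data_name": "subject01_run01"},
)
dao.add(arr)
loaded = dao.get("eeg_recording", "subject01_run01")
dao.mark_for_deletion("eeg_recording", "subject01_run01",
                      time_of_removal=datetime.now(timezone.utc))
print(dao.list_marked_for_deletion())  # paths containing __time_of_removal_
```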
+ # ===================
+
+ class InMemoryObjectDAOObjectNotFoundError(NotFoundError):
+     pass
+
+ class InMemoryObjectDAOObjectAlreadyExistsError(Exception):
+     pass
+
+ class InMemoryObjectDAOTypeError(TypeError):
+     pass
+
+ class InMemoryObjectDAORangeError(IndexError):
+     pass
+
+ class InMemoryObjectDAOArgumentNameError(ArgumentNameError):
+     pass
+
+ class InMemoryObjectDAOUncaughtError(UncaughtError):
+     pass
+
+ class InMemoryObjectDAO(AbstractQueriableDataAccessObject):
+     def __init__(self, memory_store: dict):
+         self._collection = memory_store
+         self._collection['objects'] = {}
+         self._collection['tags'] = {}
+         self._collection['removed'] = {}
+
+     @property
+     def collection(self):
+         return self._collection
+
+     def get(self, tag: str):
+         """Gets an object from the repository.
+         Arguments:
+             tag {str} -- The tag of the object to get.
+         Returns:
+             The object, or None if it is not found.
+         """
+         self._check_args(tag=tag)
+         id = self._collection['tags'].get(tag)
+         if id is None:
+             return None
+         else:
+             return self._collection['objects'].get(id)
+
+     def exists(self, tag: str):
+         """Checks if an object exists in the repository.
+         Arguments:
+             tag {str} -- The tag of the object to check.
+         Returns:
+             bool -- True if the object exists, else False.
+         """
+         self._check_args(tag=tag)
+         return self.get(tag) is not None
+
+     def find(self, key_filter=None, value_filter=None, filter_relation: str = 'and'):
+         """Returns a filtered dict of objects from the repository.
+         Arguments:
+             key_filter {function} -- Predicate applied to each tag.
+             value_filter {function} -- Predicate applied to each object.
+             filter_relation {str} -- How to combine the two predicates.
+         Returns:
+             dict -- The matching tag -> object pairs.
+         """
+         self._check_args(key_filter=key_filter, value_filter=value_filter, filter_relation=filter_relation)
+         if key_filter is None and value_filter is None:
+             filter_relation = 'none'
+         elif key_filter is None and value_filter is not None:
+             filter_relation = 'value'
+         elif key_filter is not None and value_filter is None:
+             filter_relation = 'key'
+         elif key_filter is not None and value_filter is not None:
+             filter_relation = filter_relation.lower()
+         item_filter_options = {
+             'none': lambda item: True,
+             'key': lambda item: key_filter(item[0]),
+             'value': lambda item: value_filter(item[1]),
+             'and': lambda item: key_filter(item[0]) and value_filter(item[1]),
+             'or': lambda item: key_filter(item[0]) or value_filter(item[1]),
+             'xor': lambda item: key_filter(item[0]) ^ value_filter(item[1]),
+             'nand': lambda item: not (key_filter(item[0]) and value_filter(item[1])),
+             'nor': lambda item: not (key_filter(item[0]) or value_filter(item[1])),
+             'xnor': lambda item: not (key_filter(item[0]) ^ value_filter(item[1])),
+         }
+         item_filter = item_filter_options[filter_relation]
+         # filter over (tag, object) pairs rather than the raw store dict
+         items = ((tag, self._collection['objects'][oid]) for tag, oid in self._collection['tags'].items())
+         return dict(filter(item_filter, items))
+
+     def add(self, object, tag: str):
+         """Adds an object to the repository.
+         Arguments:
+             object {Any} -- The object to add.
+             tag {str} -- The tag to add the object with.
+         Raises:
+             InMemoryObjectDAOObjectAlreadyExistsError -- If the object already exists.
+         Returns:
+             None
+         """
+         self._check_args(tag=tag)
+         if self.exists(tag):
+             raise InMemoryObjectDAOObjectAlreadyExistsError(
+                 f'Cannot add object with tag {tag} and id {id(object)} because it already exists in object repository.'
+             )
+         # check if tag is already in use
+         if tag in self._collection['tags']:
+             raise InMemoryObjectDAOObjectAlreadyExistsError(
+                 f'Cannot add object with tag {tag} because the tag already exists in object repository. Use a different tag.'
+             )
+         # check if id(object) is already in use
+         if id(object) in self._collection['objects']:
+             raise InMemoryObjectDAOObjectAlreadyExistsError(
+                 f'Cannot add object with id {id(object)} because it already exists in object repository. The tag is not taken, but you must use a different object.'
+             )
+         self._collection['objects'][id(object)] = object
+         self._collection['tags'][tag] = id(object)
+         return None
+
+     def mark_for_deletion(self, tag, time_of_removal: datetime):
+         """Marks an object for deletion.
+         Arguments:
+             tag {str} -- The tag of the object to mark for deletion.
+             time_of_removal {datetime} -- The timestamp to mark the object for deletion with.
+         Raises:
+             InMemoryObjectDAOObjectNotFoundError -- If the object does not exist.
+         Returns:
+             None
+         """
+         sleep(0.0000001)  # to prevent non-unique timestamps
+         self._check_args(tag=tag, time_of_removal=time_of_removal)
+         if not self.exists(tag):
+             raise InMemoryObjectDAOObjectNotFoundError(
+                 f'Cannot delete object with tag {tag} because it does not exist in object repository.'
+             )
+         # check whether another object was already removed with the exact same timestamp
+         for entry in self._collection['removed'].values():
+             if entry['time_of_removal'] == time_of_removal:
+                 raise InMemoryObjectDAOUncaughtError(
+                     f"Cannot mark object with tag {tag} for deletion because {entry['tag']} has already been marked for deletion with the exact same timestamp ({time_of_removal}). If you received this error, something is wrong with the package. Please reach out to the package maintainer."
+                 )
+         # record the removal, keyed by tag
+         self._collection['removed'][tag] = {'tag': tag, 'time_of_removal': time_of_removal, 'id': self._collection['tags'][tag]}
+         # remove the tag; the object itself stays in 'objects' so it can be restored
+         del self._collection['tags'][tag]
+         return None
+
+     def list_marked_for_deletion(self, time_threshold: datetime = None):
+         """Returns a time-sorted list of deleted objects from the repository that are older than the time threshold.
+         Arguments:
+             time_threshold {datetime} -- The time threshold.
+         Returns:
+             list[dict] -- The list of deleted objects.
+         """
+         self._check_args(time_threshold=time_threshold)
+         if time_threshold is None:
+             return list(sorted(self._collection['removed'].values(), key=lambda x: x['time_of_removal']))
+         else:
+             return list(sorted(filter(lambda x: x['time_of_removal'] <= time_threshold, self._collection['removed'].values()), key=lambda x: x['time_of_removal']))
+
+     def restore(self, tag):
+         """Restores a removed object by its tag.
+         Arguments:
+             tag {str} -- The tag of the object to restore.
+         Raises:
+             InMemoryObjectDAOObjectNotFoundError -- If the object is not in the queue of removed objects.
+             InMemoryObjectDAOObjectAlreadyExistsError -- If the object already exists.
+         """
+         self._check_args(tag=tag)
+         if self.exists(tag):
+             raise InMemoryObjectDAOObjectAlreadyExistsError(
+                 f"Cannot restore object with tag {tag} because it already exists in object repository."
+             )
+         elif self._collection['removed'].get(tag) is None:
+             raise InMemoryObjectDAOObjectNotFoundError(
+                 f"Cannot restore object with tag {tag} because it is not present in the queue of removed objects."
+             )
+         # move the object id from the removed dict back to the tags dict
+         self._collection['tags'][tag] = self._collection['removed'][tag]['id']
+         # remove the entry from the removed dict
+         del self._collection['removed'][tag]
+         return None
+
+     def purge(self, time_threshold: datetime = None) -> int:
+         """Purges deleted objects from the repository older than the time threshold.
+         Arguments:
+             time_threshold {datetime} -- The time threshold.
+         Returns:
+             int -- The number of deleted objects purged.
+         """
+         self._check_args(time_threshold=time_threshold)
+         to_delete = self.list_marked_for_deletion(time_threshold)
+         count = len(to_delete)
+         for entry in to_delete:
+             del self._collection['objects'][entry['id']]
+             del self._collection['removed'][entry['tag']]
+         return count
+
+     def _get_schema_name(self, object):
+         # extract a human-readable type string without the <>'s
+         schema_ref = str(type(object))
+         schema_ref = schema_ref.split("'")[1].lower()
+         if '.' in schema_ref:
+             schema_ref = schema_ref.split('.')[-1]
+         return schema_ref
+
+     def _check_args(self, **kwargs):
+         for key, value in kwargs.items():
+             try:
+                 arg_types = self._arg_types[key]
+             except KeyError:
+                 raise InMemoryObjectDAOArgumentNameError(
+                     f'Invalid argument name {key}. The only valid argument names are {self._arg_types.keys()}.'
+                 )
+             # check if value is one of the allowed types
+             if not isinstance(value, arg_types):
+                 raise InMemoryObjectDAOTypeError(
+                     f'Invalid type {type(value)} for argument {key}. Must be one of {arg_types}.'
+                 )
+
+     @property
+     def _arg_types(self):
+         nonetype = type(None)
+         functiontype = type(lambda x: x)
+         return {
+             'tag': str,
+             'time_of_removal': datetime,
+             'nth_most_recent': int,
+             'time_threshold': (datetime, nonetype),
+             'key_filter': (functiontype, nonetype),
+             'value_filter': (functiontype, nonetype),
+             'filter_relation': str,
+         }
+
+
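`InMemoryObjectDAO` keeps the same contract for transient Python objects: tags map to object ids, removal detaches the tag while the object stays in `'objects'`, and restore re-attaches it. A short sketch (the tag and payload are made-up examples):

```python
from datetime import datetime, timezone

from eegdash.SignalStore.signalstore.store.data_access_objects import InMemoryObjectDAO

store = {}                       # the DAO creates 'objects'/'tags'/'removed' sub-dicts
dao = InMemoryObjectDAO(store)

payload = {"anything": "goes"}   # any Python object can be stored
dao.add(payload, tag="scratch_result")
assert dao.exists("scratch_result")

dao.mark_for_deletion("scratch_result", time_of_removal=datetime.now(timezone.utc))
assert not dao.exists("scratch_result")       # hidden, but still held in 'objects'

dao.restore("scratch_result")                 # re-attach the tag
assert dao.get("scratch_result") is payload   # same object, not a copy
```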
+
+ # ===================
+ # Helper Functions
+ # ===================
+
+ # Microseconds
+
+ def datetime_to_microseconds(timestamp: datetime) -> int:
+     """Converts a datetime object to microseconds, with microsecond precision.
+     Arguments:
+         timestamp {datetime} -- The timestamp to convert.
+     Returns:
+         int or None -- The timestamp in microseconds (full precision).
+     """
+     if isinstance(timestamp, str):
+         timestamp = string_to_datetime(timestamp)
+     if timestamp is None:
+         return None
+     elif timestamp == 0:
+         return None
+     try:
+         return int(timestamp.astimezone(timezone.utc).timestamp() * 1000000)
+     except Exception as e:
+         raise TypeError(f'Invalid type {type(timestamp)} for argument timestamp == {timestamp}. Must be datetime\n\ntraceback: {e}')
+
+ def microseconds_to_datetime(timestamp: int) -> datetime:
+     """Converts microseconds to a datetime object.
+     Arguments:
+         timestamp {int} -- The timestamp in microseconds (full precision).
+     Returns:
+         datetime -- The timestamp as a datetime object.
+     """
+     if timestamp is None:
+         return None
+     elif timestamp == 0:
+         return 0
+     try:
+         return datetime.fromtimestamp(float(timestamp) / 1000000, tz=timezone.utc)
+     except Exception as e:
+         raise TypeError(f'Invalid type {type(timestamp)} for argument timestamp = {timestamp}. Must be int\n\ntraceback: {e}')
+
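The two helpers are inverses at microsecond precision, with `None` and `0` passed through as sentinels. A quick round-trip check (the conversion goes through a float Unix timestamp, so the example uses a fractional second that floats represent exactly):

```python
from datetime import datetime, timezone

from eegdash.SignalStore.signalstore.store.data_access_objects import (
    datetime_to_microseconds, microseconds_to_datetime)

ts = datetime(2024, 1, 2, 3, 4, 5, 500000, tzinfo=timezone.utc)
micros = datetime_to_microseconds(ts)   # integer microseconds since the Unix epoch
assert microseconds_to_datetime(micros) == ts
assert datetime_to_microseconds(None) is None
assert microseconds_to_datetime(0) == 0  # the 0 sentinel survives the round trip
```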
+ # JSON
+
+ class PropertySerializerArgumentTypeError(TypeError):
+     pass
+
+ def dict_to_json_bytes(dictionary: dict) -> bytes:
+     """Converts a dictionary to UTF-8 encoded JSON bytes.
+     Arguments:
+         dictionary {dict} -- The dictionary to convert.
+     Returns:
+         bytes -- The UTF-8 encoded JSON.
+     """
+     if dictionary is None:
+         return None
+     if not isinstance(dictionary, dict):
+         raise PropertySerializerArgumentTypeError(f'Invalid dict_to_json_bytes property serializer argument; argument type: {type(dictionary)}. Must be dict.')
+     return json.dumps(dictionary).encode('utf-8')
+
+ def json_bytes_to_dict(json_string: bytes) -> dict:
+     """Converts UTF-8 encoded JSON bytes to a dictionary.
+     Arguments:
+         json_string {bytes} -- The JSON bytes to convert.
+     Returns:
+         dict -- The dictionary.
+     """
+     if json_string is None:
+         return None
+     if not isinstance(json_string, bytes):
+         raise TypeError(f'Invalid type {type(json_string)} for argument json_string. Must be bytes.')
+     return json.loads(json_string.decode('utf-8'))
+
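These two functions are what let `MongoDAO` store `json_schema` documents as opaque UTF-8 bytes (see the `property_serializers` mapping above). A round-trip sketch with a made-up schema:

```python
from eegdash.SignalStore.signalstore.store.data_access_objects import (
    dict_to_json_bytes, json_bytes_to_dict)

schema = {"type": "object", "properties": {"name": {"type": "string"}}}
raw = dict_to_json_bytes(schema)     # UTF-8 bytes, safe to store in MongoDB
assert isinstance(raw, bytes)
assert json_bytes_to_dict(raw) == schema
```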
+ # String
+
+ def datetime_to_string(timestamp: datetime) -> str:
+     """Converts a datetime object to a string with microsecond precision.
+     Arguments:
+         timestamp {datetime} -- The timestamp to convert.
+     Returns:
+         str -- The timestamp as a string.
+     """
+     if timestamp == 0:
+         return '0'
+     return timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %z')
+
+ def string_to_datetime(timestamp: str) -> datetime:
+     """Converts a string to a datetime object with microsecond precision.
+     Arguments:
+         timestamp {str} -- The timestamp to convert.
+     Returns:
+         datetime -- The timestamp as a datetime object.
+     """
+     if timestamp == '0':
+         return 0
+     return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S.%f %z')
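The string helpers mirror the microsecond helpers, using one fixed strftime format and `'0'` as the unversioned sentinel. A round-trip check:

```python
from datetime import datetime, timezone

from eegdash.SignalStore.signalstore.store.data_access_objects import (
    datetime_to_string, string_to_datetime)

ts = datetime(2024, 1, 2, 3, 4, 5, 123456, tzinfo=timezone.utc)
s = datetime_to_string(ts)        # '2024-01-02 03:04:05.123456 +0000'
assert string_to_datetime(s) == ts
assert string_to_datetime('0') == 0  # the '0' sentinel round-trips too
```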