eegdash 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (75)
  1. eegdash/data_utils.py → data_utils.py +131 -5
  2. {eegdash-0.0.1.dist-info → eegdash-0.0.3.dist-info}/METADATA +75 -8
  3. eegdash-0.0.3.dist-info/RECORD +8 -0
  4. {eegdash-0.0.1.dist-info → eegdash-0.0.3.dist-info}/WHEEL +1 -1
  5. eegdash-0.0.3.dist-info/top_level.txt +3 -0
  6. main.py +199 -0
  7. eegdash/SignalStore/__init__.py +0 -0
  8. eegdash/SignalStore/signalstore/__init__.py +0 -3
  9. eegdash/SignalStore/signalstore/adapters/read_adapters/abstract_read_adapter.py +0 -13
  10. eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/schema_read_adapter.py +0 -16
  11. eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/vocabulary_read_adapter.py +0 -19
  12. eegdash/SignalStore/signalstore/adapters/read_adapters/handmade_records/excel_study_organizer_read_adapter.py +0 -114
  13. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/axona/axona_read_adapter.py +0 -912
  14. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/ReadIntanSpikeFile.py +0 -140
  15. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/intan_read_adapter.py +0 -29
  16. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/__init__.py +0 -0
  17. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/data_to_result.py +0 -62
  18. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/get_bytes_per_data_block.py +0 -36
  19. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/notch_filter.py +0 -50
  20. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/qstring.py +0 -41
  21. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_header.py +0 -135
  22. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_one_data_block.py +0 -45
  23. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/load_intan_rhd_format.py +0 -204
  24. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/__init__.py +0 -0
  25. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/data_to_result.py +0 -60
  26. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/get_bytes_per_data_block.py +0 -37
  27. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/notch_filter.py +0 -50
  28. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/qstring.py +0 -41
  29. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_header.py +0 -153
  30. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_one_data_block.py +0 -47
  31. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/load_intan_rhs_format.py +0 -213
  32. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/neurodata_without_borders/neurodata_without_borders_read_adapter.py +0 -14
  33. eegdash/SignalStore/signalstore/operations/__init__.py +0 -4
  34. eegdash/SignalStore/signalstore/operations/handler_executor.py +0 -22
  35. eegdash/SignalStore/signalstore/operations/handler_factory.py +0 -41
  36. eegdash/SignalStore/signalstore/operations/handlers/base_handler.py +0 -44
  37. eegdash/SignalStore/signalstore/operations/handlers/domain/property_model_handlers.py +0 -79
  38. eegdash/SignalStore/signalstore/operations/handlers/domain/schema_handlers.py +0 -3
  39. eegdash/SignalStore/signalstore/operations/helpers/abstract_helper.py +0 -17
  40. eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_extractor.py +0 -33
  41. eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_rawio.py +0 -165
  42. eegdash/SignalStore/signalstore/operations/helpers/spikeinterface_helper.py +0 -100
  43. eegdash/SignalStore/signalstore/operations/helpers/wrappers/neo_wrappers.py +0 -21
  44. eegdash/SignalStore/signalstore/operations/helpers/wrappers/nwb_wrappers.py +0 -27
  45. eegdash/SignalStore/signalstore/store/__init__.py +0 -8
  46. eegdash/SignalStore/signalstore/store/data_access_objects.py +0 -1181
  47. eegdash/SignalStore/signalstore/store/datafile_adapters.py +0 -131
  48. eegdash/SignalStore/signalstore/store/repositories.py +0 -928
  49. eegdash/SignalStore/signalstore/store/store_errors.py +0 -68
  50. eegdash/SignalStore/signalstore/store/unit_of_work.py +0 -97
  51. eegdash/SignalStore/signalstore/store/unit_of_work_provider.py +0 -67
  52. eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_recording.py +0 -1
  53. eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_sorter.py +0 -1
  54. eegdash/SignalStore/signalstore/utilities/testing/data_mocks.py +0 -513
  55. eegdash/SignalStore/signalstore/utilities/tools/dataarrays.py +0 -49
  56. eegdash/SignalStore/signalstore/utilities/tools/mongo_records.py +0 -25
  57. eegdash/SignalStore/signalstore/utilities/tools/operation_response.py +0 -78
  58. eegdash/SignalStore/signalstore/utilities/tools/purge_orchestration_response.py +0 -21
  59. eegdash/SignalStore/signalstore/utilities/tools/quantities.py +0 -15
  60. eegdash/SignalStore/signalstore/utilities/tools/strings.py +0 -38
  61. eegdash/SignalStore/signalstore/utilities/tools/time.py +0 -17
  62. eegdash/SignalStore/tests/conftest.py +0 -799
  63. eegdash/SignalStore/tests/data/valid_data/data_arrays/make_fake_data.py +0 -59
  64. eegdash/SignalStore/tests/unit/store/conftest.py +0 -0
  65. eegdash/SignalStore/tests/unit/store/test_data_access_objects.py +0 -1235
  66. eegdash/SignalStore/tests/unit/store/test_repositories.py +0 -1309
  67. eegdash/SignalStore/tests/unit/store/test_unit_of_work.py +0 -7
  68. eegdash/SignalStore/tests/unit/test_ci_cd.py +0 -8
  69. eegdash/aws_ingest.py +0 -29
  70. eegdash/main.py +0 -17
  71. eegdash/signalstore_data_utils.py +0 -280
  72. eegdash-0.0.1.dist-info/RECORD +0 -72
  73. eegdash-0.0.1.dist-info/top_level.txt +0 -1
  74. /eegdash/__init__.py → /__init__.py +0 -0
  75. {eegdash-0.0.1.dist-info → eegdash-0.0.3.dist-info}/LICENSE +0 -0
eegdash/SignalStore/signalstore/store/repositories.py
@@ -1,928 +0,0 @@
- from eegdash.SignalStore.signalstore.store.data_access_objects import *
- from eegdash.SignalStore.signalstore.store.store_errors import *
- from eegdash.SignalStore.signalstore.utilities.tools.strings import contains_regex_characters
-
- from abc import ABC, abstractmethod
- import jsonschema
- import json
- from datetime import datetime, timezone
- from time import sleep
- from concurrent.futures import ThreadPoolExecutor
-
-
- # ================================
- # Base and Support Classes
- # ================================
-
- class AbstractRepository(ABC):
-
-     # Data Retrieval Operations
-
-     @abstractmethod
-     def get(self):
-         """Get a single aggregated object."""
-         pass
-
-     @abstractmethod
-     def exists(self):
-         """Check if an aggregated object exists."""
-         pass
-
-     # Operations That Modify the Repository
-
-     @abstractmethod
-     def add(self):
-         """Add a single aggregated object to all relevant collections."""
-         pass
-
-     @abstractmethod
-     def remove(self):
-         """Mark a single aggregated object for deletion in all relevant collections."""
-         pass
-
-     # Tracking Operations That Modify the Repository
-     def timestamp(self):
-         """Get a timestamp to use for tracking and sorting CRUD operations.
-         """
-         return datetime.now().astimezone(timezone.utc)
-
-
-     @abstractmethod
-     def undo(self):
-         """Undo most recent CRUD operation."""
-         pass
-
-     @abstractmethod
-     def undo_all(self):
-         """Undo all CRUD operations in self._operation_history."""
-         pass
-
-     @abstractmethod
-     def clear_operation_history(self):
-         """Clear the history of tracked operations."""
-         pass
-
-     # Purging Removed Objects
-
-     @abstractmethod
-     def list_marked_for_deletion(self):
-         """List aggregated objects marked for deletion."""
-         pass
-
-     @abstractmethod
-     def purge(self):
-         """Purge (permanently delete) aggregated objects marked for deletion."""
-         pass
-
-     @abstractmethod
-     def _validate(self):
-         """Validate a single aggregated object prior to adding it into the repository."""
-         pass
-
-
- class AbstractQueriableRepository(AbstractRepository):
-
-     @abstractmethod
-     def find(self):
-         """Apply filtering to get multiple aggregated objects fitting a description."""
-         pass
-
- # Operation History Entry
-
- class OperationHistoryEntry:
-     def __init__(self, timestamp: datetime, collection_name: str, operation: str, **kwargs):
-         assert isinstance(timestamp, datetime)
-         self.timestamp = timestamp
-         self.collection_name = collection_name
-         if operation not in ["added", "removed"]:
-             raise OperationHistoryEntryValueError(f"operation must be one of 'added' or 'removed', not '{operation}'")
-         self.operation = operation
-         for key, value in kwargs.items():
-             setattr(self, key, value)
-
-     def __repr__(self):
-         repstr = f"OperationHistoryEntry(timestamp={self.timestamp}, repository={self.collection_name}, operation={self.operation}"
-         for attr in self.__dict__:
-             if attr not in ["timestamp", "collection_name", "operation"]:
-                 repstr += f", {attr}={getattr(self, attr)}"
-         return repstr + ")"
-
-     def __eq__(self, other):
-         for attr in self.__dict__:
-             if getattr(self, attr) != getattr(other, attr):
-                 return False
-         return True
-
-     def __ne__(self, other):
-         return not self.__eq__(other)
-
-     def __gt__(self, other):
-         return self.timestamp > other.timestamp
-
-     def __ge__(self, other):
-         return self.timestamp >= other.timestamp
-
-     def __lt__(self, other):
-         return self.timestamp < other.timestamp
-
-     def __le__(self, other):
-         return self.timestamp <= other.timestamp
-
-     def dict(self):
-         return self.__dict__
-
- class OperationHistoryEntryValueError(ValueError):
-     pass
-
-
- # ================================
- # Domain Model Repository
- # ================================
-
-
- class DomainRepositoryModelAlreadyExistsError(AlreadyExistsError):
-     pass
-
- class DomainRepositoryModelNotFoundError(NotFoundError):
-     pass
-
- class DomainRepositoryRangeError(RangeError):
-     pass
-
- class DomainRepositoryTypeError(ArgumentTypeError):
-     pass
-
- class DomainRepositoryValidationError(ValidationError):
-     pass
-
- class DomainRepositoryUncaughtError(UncaughtError):
-     pass
-
- data_identifier_regex = "^(?!.*__.*)(?!.*time_of_removal.*)(?!.*time_of_save.*)[a-z][a-z0-9_]*[a-z0-9]$"
-
- model_identifier_regex = "^(?!.*__.*)[a-z][a-z0-9_]*[a-z0-9]$"
-
-
-
- domain_model_json_schema = {
-     "title": "Model of Model Records",
-     "description": "A schema for validating model records which includes properties of different model types",
-     "type": "object",
-     "properties": {
-         "schema_name": {
-             "type": "string",
-             "pattern": model_identifier_regex
-         },
-         "schema_title": {
-             "type": "string",
-             "pattern": "^[A-Za-z0-9][A-Za-z0-9 ]+[A-Za-z0-9]$"
-         },
-         "schema_description": {
-             "type": "string",
-             # pattern enforcing description cannot be empty or have leading/trailing whitespace
-             "pattern": "^\\S(.*\\S)?$"
-         },
-         "schema_type": {
-             "type": "string",
-             "enum": ["property_model", "data_model", "metamodel"]
-         },
-         "json_schema": {
-             "type": "object",
-             "required": ["type"],
-             "properties": {
-                 "type": {
-                     "oneOf": [
-                         {
-                             "type": "string"
-                         },
-                         {
-                             "type": "array",
-                             "items": {
-                                 "type": "string"
-                             }
-                         }
-                     ]
-                 }
-             }
-         },
-         "metamodel_ref": {
-             "type": ["string", "null"],
-             "pattern": model_identifier_regex
-         },
-         "version_timestamp": {
-             "type": ["datetime", "integer"],
-             "if": { "type": "integer" },
-             "then": {
-                 "const": 0
-             }
-         },
-         "time_of_save": {
-             "type": "datetime"
-         },
-         "time_of_removal": {
-             "type": ["datetime", "null"]
-         }
-     },
-     "required": ["schema_name", "schema_title", "schema_description", "schema_type", "json_schema"],
-     "allOf": [
-         {
-             "if": { "allOf": [
-                 {"properties": {"schema_type": {"const": "data_model"}}},
-                 {"required": ["schema_type"]},
-                 ]
-             },
-             "then": {
-                 "required": ["metamodel_ref"],
-             }
-         },
-         # if the schema_type is 'metamodel' or 'data_model', then the json_schema must have a type property equal to 'object'
-         {
-             "if": { "anyOf": [
-                 {"properties": {"schema_type": {"const": "metamodel"}}},
-                 {"properties": {"schema_type": {"const": "data_model"}}},
-                 ]
-             },
-             "then": {
-                 "properties": {
-                     "json_schema": {
-                         "properties": {
-                             "type": {
-                                 "const": "object"
-                             }
-                         }
-                     }
-                 }
-             }
-         }
-     ],
-     "additionalProperties": False
- }
-
- def is_datetime(checker, instance):
-     return isinstance(instance, type(datetime.now().astimezone()))
- # Create a new type checker that adds 'datetime' as a new type
- type_checker = jsonschema.Draft7Validator.TYPE_CHECKER.redefine("datetime", is_datetime)
- # Create a new validator class using the new type checker
- CustomValidator = jsonschema.validators.extend(
-     jsonschema.Draft7Validator, type_checker=type_checker
- )
-
- class DomainModelRepository(AbstractQueriableRepository):
-     """A repository for storing Domain Model Objects such as a Controlled Vocabulary or an Object Type Schema collection.
-     """
-     def __init__(self, model_dao, model_metaschema=domain_model_json_schema):
-         self._dao = model_dao
-         self._operation_history = []
-         self._model_metaschema = model_metaschema
-         self._arg_options = {
-             "schema_name": (str),
-             "model": (dict),
-             "filter": (dict, type(None)),
-             "projection": (dict, type(None)),
-         }
-         self._validator = CustomValidator
-
-     def get(self, schema_name):
-         """Get a single domain model object."""
-         self._check_args(schema_name=schema_name)
-         # get the model
-         model = self._dao.get(schema_name=schema_name)
-         if model is None:
-             return None
-         self._validate(model)
-         # return the model
-         return model
-
-     def find(self, filter=None, projection=None, **kwargs):
-         """Apply filtering to get multiple domain model objects fitting a description."""
-         self._check_args(filter=filter, projection=projection)
-         models = self._dao.find(filter=filter, projection=projection, **kwargs)
-         # validate the models
-
-         for model in models:
-             self._validate(model)
-         # return the models
-         return models
-
-     def exists(self, schema_name):
-         """Check if a domain model object exists."""
-         self._check_args(schema_name=schema_name)
-         return self._dao.exists(schema_name=schema_name)
-
-     def add(self, model):
-         """Add a single domain model object to the repository."""
-         self._check_args(model=model)
-         ohe = OperationHistoryEntry(self.timestamp(), self._dao.collection_name, "added", schema_name=model["schema_name"], has_file=False)
-         # validate the model
-         self._validate(model)
-         if self._dao.exists(schema_name=model["schema_name"]):
-             raise DomainRepositoryModelAlreadyExistsError(f"A model with schema_name '{model['schema_name']}' already exists in the repository.")
-         try:
-             self._dao.add(document=model, timestamp=ohe.timestamp)
-         except Exception as e:
-             raise DomainRepositoryUncaughtError(f"An uncaught error occurred while adding the model to the repository.\n\nTraceback: {e}")
-         self._operation_history.append(ohe)
-         return ohe
-
-     def remove(self, schema_name):
-         """Mark a single domain model object for deletion; remove it from the scope of get and list searches."""
-         self._check_args(schema_name=schema_name)
-         ohe = OperationHistoryEntry(self.timestamp(), self._dao.collection_name, "removed", schema_name=schema_name, has_file=False)
-         if not self._dao.exists(schema_name=schema_name):
-             raise DomainRepositoryModelNotFoundError(f"A model with schema_name '{schema_name}' does not exist in the repository.")
-         try:
-             self._dao.mark_for_deletion(schema_name=schema_name, timestamp=ohe.timestamp)
-         except Exception as e:
-             raise DomainRepositoryUncaughtError(f"An uncaught error occurred while marking the model for deletion.\n\nTraceback: {e}")
-         self._operation_history.append(ohe)
-         return ohe
-
-     def undo(self):
-         """Undo most recent CRUD operation."""
-         try:
-             ohe = self._operation_history[-1]
-             if ohe is None:
-                 return None
-         except IndexError:
-             return None
-         now = self.timestamp()
-         if ohe.operation == "removed":
-             self._dao.restore(
-                 schema_name=ohe.schema_name,
-                 nth_most_recent=1
-             )
-         elif ohe.operation == "added":
-             self._dao.mark_for_deletion(schema_name=ohe.schema_name,
-                                         timestamp=ohe.timestamp)
-         # remove the operation history entry after successfully undoing the operation
-         self._operation_history.pop()
-         return ohe
-
-     def undo_all(self):
-         """Undo all CRUD operations in self._operation_history."""
-         undone_operations = []
-         while len(self._operation_history) > 0:
-             operation = self.undo()
-             undone_operations.append(operation)
-         return undone_operations
-
-     def clear_operation_history(self):
-         """Clear the history of CRUD operations."""
-         self._operation_history = []
-
-     def list_marked_for_deletion(self):
-         """List domain model objects marked for deletion."""
-         try:
-             return self._dao.list_marked_for_deletion()
-         except Exception as e:
-             raise DomainRepositoryUncaughtError(f"An uncaught error occurred while listing the terms marked for deletion.\n\nTraceback: {e}")
-
-     def purge(self, time_threshold=None):
-         """Purge (permanently delete) domain model objects marked for deletion."""
-         try:
-             self._dao.purge(time_threshold)
-         except Exception as e:
-             raise DomainRepositoryUncaughtError(f"An uncaught error occurred while purging the repository.\n\nTraceback: {e}")
-
-     def _validate(self, model):
-         """Validate a single domain model object prior to adding it into the repository."""
-         try:
-             validator = self._get_validator(self._model_metaschema)
-             validator.validate(model)
-         except jsonschema.exceptions.ValidationError as e:
-             message = self._validation_error_message(e, model, self._model_metaschema)
-             raise DomainRepositoryValidationError(message)
-         # if the model has a metamodel_ref property
-         # check that the metamodel_ref exists in the repository
-         # and if so, validate the model against its metamodel
-         if model.get("metamodel_ref") is not None:
-             metamodel_ref = model.get("metamodel_ref")
-             if not self._dao.exists(schema_name=metamodel_ref):
-                 raise DomainRepositoryValidationError(f"The metamodel_ref '{metamodel_ref}' does not exist in the repository.")
-             metamodel = self._dao.get(schema_name=metamodel_ref)
-             metaschema = metamodel.get("json_schema")
-             try:
-                 validator = self._get_validator(metaschema)
-                 validator.validate(model)
-             except jsonschema.exceptions.ValidationError as e:
-                 message = self._validation_error_message(e, model, metaschema)
-                 raise DomainRepositoryValidationError(message)
-
-
-     def _get_validator(self, schema):
-         """Get a validator for a schema."""
-         return self._validator(schema)
-
-
-     def _validation_error_message(self, e, model, schema):
-         """Get an enhanced validation error message."""
-
-         message = f"\n\nValidation Error\n-------------------\n\n"
-         message += f"Message: {e.message}\n\n"
-         message += f"Instance: {e.instance}\n\n"
-         message += f"Path: {e.path}\n\n"
-         message += f"Relative Path: {e.relative_path}\n\n"
-         message += f"Absolute Path: {e.absolute_path}\n\n"
-         message += f"Schema Path: {e.schema_path}\n\n"
-         message += f"Local Schema: {e.schema}\n\n"
-         message += f"Args: {e.args}\n\n"
-         message += f"Cause: {e.cause}\n\n"
-         message += f"Context: {e.context}\n\n"
-         message += f"Validator: {e.validator}\n\n"
-         message += f"Validator Value: {e.validator_value}\n\n"
-         message += f"Model: {model}\n\n"
-         message += f"Full Schema: {schema}\n\n"
-
-         return message
-
-     def _check_args(self, **kwargs):
-         for key, value in kwargs.items():
-             if not isinstance(value, self._arg_options[key]):
-                 raise DomainRepositoryTypeError(f"{key} must be of type {self._arg_options[key]}, not {type(value)}")
-
-     @property
-     def arg_options(self):
-         return self._arg_options
-
- # ================================
- # Data Repository
- # ================================
-
- class DataRepositoryAlreadyExistsError(AlreadyExistsError):
-     pass
-
- class DataRepositoryNotFoundError(NotFoundError):
-     pass
-
- class DataRepositoryRangeError(RangeError):
-     pass
-
- class DataRepositoryTypeError(ArgumentTypeError):
-     pass
-
- class DataRepositoryValidationError(ValidationError):
-     pass
-
- class DataRepositoryUncaughtError(UncaughtError):
-     pass
-
- class DataRepository(AbstractQueriableRepository):
-     # only indexes on schema_ref, data_name, and version_timestamp
-     """A repository for records such as session metadata, data array metadata and object state metadata."""
-     def __init__(self, record_dao, file_dao, domain_repo):
-         self._records = record_dao
-         self._data = file_dao
-         self._domain_models = domain_repo
-         self._operation_history = []
-         self._validator = CustomValidator
-
-
-     def get(self, schema_ref, data_name, nth_most_recent=None, version_timestamp=0, data_adapter=None, validate=True):
-         """Get a single record."""
-         # reject an out-of-range nth_most_recent before dispatching the lookup
-         if nth_most_recent is not None and nth_most_recent < 1:
-             raise DataRepositoryRangeError(f"nth_most_recent must be an integer greater than 0, not {nth_most_recent}.")
-         self._check_args(
-             schema_ref=schema_ref,
-             data_name=data_name,
-             version_timestamp=version_timestamp
-         )
-         if nth_most_recent is not None and version_timestamp == 0:
-             record = self._records.find(filter={"schema_ref": schema_ref, "data_name": data_name}, sort=[("version_timestamp", 1)], limit=nth_most_recent)[-1]
-             version_timestamp = record.get("version_timestamp")
-         else:
-             record = self._records.get(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp)
-         if record is None:
-             return None
-         if validate:
-             self._validate(record)
-         has_file = record.get("has_file")
-         if has_file:
-             data = self._data.get(
-                 schema_ref=schema_ref,
-                 data_name=data_name,
-                 version_timestamp=version_timestamp,
-                 data_adapter=data_adapter
-             )
-             if data is None:
-                 raise DataRepositoryNotFoundError(f"Data for record with schema_ref '{schema_ref}', data_name '{data_name}', and version_timestamp '{version_timestamp}' is missing its file. The record exists and has the 'has_file' attribute set to True, but the file data access object returned None.")
-             # check that data.attrs is a subset of the record's attrs
-             attr_keys = set(data.attrs.keys())
-             record_keys = set(record.keys())
-             if not attr_keys.issubset(record_keys):
-                 raise DataRepositoryValidationError(f"The data.attrs keys {attr_keys} are not a subset of the record keys {record_keys}. The difference is {attr_keys.difference(record_keys)}.")
-             # data.attrs.update(record)
-             return data
-         else:
-             return record
-
-     def find(self, filter=None, projection=None, sort=None, limit=None, get_data=False, validate=True):
-         """Apply filtering to get multiple records fitting a description."""
-         self._check_args(
-             filter=filter,
-             projection=projection)
-         if sort is not None and limit is not None:
-             records = self._records.find(filter=filter, projection=projection).sort(sort).limit(limit)
-         elif sort is not None:
-             records = self._records.find(filter=filter, projection=projection).sort(sort)
-         elif limit is not None:
-             records = self._records.find(filter=filter, projection=projection).limit(limit)
-         else:
-             records = self._records.find(filter=filter, projection=projection)
-         # validate the records
-         if validate:
-             for record in records:
-                 self._validate(record)
-         if get_data:
-             data = []
-             for record in records:
-                 if record.get("has_file"):
-                     schema_ref = record.get("schema_ref")
-                     data_name = record.get("data_name")
-                     version_timestamp = record.get("version_timestamp")
-                     data_object = self._data.get(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp)
-                     data.append(data_object)
-                 else:
-                     data.append(record)
-             return data
-         else:
-             return records
-
-     def exists(self, schema_ref, data_name, version_timestamp=0):
-         """Check if a record exists.
-         Caveats:
-             Does not check if data files have been lost.
-         """
-         self._check_args(
-             schema_ref=schema_ref,
-             data_name=data_name,
-             version_timestamp=version_timestamp)
-         record_exists = self._records.exists(version_timestamp=version_timestamp, schema_ref=schema_ref, data_name=data_name)
-         return record_exists
-
-     def has_file(self, schema_ref, data_name, version_timestamp=0):
-         """Check if a record has data."""
-         self._check_args(
-             schema_ref=schema_ref,
-             data_name=data_name,
-             version_timestamp=version_timestamp)
-         has_file = self._data.exists(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp)
-         return has_file
-
-     def add(self, object, data_adapter=None, versioning_on=False):
-         """Add a single object to the repository."""
-         add_timestamp = self.timestamp()
-         dttype = type(datetime.now().astimezone())
-         if isinstance(object, dict):
-             if versioning_on and not isinstance(object.get("version_timestamp"), dttype):
-                 object["version_timestamp"] = add_timestamp
-             elif not versioning_on:
-                 object["version_timestamp"] = 0
-             elif not isinstance(object.get("version_timestamp"), dttype):
-                 raise DataRepositoryTypeError(f"'version_timestamp' must be a {dttype} object or the integer 0, not {type(object.get('version_timestamp'))}.")
-             ohe = self._add_record(
-                 object=object,
-                 add_timestamp=add_timestamp,
-                 versioning_on=versioning_on
-             )
-             return ohe
-         elif hasattr(object, "attrs"):
-             if versioning_on and not isinstance(object.attrs.get("version_timestamp"), dttype):
-                 object.attrs["version_timestamp"] = add_timestamp
-             elif not versioning_on:
-                 object.attrs["version_timestamp"] = 0
-             elif not isinstance(object.attrs.get("version_timestamp"), dttype):
-                 raise DataRepositoryTypeError(f"'version_timestamp' must be a {dttype} object or the integer 0, not {type(object.attrs.get('version_timestamp'))}.")
-             ohe = self._add_data_with_file(
-                 object=object,
-                 add_timestamp=add_timestamp,
-                 versioning_on=versioning_on,
-                 data_adapter=data_adapter
-             )
-             return ohe
-         else:
-             raise DataRepositoryTypeError(f"object must be a dict or an object with an 'attrs' attribute, not {type(object)}")
-
-     def _add_record(self, object, add_timestamp, versioning_on):
-         ohe = OperationHistoryEntry(
-             add_timestamp,
-             self._records.collection_name,
-             "added",
-             schema_ref=object["schema_ref"],
-             data_name=object["data_name"],
-             version_timestamp=object["version_timestamp"],
-             data_adapter=None,
-             has_file=False)
-         if object.get("has_file") is None:
-             object["has_file"] = False
-         self._validate(object)
-         self._records.add(
-             document=object,
-             timestamp=add_timestamp,
-             versioning_on=versioning_on
-         )
-         self._operation_history.append(ohe)
-         return ohe
-
-     def _add_data_with_file(self, object, add_timestamp, versioning_on, data_adapter=None):
-         if data_adapter is None:
-             data_adapter = self._data._default_data_adapter
-         ohe = OperationHistoryEntry(
-             add_timestamp,
-             self._records.collection_name, "added",
-             schema_ref=object.attrs["schema_ref"],
-             data_name=object.attrs["data_name"],
-             has_file=True,
-             data_adapter=data_adapter,
-             version_timestamp=object.attrs["version_timestamp"]
-         )
-         if object.attrs.get("has_file") is None:
-             object.attrs["has_file"] = True
-         self._validate(object.attrs)
-         self._records.add(
-             document=object.attrs,
-             timestamp=ohe.timestamp,
-             versioning_on=versioning_on
-         )
-         self._data.add(
-             data_object=object,
-             data_adapter=data_adapter
-         )
-         self._operation_history.append(ohe)
-         return ohe
-
-     def remove(self, schema_ref, data_name, version_timestamp=0, data_adapter=None):
-         """Mark a single record for deletion; remove it from the scope of get and list searches."""
-         self._check_args(
-             schema_ref=schema_ref,
-             data_name=data_name,
-             version_timestamp=version_timestamp)
-         if not self._records.exists(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp):
-             raise DataRepositoryNotFoundError(f"A record with schema_ref '{schema_ref}', data_name '{data_name}', and version_timestamp '{version_timestamp}' does not exist in the repository.")
-         has_file = self._data.exists(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp, data_adapter=data_adapter)
-
-         ohe = OperationHistoryEntry(
-             self.timestamp(),
-             self._records.collection_name,
-             "removed", schema_ref=schema_ref,
-             data_name=data_name,
-             version_timestamp=version_timestamp,
-             has_file=has_file,
-             data_adapter=data_adapter
-         )
-
-         if has_file:
-             if data_adapter is None:
-                 data_adapter = self._data._default_data_adapter
-             self._data.mark_for_deletion(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp, time_of_removal=ohe.timestamp, data_adapter=data_adapter)
-         self._records.mark_for_deletion(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp, timestamp=ohe.timestamp)
-         self._operation_history.append(ohe)
-         return ohe
-
-
-     def undo(self):
-         try:
-             ohe = self._operation_history[-1]
-         except IndexError:
-             return None
-         if ohe.operation == "removed":
-             self._records.restore(
-                 schema_ref=ohe.schema_ref,
-                 data_name=ohe.data_name,
-                 version_timestamp=ohe.version_timestamp,
-                 nth_most_recent=1
-             )
-             if ohe.has_file:
-                 self._data.restore(
-                     schema_ref=ohe.schema_ref,
-                     data_name=ohe.data_name,
-                     version_timestamp=ohe.version_timestamp,
-                     data_adapter=ohe.data_adapter,
-                     nth_most_recent=1
-                 )
-         elif ohe.operation == "added":
-             self._records.mark_for_deletion(schema_ref=ohe.schema_ref,
-                                             data_name=ohe.data_name,
-                                             version_timestamp=ohe.version_timestamp,
-                                             timestamp=ohe.timestamp
-                                             )
-             if ohe.has_file:
-                 self._data.mark_for_deletion(
-                     schema_ref=ohe.schema_ref,
-                     data_name=ohe.data_name,
-                     version_timestamp=ohe.version_timestamp,
-                     time_of_removal=ohe.timestamp,
-                     data_adapter=ohe.data_adapter
-                 )
-         # remove the operation history entry after successfully undoing the operation
-         self._operation_history.pop()
-         return ohe
-
-     def undo_all(self):
-         """Undo all CRUD operations in self._operation_history."""
-         undone_operations = []
-         while len(self._operation_history) > 0:
-             operation = self.undo()
-             undone_operations.append(operation)
-         return undone_operations
-
-     def clear_operation_history(self):
-         """Clear the history of CRUD operations."""
-         self._operation_history = []
-
-     def list_marked_for_deletion(self, time_threshold=None):
-         """List records marked for deletion."""
-         self._check_args(time_threshold=time_threshold)
-         records = self._records.list_marked_for_deletion(time_threshold=time_threshold)
-         paths = self._data.list_marked_for_deletion(time_threshold=time_threshold)
-         records_with_paths = []
-         records_without_paths = []
-         for record in records:
-             if record.get("has_file"):
-                 records_with_paths.append(record)
-             else:
-                 records_without_paths.append(record)
-         tuples_with_paths = list(zip(records_with_paths, paths))
-         tuples_without_paths = list(zip(records_without_paths, [None]*len(records_without_paths)))
-         tuples = tuples_with_paths + tuples_without_paths
-         tuples.sort(key=lambda x: x[0].get("time_of_removal"))
-         return tuples
-
-     def purge(self, time_threshold=None):
-         """Purge (permanently delete) records marked for deletion."""
-         self._records.purge(time_threshold=time_threshold)
-         self._data.purge(time_threshold=time_threshold)
-
-     def _validate(self, record):
-         """Validate a single object prior to adding it into the repository."""
-         schema_ref = record.get("schema_ref")
-         # get the main domain model using the schema_ref
-         domain_model = self._domain_models.get(schema_name=schema_ref)
-         # check that the schema_ref exists in the repository
-         if domain_model is None:
-             raise DataRepositoryValidationError(f"The schema_ref '{schema_ref}' does not exist in the repository. The original record is\n\n{record}.")
-         # get the json schema from the domain model and try to validate the record
-         record_json_schema = domain_model.get("json_schema")
-         try:
-             validator = self._get_validator(record_json_schema)
-             validator.validate(record)
-         except jsonschema.exceptions.ValidationError as e:
-             message = self._validation_error_message(e, record, record_json_schema)
-             raise DataRepositoryValidationError(message)
-         # if the record has passed overall validation, then check that each property is valid
-         # each property should have a corresponding domain model with the same schema_name as the property name
-         def validate_property(property_name, value):
-             # special case: if the property name ends with "_data_ref", then we use the "data_ref" domain property model
-             # we do not expect a specific model for each *_data_ref property name
-             if property_name.endswith("_data_ref"):
-                 property_model = self._domain_models.get(schema_name="data_ref")
-             else:
-                 # if this property is not a special case, then we expect a domain model with the same schema_name as the property name
-                 property_model = self._domain_models.get(schema_name=property_name)
-             if property_model is None:
-                 raise DataRepositoryValidationError(f"The property '{property_name}' does not exist in the controlled vocabulary. The original record is\n\n{record}.")
-             property_json_schema = property_model.get("json_schema")
-             try:
-                 validator = self._get_validator(property_json_schema)
-                 validator.validate(value)
-             except jsonschema.exceptions.ValidationError as e:
-                 message = self._validation_error_message(e, record, property_json_schema, property_name)
-                 raise DataRepositoryValidationError(message)
-         for key, value in record.items():
-             validate_property(key, value)
-
-     def _check_args(self, **kwargs):
-         for key, value in kwargs.items():
-             if not isinstance(value, self._arg_options[key]):
-                 raise DataRepositoryTypeError(f"{key} must be of type {self._arg_options[key]}, not {type(value)}")
-
-     @property
-     def _arg_options(self):
-         return {
-             "schema_ref": (str),
-             "data_name": (str),
-             "version_timestamp": (datetime, int),
-             "time_threshold": (datetime, type(None)),
-             "filter": (dict, type(None)),
-             "projection": (dict, type(None)),
-         }
-
-     def _get_validator(self, schema):
-         """Get a validator for a schema."""
-         return self._validator(schema)
-
-     def _validation_error_message(self, e, record, property_json_schema, property_name=None):
-         """Get an enhanced validation error message."""
-
-         message = f"\n\nDataRepositoryValidationError\n-------------------\n\n"
-         if property_name is not None:
-             message += f"Property Name: {property_name}\n\n"
-         message += f"Message: {e.message}\n\n"
-         message += f"Instance: {e.instance}\n\n"
-         message += f"Path: {e.path}\n\n"
-         message += f"Relative Path: {e.relative_path}\n\n"
-         message += f"Absolute Path: {e.absolute_path}\n\n"
-         message += f"Schema Path: {e.schema_path}\n\n"
-         message += f"Local Schema: {e.schema}\n\n"
-         message += f"Args: {e.args}\n\n"
-         message += f"Cause: {e.cause}\n\n"
-         message += f"Context: {e.context}\n\n"
-         message += f"Validator: {e.validator}\n\n"
-         message += f"Validator Value: {e.validator_value}\n\n"
-         message += f"Record: {record}\n\n"
-         message += f"Full Schema: {property_json_schema}\n\n"
-
-         return message
-
-
- # ================================
- # In Memory Repository
- # ================================
-
- class InMemoryRepositoryAlreadyExistsError(AlreadyExistsError):
-     pass
-
- class InMemoryRepositoryNotFoundError(NotFoundError):
-     pass
-
- class InMemoryRepositoryRangeError(RangeError):
-     pass
-
- class InMemoryRepositoryTypeError(ArgumentTypeError):
-     pass
-
- class InMemoryRepositoryValidationError(ValidationError):
-     pass
-
- class InMemoryRepositoryUncaughtError(UncaughtError):
-     pass
-
- class InMemoryObjectRepository(AbstractRepository):
-     """Repository for storing objects in memory."""
-     def __init__(self, memory_dao):
-         self._dao = memory_dao
-         self._operation_history = []
-
-     def get(self, schema_ref, object_name):
-         """Get a single object."""
-         self._check_args(schema_ref=schema_ref, object_name=object_name)
-         return self._dao.get(schema_ref, object_name)
-
-     def exists(self, schema_ref, object_name):
-         """Check if an object exists."""
-         self._check_args(schema_ref=schema_ref, object_name=object_name)
-         return self._dao.exists(schema_ref, object_name)
-
-     def add(self, schema_ref, object_name, object):
-         """Add a single object to the repository."""
-         self._check_args(schema_ref=schema_ref, object_name=object_name, object=object)
-         if self._dao.exists(schema_ref, object_name):
-             raise InMemoryRepositoryAlreadyExistsError(f"An object with object_name '{object_name}' already exists in the repository.")
-         ohe = OperationHistoryEntry(self.timestamp(), self._dao.collection_name, "added", schema_ref=schema_ref, object_name=object_name, has_file=False)
-         self._dao.add(schema_ref, object_name, object)
-         self._operation_history.append(ohe)
-         return ohe
-
-     def remove(self, schema_ref, object_name):
-         """Mark a single object for deletion; remove it from the scope of get and list searches."""
-         self._check_args(schema_ref=schema_ref, object_name=object_name)
-         ohe = OperationHistoryEntry(self.timestamp(), self._dao.collection_name, "removed", schema_ref=schema_ref, object_name=object_name, has_file=False)
-         self._dao.remove(schema_ref, object_name)
-         self._operation_history.append(ohe)
-         return ohe
-
-     def list_marked_for_deletion(self, time_threshold=None):
-         return super().list_marked_for_deletion(time_threshold=time_threshold)
-
-     def undo(self):
-         """Undo most recent CRUD operation."""
-         try:
-             ohe = self._operation_history[-1]
-         except IndexError:
-             raise InMemoryRepositoryRangeError(f"There are no operations in the operations history to undo. self._operations_history == {self._operation_history}")
-         if ohe.operation == "removed":
-             self._dao.restore(schema_ref=ohe.schema_ref, object_name=ohe.object_name)
-         elif ohe.operation == "added":
-             self.delete(schema_ref=ohe.schema_ref, object_name=ohe.object_name)
-         # remove the operation history entry after successfully undoing the operation
-         self._operation_history.pop()
-
-     def undo_all(self):
-         """Undo all CRUD operations in self._operation_history."""
-         while len(self._operation_history) > 0:
-             self.undo()
-
-     def clear_operation_history(self):
-         """Clear the history of CRUD operations."""
-         self._operation_history = []
-
-     def purge(self, time_threshold=None):
-         """Purge (permanently delete) objects marked for deletion."""
-         self._dao.purge(time_threshold)
-
-     def _validate(self, obj):
-         """Validate a single object prior to adding it into the repository."""
-         return obj
-
-
-
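For reviewers auditing what 0.0.3 dropped: the removed repositories.py combined two techniques, a jsonschema Draft 7 validator extended with a custom "datetime" type, and repositories that log every add/remove as an OperationHistoryEntry so it can be undone. The sketch below reproduces both patterns in miniature. It is illustrative only: InMemoryDAO and its record fields are hypothetical stand-ins, not the package's actual DAO API (which lived in eegdash/SignalStore/signalstore/store/data_access_objects.py).

    from datetime import datetime, timezone
    import jsonschema

    # 1) Extend Draft 7 with a "datetime" type, as the removed CustomValidator did.
    def is_datetime(checker, instance):
        return isinstance(instance, datetime)

    type_checker = jsonschema.Draft7Validator.TYPE_CHECKER.redefine("datetime", is_datetime)
    CustomValidator = jsonschema.validators.extend(
        jsonschema.Draft7Validator, type_checker=type_checker
    )

    schema = {"type": "object", "properties": {"time_of_save": {"type": "datetime"}}}
    CustomValidator(schema).validate({"time_of_save": datetime.now(timezone.utc)})  # passes

    # 2) Operation-history undo in the style of the removed repositories.
    class InMemoryDAO:  # hypothetical stand-in for the package's DAO layer
        def __init__(self):
            self.docs, self.trash = {}, {}

        def add(self, name, doc):
            self.docs[name] = doc

        def mark_for_deletion(self, name):
            self.trash[name] = self.docs.pop(name)

        def restore(self, name):
            self.docs[name] = self.trash.pop(name)

    class Repository:
        def __init__(self, dao):
            self._dao = dao
            self._operation_history = []  # (timestamp, operation, name) tuples

        def timestamp(self):
            return datetime.now(timezone.utc)

        def add(self, name, doc):
            self._dao.add(name, doc)
            self._operation_history.append((self.timestamp(), "added", name))

        def remove(self, name):
            self._dao.mark_for_deletion(name)
            self._operation_history.append((self.timestamp(), "removed", name))

        def undo(self):
            if not self._operation_history:
                return None
            _, op, name = self._operation_history.pop()
            if op == "added":
                self._dao.mark_for_deletion(name)  # undo an add by removing it
            else:
                self._dao.restore(name)            # undo a remove by restoring it
            return op, name

    repo = Repository(InMemoryDAO())
    repo.add("session_001", {"schema_ref": "session"})
    repo.remove("session_001")
    print(repo.undo())  # ('removed', 'session_001'); the record is back in repo._dao.docs

The real DataRepository layered more on top of this, as the diff above shows: records are versioned by version_timestamp, each record's file is mirrored through a file DAO, and every record and each of its properties is validated against domain models before being added.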