eegdash 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic.
- eegdash/data_utils.py → data_utils.py +131 -5
- {eegdash-0.0.1.dist-info → eegdash-0.0.3.dist-info}/METADATA +75 -8
- eegdash-0.0.3.dist-info/RECORD +8 -0
- {eegdash-0.0.1.dist-info → eegdash-0.0.3.dist-info}/WHEEL +1 -1
- eegdash-0.0.3.dist-info/top_level.txt +3 -0
- main.py +199 -0
- eegdash/SignalStore/__init__.py +0 -0
- eegdash/SignalStore/signalstore/__init__.py +0 -3
- eegdash/SignalStore/signalstore/adapters/read_adapters/abstract_read_adapter.py +0 -13
- eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/schema_read_adapter.py +0 -16
- eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/vocabulary_read_adapter.py +0 -19
- eegdash/SignalStore/signalstore/adapters/read_adapters/handmade_records/excel_study_organizer_read_adapter.py +0 -114
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/axona/axona_read_adapter.py +0 -912
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/ReadIntanSpikeFile.py +0 -140
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/intan_read_adapter.py +0 -29
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/__init__.py +0 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/data_to_result.py +0 -62
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/get_bytes_per_data_block.py +0 -36
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/notch_filter.py +0 -50
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/qstring.py +0 -41
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_header.py +0 -135
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_one_data_block.py +0 -45
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/load_intan_rhd_format.py +0 -204
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/__init__.py +0 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/data_to_result.py +0 -60
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/get_bytes_per_data_block.py +0 -37
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/notch_filter.py +0 -50
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/qstring.py +0 -41
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_header.py +0 -153
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_one_data_block.py +0 -47
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/load_intan_rhs_format.py +0 -213
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/neurodata_without_borders/neurodata_without_borders_read_adapter.py +0 -14
- eegdash/SignalStore/signalstore/operations/__init__.py +0 -4
- eegdash/SignalStore/signalstore/operations/handler_executor.py +0 -22
- eegdash/SignalStore/signalstore/operations/handler_factory.py +0 -41
- eegdash/SignalStore/signalstore/operations/handlers/base_handler.py +0 -44
- eegdash/SignalStore/signalstore/operations/handlers/domain/property_model_handlers.py +0 -79
- eegdash/SignalStore/signalstore/operations/handlers/domain/schema_handlers.py +0 -3
- eegdash/SignalStore/signalstore/operations/helpers/abstract_helper.py +0 -17
- eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_extractor.py +0 -33
- eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_rawio.py +0 -165
- eegdash/SignalStore/signalstore/operations/helpers/spikeinterface_helper.py +0 -100
- eegdash/SignalStore/signalstore/operations/helpers/wrappers/neo_wrappers.py +0 -21
- eegdash/SignalStore/signalstore/operations/helpers/wrappers/nwb_wrappers.py +0 -27
- eegdash/SignalStore/signalstore/store/__init__.py +0 -8
- eegdash/SignalStore/signalstore/store/data_access_objects.py +0 -1181
- eegdash/SignalStore/signalstore/store/datafile_adapters.py +0 -131
- eegdash/SignalStore/signalstore/store/repositories.py +0 -928
- eegdash/SignalStore/signalstore/store/store_errors.py +0 -68
- eegdash/SignalStore/signalstore/store/unit_of_work.py +0 -97
- eegdash/SignalStore/signalstore/store/unit_of_work_provider.py +0 -67
- eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_recording.py +0 -1
- eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_sorter.py +0 -1
- eegdash/SignalStore/signalstore/utilities/testing/data_mocks.py +0 -513
- eegdash/SignalStore/signalstore/utilities/tools/dataarrays.py +0 -49
- eegdash/SignalStore/signalstore/utilities/tools/mongo_records.py +0 -25
- eegdash/SignalStore/signalstore/utilities/tools/operation_response.py +0 -78
- eegdash/SignalStore/signalstore/utilities/tools/purge_orchestration_response.py +0 -21
- eegdash/SignalStore/signalstore/utilities/tools/quantities.py +0 -15
- eegdash/SignalStore/signalstore/utilities/tools/strings.py +0 -38
- eegdash/SignalStore/signalstore/utilities/tools/time.py +0 -17
- eegdash/SignalStore/tests/conftest.py +0 -799
- eegdash/SignalStore/tests/data/valid_data/data_arrays/make_fake_data.py +0 -59
- eegdash/SignalStore/tests/unit/store/conftest.py +0 -0
- eegdash/SignalStore/tests/unit/store/test_data_access_objects.py +0 -1235
- eegdash/SignalStore/tests/unit/store/test_repositories.py +0 -1309
- eegdash/SignalStore/tests/unit/store/test_unit_of_work.py +0 -7
- eegdash/SignalStore/tests/unit/test_ci_cd.py +0 -8
- eegdash/aws_ingest.py +0 -29
- eegdash/main.py +0 -17
- eegdash/signalstore_data_utils.py +0 -280
- eegdash-0.0.1.dist-info/RECORD +0 -72
- eegdash-0.0.1.dist-info/top_level.txt +0 -1
- /eegdash/__init__.py → /__init__.py +0 -0
- {eegdash-0.0.1.dist-info → eegdash-0.0.3.dist-info}/LICENSE +0 -0
eegdash/SignalStore/signalstore/store/repositories.py
@@ -1,928 +0,0 @@
-from eegdash.SignalStore.signalstore.store.data_access_objects import *
-from eegdash.SignalStore.signalstore.store.store_errors import *
-from eegdash.SignalStore.signalstore.utilities.tools.strings import contains_regex_characters
-
-from abc import ABC, abstractmethod
-import jsonschema
-import json
-from datetime import datetime
-from time import sleep
-from concurrent.futures import ThreadPoolExecutor
-
-
-# ================================
-# Base and Support Classes
-# ================================
-
-class AbstractRepository(ABC):
-
-    # Data Retrieval Operations
-
-    @abstractmethod
-    def get(self):
-        """Get a single aggregated object."""
-        pass
-
-    @abstractmethod
-    def exists(self):
-        """Check if an aggregated object exists."""
-        pass
-
-    # Operations That Modify the Repository
-
-    @abstractmethod
-    def add(self):
-        """Add a single aggregated object to all relevant collections."""
-        pass
-
-    @abstractmethod
-    def remove(self):
-        """Mark a single aggregated object for deletion in all relevant collections."""
-        pass
-
-    # Tracking Operations That Modify the Repository
-    def timestamp(self):
-        """Get a timestamp to use for tracking and sorting CRUD operations.
-        """
-        return datetime.now().astimezone(timezone.utc)
-
-
-    @abstractmethod
-    def undo(self):
-        """Undo most recent CRUD operation."""
-        pass
-
-    @abstractmethod
-    def undo_all(self):
-        """Undo all CRUD operations in self._operation_history."""
-        pass
-
-    @abstractmethod
-    def clear_operation_history(self):
-        """Clear the history of tracked operations."""
-        pass
-
-    # Purging removed Objects
-
-    @abstractmethod
-    def list_marked_for_deletion(self):
-        """List aggregated objects marked for deletion."""
-        pass
-
-    @abstractmethod
-    def purge(self):
-        """Purge (permanently delete) aggregated objects marked for deletion."""
-        pass
-
-    @abstractmethod
-    def _validate(self):
-        """Validate a single aggregated object prior to adding it into the repository."""
-        pass
-
-
-class AbstractQueriableRepository(AbstractRepository):
-
-    @abstractmethod
-    def find(self):
-        """Apply filtering to get multiple aggregated objects fitting a description."""
-        pass
-
-# Operation History Entry
-
-class OperationHistoryEntry:
-    def __init__(self, timestamp: datetime, collection_name: str, operation: str, **kwargs):
-        assert isinstance(timestamp, datetime)
-        self.timestamp = timestamp
-        self.collection_name = collection_name
-        if not operation in ["added", "removed"]:
-            raise OperationHistoryEntryValueError(f"operation must be one of 'added' or 'removed', not '{operation}'")
-        self.operation = operation
-        for key, value in kwargs.items():
-            setattr(self, key, value)
-
-    def __repr__(self):
-        repstr = f"OperationHistoryEntry(timestamp={self.timestamp}, repository={self.collection_name}, operation={self.operation}"
-        for attr in self.__dict__:
-            if attr not in ["timestamp", "collection_name", "operation"]:
-                repstr += f", {attr}={getattr(self, attr)}"
-
-    def __eq__(self, other):
-        for attr in self.__dict__:
-            if getattr(self, attr) != getattr(other, attr):
-                return False
-        return True
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def __gt__(self, other):
-        return self.timestamp > other.timestamp
-
-    def __geq__(self, other):
-        return self.timestamp >= other.timestamp
-
-    def __lt__(self, other):
-        return self.timestamp < other.timestamp
-
-    def __leq__(self, other):
-        return self.timestamp <= other.timestamp
-
-    def dict(self):
-        return self.__dict__
-
-class OperationHistoryEntryValueError(ValueError):
-    pass
-
-
-# ================================
-# Domain Model Repository
-# ================================
-
-
-
-class DomainRepositoryModelAlreadyExistsError(AlreadyExistsError):
-    pass
-
-class DomainRepositoryModelNotFoundError(NotFoundError):
-    pass
-
-class DomainRepositoryRangeError(RangeError):
-    pass
-
-class DomainRepositoryTypeError(ArgumentTypeError):
-    pass
-
-class DomainRepositoryValidationError(ValidationError):
-    pass
-
-class DomainRepositoryUncaughtError(UncaughtError):
-    pass
-
-data_identifier_regex = "^(?!.*__.*)(?!.*time_of_removal.*)(?!.*time_of_save.*)[a-z][a-z0-9_]*[a-z0-9]$"
-
-model_identifier_regex = "^(?!.*__.*)[a-z][a-z0-9_]*[a-z0-9]$"
-
-
-
-domain_model_json_schema = {
-    "title": "Model of Model Records",
-    "description": "A schema for validating model records which includes properties of different model types",
-    "type": "object",
-    "properties": {
-        "schema_name": {
-            "type": "string",
-            "pattern": model_identifier_regex
-        },
-        "schema_title": {
-            "type": "string",
-            "pattern": "^[A-Za-z0-9][A-Za-z0-9 ]+[A-Za-z0-9]$"
-        },
-        "schema_description": {
-            "type": "string",
-            # pattern enforcing description cannot be empty or contain trailing whitespace
-            "pattern": "^\\S(.*\\S)?$"
-        },
-        "schema_type": {
-            "type": "string",
-            "enum": ["property_model", "data_model", "metamodel"]
-        },
-        "json_schema": {
-            "type": "object",
-            "required": ["type"],
-            "properties": {
-                "type": {
-                    "oneOf": [
-                        {
-                            "type": "string"
-                        },
-                        {
-                            "type": "array",
-                            "items": {
-                                "type": "string"
-                            }
-                        }
-                    ]
-                }
-            }
-        },
-        "metamodel_ref": {
-            "type": ["string", "null"],
-            "pattern": model_identifier_regex
-        },
-        "version_timestamp": {
-            "type": ["datetime", "integer"],
-            "if": { "type": "integer" },
-            "then": {
-                "const": 0
-            }
-        },
-        "time_of_save": {
-            "type": "datetime"
-        },
-        "time_of_removal": {
-            "type": ["datetime", "null"]
-        }
-    },
-    "required": ["schema_name", "schema_title", "schema_description", "schema_type", "json_schema"],
-    "allOf": [
-        {
-            "if": { "allOf": [
-                {"properties": {"schema_type": {"const": "data_model"}}},
-                {"required": ["schema_type"]},
-                ]
-            },
-            "then": {
-                "required": ["metamodel_ref"],
-            }
-        },
-        # if the schema_type is metamodel or data_model, then the json schema must have type property equal to 'object'
-        {
-            "if": { "anyOf": [
-                {"properties": {"schema_type": {"const": "metamodel"}}},
-                {"properties": {"schema_type": {"const": "data_model"}}},
-                ]
-            },
-            "then": {
-                "properties": {
-                    "json_schema": {
-                        "properties": {
-                            "type": {
-                                "const": "object"
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    ],
-    "additionalProperties": False
-}
-
-def is_datetime(checker, instance):
-    return isinstance(instance, type(datetime.now().astimezone()))
-# Create a new type checker that adds 'datetime' as a new type
-type_checker = jsonschema.Draft7Validator.TYPE_CHECKER.redefine("datetime", is_datetime)
-# Create a new validator class using the new type checker
-CustomValidator = jsonschema.validators.extend(
-    jsonschema.Draft7Validator, type_checker=type_checker
-)
-
-class DomainModelRepository(AbstractQueriableRepository):
-    """A repositroy for storing Domain Model Objects such as a Controlled Vocabulary or a Object Type Schema collection.
-    """
-    def __init__(self, model_dao, model_metaschema=domain_model_json_schema):
-        self._dao = model_dao
-        self._operation_history = []
-        self._model_metaschema = model_metaschema
-        self._arg_options = {
-            "schema_name": (str),
-            "model": (dict),
-            "filter": (dict, type(None)),
-            "projection": (dict, type(None)),
-        }
-        self._validator = CustomValidator
-
-    def get(self, schema_name):
-        """Get a single domain model object."""
-        self._check_args(schema_name=schema_name)
-        # get the model
-        model = self._dao.get(schema_name=schema_name)
-        if model is None:
-            return None
-        self._validate(model)
-        # return the model
-        return model
-
-    def find(self, filter=None, projection=None, **kwargs):
-        """Apply filtering to get multiple domain model objects fitting a description."""
-        self._check_args(filter=filter, projection=projection)
-        models = self._dao.find(filter=filter, projection=projection, **kwargs)
-        # validate the models
-
-        for model in models:
-            self._validate(model)
-        # return the models
-        return models
-
-    def exists(self, schema_name):
-        """Check if a domain model object exists."""
-        self._check_args(schema_name=schema_name)
-        return self._dao.exists(schema_name=schema_name)
-
-    def add(self, model):
-        """Add a single domain model object to the repository."""
-        self._check_args(model=model)
-        ohe = OperationHistoryEntry(self.timestamp(), self._dao.collection_name, "added", schema_name=model["schema_name"], has_file=False)
-        # validate the model
-        self._validate(model)
-        if self._dao.exists(schema_name=model["schema_name"]):
-            raise DomainRepositoryModelAlreadyExistsError(f"A model with schema_name '{model['schema_name']}' already exists in the repository.")
-        try:
-            self._dao.add(document=model, timestamp=ohe.timestamp)
-        except Exception as e:
-            raise DomainRepositoryUncaughtError(f"An uncaught error occurred while adding the model to the repository.\n\nTraceback: {e}")
-        self._operation_history.append(ohe)
-        return ohe
-
-    def remove(self, schema_name):
-        """Mark a single domain model object for deletion; remove it from the scope of get and list searches."""
-        self._check_args(schema_name=schema_name)
-        ohe = OperationHistoryEntry(self.timestamp(), self._dao.collection_name, "removed", schema_name=schema_name, has_file=False)
-        if not self._dao.exists(schema_name=schema_name):
-            raise DomainRepositoryModelNotFoundError(f"A model with schema_name '{schema_name}' does not exist in the repository.")
-        try:
-            self._dao.mark_for_deletion(schema_name=schema_name, timestamp=ohe.timestamp)
-        except Exception as e:
-            raise DomainRepositoryUncaughtError(f"An uncaught error occurred while marking the model for deletion.\n\nTraceback: {e}")
-        self._operation_history.append(ohe)
-        return ohe
-
-    def undo(self):
-        """Undo most recent CRUD operation."""
-        try:
-            ohe = self._operation_history[-1]
-            if ohe is None:
-                return None
-        except IndexError:
-            return None
-        now = self.timestamp()
-        if ohe.operation=="removed":
-            self._dao.restore(
-                schema_name = ohe.schema_name,
-                nth_most_recent = 1
-            )
-        elif ohe.operation=="added":
-            self._dao.mark_for_deletion(schema_name = ohe.schema_name,
-                                        timestamp = ohe.timestamp)
-        # remove the operation history entry after successfully undoing the operation
-        self._operation_history.pop()
-        return ohe
-
-    def undo_all(self):
-        """Undo all CRUD operations in self._operation_history."""
-        undone_operations = []
-        while len(self._operation_history) > 0:
-            operation = self.undo()
-            undone_operations.append(operation)
-        return undone_operations
-
-    def clear_operation_history(self):
-        """Clear the history of CRUD operations."""
-        self._operation_history = []
-
-    def list_marked_for_deletion(self):
-        """List domain model objects marked for deletion."""
-        try:
-            return self._dao.list_marked_for_deletion()
-        except Exception as e:
-            raise DomainRepositoryUncaughtError(f"An uncaught error occurred while listing the terms marked for deletion.\n\nTraceback: {e}")
-
-    def purge(self, time_threshold=None):
-        """Purge (permanently delete) domain model objects marked for deletion."""
-        try:
-            self._dao.purge(time_threshold)
-        except Exception as e:
-            raise DomainRepositoryUncaughtError(f"An uncaught error occurred while purging the repository.\n\nTraceback: {e}")
-
-    def _validate(self, model):
-        """Validate a single domain model object prior to adding it into the repository."""
-        try:
-            validator = self._get_validator(self._model_metaschema)
-            validator.validate(model)
-        except jsonschema.exceptions.ValidationError as e:
-            message = self._validation_error_message(e, model, self._model_metaschema)
-            raise DomainRepositoryValidationError(message)
-        # if the model has a metamodel_ref property
-        # check that the metamodel_ref exists in the repository
-        # and if so, validate the model against its metamodel
-        if model.get("metamodel_ref") is not None:
-            metamodel_ref = model.get("metamodel_ref")
-            if not self._dao.exists(schema_name=metamodel_ref):
-                raise DomainRepositoryValidationError(f"The metamodel_ref '{metamodel_ref}' does not exist in the repository.")
-            metamodel = self._dao.get(schema_name=metamodel_ref)
-            metaschema = metamodel.get("json_schema")
-            try:
-                validator = self._get_validator(metaschema)
-                validator.validate(model)
-            except jsonschema.exceptions.ValidationError as e:
-                message = self._validation_error_message(e, model, metaschema)
-                raise DomainRepositoryValidationError(message)
-
-
-    def _get_validator(self, schema):
-        """Get a validator for a schema."""
-        return self._validator(schema)
-
-
-    def _validation_error_message(self, e, model, schema):
-        """Get an enhanced validation error message."""
-
-        message = f"\n\nValidation Error\n-------------------\n\n"
-        message += f"Message: {e.message}\n\n"
-        message += f"Instance: {e.instance}\n\n"
-        message += f"Path: {e.path}\n\n"
-        message += f"Relative Path: {e.relative_path}\n\n"
-        message += f"Absolute Path: {e.absolute_path}\n\n"
-        message += f"Schema Path: {e.schema_path}\n\n"
-        message += f"Local Schema: {e.schema}\n\n"
-        message += f"Args: {e.args}\n\n"
-        message += f"Cause: {e.cause}\n\n"
-        message += f"Context: {e.context}\n\n"
-        message += f"Validator: {e.validator}\n\n"
-        message += f"Validator Value: {e.validator_value}\n\n"
-        message += f"Model: {model}\n\n"
-        message += f"Full Schema: {schema}\n\n"
-
-        return message
-
-    def _check_args(self, **kwargs):
-        for key, value in kwargs.items():
-            if not isinstance(value, self.arg_options[key]):
-                raise DomainRepositoryTypeError(f"{key} must be of type {self._arg_options[key]}, not {type(value)}")
-
-    @property
-    def arg_options(self):
-        return self._arg_options
-
-# ================================
-# Data Repository
-# ================================
-
-class DataRepositoryAlreadyExistsError(AlreadyExistsError):
-    pass
-
-class DataRepositoryNotFoundError(NotFoundError):
-    pass
-
-class DataRepositoryRangeError(RangeError):
-    pass
-
-class DataRepositoryTypeError(ArgumentTypeError):
-    pass
-
-class DataRepositoryValidationError(ValidationError):
-    pass
-
-class DataRepositoryUncaughtError(UncaughtError):
-    pass
-
-class DataRepository(AbstractQueriableRepository):
-    # only indexes on schema_ref, data_name, and version_timestamp
-    """A repository for records such as session metadata, data array metadata and object state metadata."""
-    def __init__(self, record_dao, file_dao, domain_repo):
-        self._records = record_dao
-        self._data = file_dao
-        self._domain_models = domain_repo
-        self._operation_history = []
-        self._validator = CustomValidator
-
-
-    def get(self, schema_ref, data_name, nth_most_recent=None, version_timestamp=0, data_adapter=None, validate=True):
-        """Get a single record."""
-        # if argument is a dict, try unpacking it
-        if not nth_most_recent is None and nth_most_recent < 1:
-            raise DataRepositoryRangeError(f"nth_most_recent must be an integer greater than 0, not {nth_most_recent}.")
-        self._check_args(
-            schema_ref=schema_ref,
-            data_name=data_name,
-            version_timestamp=version_timestamp
-        )
-        if nth_most_recent is not None and version_timestamp==0:
-            record = self._records.find(filter={"schema_ref": schema_ref, "data_name": data_name}, sort=[("version_timestamp", 1)], limit=nth_most_recent)[-1]
-            version_timestamp = record.get("version_timestamp")
-        else:
-            record = self._records.get(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp)
-        if record is None:
-            return None
-        if validate:
-            self._validate(record)
-        has_file = record.get("has_file")
-        if has_file:
-            data = self._data.get(
-                schema_ref=schema_ref,
-                data_name=data_name,
-                version_timestamp=version_timestamp,
-                data_adapter=data_adapter
-            )
-            if data is None:
-                raise DataRepositoryNotFoundError(f"Data for record with schema_ref '{schema_ref}', data_name '{data_name}', and version_timestamp '{version_timestamp}' is missing its file. The record exists and has the 'has_file' attribute set to True, but the file data access object returned None.")
-            # check that data.attrs is a subset of the record's attrs
-            attr_keys = set(data.attrs.keys())
-            record_keys = set(record.keys())
-            if not attr_keys.issubset(record_keys):
-                raise DataRepositoryValidationError(f"The data.attrs keys {attr_keys} are not a subset of the record keys {record_keys}. The difference is {attr_keys.difference(record_keys)}.")
-            #data.attrs.update(record)
-            return data
-        else:
-            return record
-
-    def find(self, filter=None, projection=None, sort=None, limit=None, get_data=False, validate=True):
-        """Apply filtering to get multiple records fitting a description."""
-        self._check_args(
-            filter=filter,
-            projection=projection)
-        if sort is not None and limit is not None:
-            records = self._records.find(filter=filter, projection=projection).sort(sort).limit(limit)
-        elif sort is not None:
-            records = self._records.find(filter=filter, projection=projection).sort(sort)
-        elif limit is not None:
-            records = self._records.find(filter=filter, projection=projection).limit(limit)
-        else:
-            records = self._records.find(filter=filter, projection=projection)
-        # validate the records
-        if validate:
-            for record in records:
-                self._validate(record)
-        if get_data:
-            data = []
-            for record in records:
-                if record.get("has_file"):
-                    schema_ref = record.get("schema_ref")
-                    data_name = record.get("data_name")
-                    version_timestamp = record.get("version_timestamp")
-                    data_object = self._data.get(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp)
-                    data.append(data_object)
-                else:
-                    data.append(record)
-            return data
-        else:
-            return records
-
-    def exists(self, schema_ref, data_name, version_timestamp=0):
-        """Check if a record exists.
-        Caveats:
-            Does not check if data files have been lost.
-        """
-        self._check_args(
-            schema_ref=schema_ref,
-            data_name=data_name,
-            version_timestamp=version_timestamp)
-        record_exists = self._records.exists(version_timestamp=version_timestamp, schema_ref=schema_ref, data_name=data_name)
-        return record_exists
-
-    def has_file(self, schema_ref, data_name, version_timestamp=0):
-        """Check if a record has data."""
-        self._check_args(
-            schema_ref=schema_ref,
-            data_name=data_name,
-            version_timestamp=version_timestamp)
-        has_file = self._data.exists(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp)
-        return has_file
-
-    def add(self, object, data_adapter=None, versioning_on=False):
-        """Add a single object to the repository."""
-        add_timestamp = self.timestamp()
-        dttype = type(datetime.now().astimezone())
-        if isinstance(object, dict):
-            if versioning_on and not isinstance(object.get("version_timestamp"), dttype):
-                object["version_timestamp"] = add_timestamp
-            elif not versioning_on:
-                object["version_timestamp"] = 0
-            elif not isinstance(object.get("version_timestamp"), dttype):
-                raise DataRepositoryTypeError(f"'version_timestamp' must be a {dttype} object or the integer 0, not {type(object.get('version_timestamp'))}.")
-            ohe = self._add_record(
-                object=object,
-                add_timestamp=add_timestamp,
-                versioning_on=versioning_on
-            )
-            return ohe
-        elif hasattr(object, "attrs"):
-            if versioning_on and not isinstance(object.attrs.get("version_timestamp"), dttype):
-                object.attrs["version_timestamp"] = add_timestamp
-            elif not versioning_on:
-                object.attrs["version_timestamp"] = 0
-            elif not isinstance(object.attrs.get("version_timestamp"), dttype):
-                raise DataRepositoryTypeError(f"'version_timestamp' must be a {dttype} object or the integer 0, not {type(object.attrs.get('version_timestamp'))}.")
-            ohe = self._add_data_with_file(
-                object=object,
-                add_timestamp=add_timestamp,
-                versioning_on=versioning_on,
-                data_adapter=data_adapter
-            )
-            return ohe
-        else:
-            raise DataRepositoryTypeError(f"object must be a dict or an object with an 'attrs' attribute, not {type(object)}")
-
-    def _add_record(self, object, add_timestamp, versioning_on):
-        ohe = OperationHistoryEntry(
-            add_timestamp,
-            self._records.collection_name,
-            "added",
-            schema_ref=object["schema_ref"],
-            data_name=object["data_name"],
-            version_timestamp=object["version_timestamp"],
-            data_adapter = None,
-            has_file = False)
-        if object.get("has_file") is None:
-            object['has_file'] = False
-        self._validate(object)
-        self._records.add(
-            document=object,
-            timestamp=add_timestamp,
-            versioning_on=versioning_on
-        )
-        self._operation_history.append(ohe)
-        return ohe
-
-    def _add_data_with_file(self, object, add_timestamp, versioning_on, data_adapter=None):
-        if data_adapter is None:
-            data_adapter = self._data._default_data_adapter
-        ohe = OperationHistoryEntry(
-            add_timestamp,
-            self._records.collection_name, "added",
-            schema_ref=object.attrs["schema_ref"],
-            data_name=object.attrs["data_name"],
-            has_file = True,
-            data_adapter = data_adapter,
-            version_timestamp=object.attrs["version_timestamp"]
-        )
-        if object.attrs.get("has_file") is None:
-            object.attrs["has_file"] = True
-        self._validate(object.attrs)
-        self._records.add(
-            document=object.attrs,
-            timestamp=ohe.timestamp,
-            versioning_on=versioning_on
-        )
-        self._data.add(
-            data_object=object,
-            data_adapter=data_adapter
-        )
-        self._operation_history.append(ohe)
-        return ohe
-
-    def remove(self, schema_ref, data_name, version_timestamp=0, data_adapter=None):
-        """Mark a single record for deletion; remove it from the scope of get and list searches."""
-        self._check_args(
-            schema_ref=schema_ref,
-            data_name=data_name,
-            version_timestamp=version_timestamp)
-        if not self._records.exists(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp):
-            raise DataRepositoryNotFoundError(f"A record with schema_ref '{schema_ref}', data_name '{data_name}', and version_timestamp '{version_timestamp}' does not exist in the repository.")
-        has_file = self._data.exists(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp, data_adapter=data_adapter)
-
-        ohe = OperationHistoryEntry(
-            self.timestamp(),
-            self._records.collection_name,
-            "removed", schema_ref=schema_ref,
-            data_name=data_name,
-            version_timestamp=version_timestamp,
-            has_file=has_file,
-            data_adapter=data_adapter
-        )
-
-        if has_file:
-            if data_adapter is None:
-                data_adapter = self._data._default_data_adapter
-            self._data.mark_for_deletion(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp, time_of_removal=ohe.timestamp, data_adapter=data_adapter)
-        self._records.mark_for_deletion(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp, timestamp=ohe.timestamp)
-        self._operation_history.append(ohe)
-        return ohe
-
-
-    def undo(self):
-        try:
-            ohe = self._operation_history[-1]
-        except IndexError:
-            return None
-        if ohe.operation=="removed":
-            self._records.restore(
-                schema_ref = ohe.schema_ref,
-                data_name = ohe.data_name,
-                version_timestamp = ohe.version_timestamp,
-                nth_most_recent = 1
-            )
-            if ohe.has_file:
-                self._data.restore(
-                    schema_ref = ohe.schema_ref,
-                    data_name = ohe.data_name,
-                    version_timestamp = ohe.version_timestamp,
-                    data_adapter = ohe.data_adapter,
-                    nth_most_recent = 1
-                )
-        elif ohe.operation=="added":
-            self._records.mark_for_deletion(schema_ref = ohe.schema_ref,
-                                            data_name = ohe.data_name,
-                                            version_timestamp = ohe.version_timestamp,
-                                            timestamp = ohe.timestamp
-                                            )
-            if ohe.has_file:
-                self._data.mark_for_deletion(
-                    schema_ref = ohe.schema_ref,
-                    data_name = ohe.data_name,
-                    version_timestamp = ohe.version_timestamp,
-                    time_of_removal = ohe.timestamp,
-                    data_adapter = ohe.data_adapter
-                )
-        # remove the operation history entry after successfully undoing the operation
-        self._operation_history.pop()
-        return ohe
-
-    def undo_all(self):
-        """Undo all CRUD operations in self._operation_history."""
-        undone_operations = []
-        while len(self._operation_history) > 0:
-            operation = self.undo()
-            undone_operations.append(operation)
-        return undone_operations
-
-    def clear_operation_history(self):
-        """Clear the history of CRUD operations."""
-        self._operation_history = []
-
-    def list_marked_for_deletion(self, time_threshold=None):
-        """List records marked for deletion."""
-        self._check_args(time_threshold=time_threshold)
-        records = self._records.list_marked_for_deletion(time_threshold=time_threshold)
-        paths = self._data.list_marked_for_deletion(time_threshold=time_threshold)
-        records_with_paths = []
-        records_without_paths = []
-        for record in records:
-            if record.get("has_file"):
-                records_with_paths.append(record)
-            else:
-                records_without_paths.append(record)
-        tuples_with_paths = list(zip(records_with_paths, paths))
-        tuples_without_paths = list(zip(records_without_paths, [None]*len(records_without_paths)))
-        tuples = tuples_with_paths + tuples_without_paths
-        tuples.sort(key=lambda x: x[0].get("time_of_removal"))
-        return tuples
-
-    def purge(self, time_threshold=None):
-        """Purge (permanently delete) records marked for deletion."""
-        self._records.purge(time_threshold=time_threshold)
-        self._data.purge(time_threshold=time_threshold)
-
-    def _validate(self, record):
-        """Validate a single object prior to adding it into the repository."""
-        schema_ref = record.get("schema_ref")
-        # get teh main domain model using the schema_ref
-        domain_model = self._domain_models.get(schema_name=schema_ref)
-        # check that the schema_ref exists in the repository
-        if domain_model is None:
-            raise DataRepositoryValidationError(f"The schema_ref '{schema_ref}' does not exist in the repository. The original record is\n\n{record}.")
-        # get the json schema from the domain model and try to validate the record
-        record_json_schema = domain_model.get("json_schema")
-        try:
-            validator = self._get_validator(record_json_schema)
-            validator.validate(record)
-        except jsonschema.exceptions.ValidationError as e:
-            message = self._validation_error_message(e, record, record_json_schema)
-            raise DataRepositoryValidationError(message)
-        # if the record has passed overall validation, then check that each property is valid
-        # each property should have a corresponding domain model with the same schema_name as the property name
-        def validate_property(property_name, value):
-            # special case: if the property name ends with "_data_ref", then we use the "data_ref" domain property model
-            # we do not expect a specific for each *_data_ref property name
-            if property_name.endswith("_data_ref"):
-                property_model = self._domain_models.get(schema_name="data_ref")
-            else:
-                # if this property is not a special case, then we expect a domain model with the same schema_name as the property name
-                property_model = self._domain_models.get(schema_name=property_name)
-            if property_model is None:
-                raise DataRepositoryValidationError(f"The property '{property_name}' does not exist in the controlled vocabulary. The original record is\n\n{record}.")
-            property_json_schema = property_model.get("json_schema")
-            try:
-                validator = self._get_validator(property_json_schema)
-                validator.validate(value)
-            except jsonschema.exceptions.ValidationError as e:
-                message = self._validation_error_message(e, record, property_json_schema, property_name)
-                raise DataRepositoryValidationError(message)
-        for key, value in record.items():
-            validate_property(key, value)
-
-    def _check_args(self, **kwargs):
-        for key, value in kwargs.items():
-            if not isinstance(value, self._arg_options[key]):
-                raise DataRepositoryTypeError(f"{key} must be of type {self._arg_options[key]}, not {type(value)}")
-
-    @property
-    def _arg_options(self):
-        return {
-            "schema_ref": (str),
-            "data_name": (str),
-            "version_timestamp": (datetime, int),
-            "time_threshold": (datetime, type(None)),
-            "filter": (dict, type(None)),
-            "projection": (dict, type(None)),
-        }
-
-    def _get_validator(self, schema):
-        """Get a validator for a schema."""
-        return self._validator(schema)
-
-    def _validation_error_message(self, e, record, property_json_schema, property_name=None):
-        """Get an enhanced validation error message."""
-
-        message = f"\n\nDataRepositoryValidationError\n-------------------\n\n"
-        if property_name is not None:
-            message += f"Property Name: {property_name}\n\n"
-        message += f"Message: {e.message}\n\n"
-        message += f"Instance: {e.instance}\n\n"
-        message += f"Path: {e.path}\n\n"
-        message += f"Relative Path: {e.relative_path}\n\n"
-        message += f"Absolute Path: {e.absolute_path}\n\n"
-        message += f"Schema Path: {e.schema_path}\n\n"
-        message += f"Local Schema: {e.schema}\n\n"
-        message += f"Args: {e.args}\n\n"
-        message += f"Cause: {e.cause}\n\n"
-        message += f"Context: {e.context}\n\n"
-        message += f"Validator: {e.validator}\n\n"
-        message += f"Validator Value: {e.validator_value}\n\n"
-        message += f"Record: {record}\n\n"
-        message += f"Full Schema: {property_json_schema}\n\n"
-
-        return message
-
-
-# ================================
-# In Memory Repository
-# ================================
-
-class InMemoryRepositoryAlreadyExistsError(AlreadyExistsError):
-    pass
-
-class InMemoryRepositoryNotFoundError(NotFoundError):
-    pass
-
-class InMemoryRepositoryRangeError(RangeError):
-    pass
-
-class InMemoryRepositoryTypeError(ArgumentTypeError):
-    pass
-
-class InMemoryRepositoryValidationError(ValidationError):
-    pass
-
-class InMemoryRepositoryUncaughtError(UncaughtError):
-    pass
-
-class InMemoryObjectRepository(AbstractRepository):
-    """Repository for storing objects in memory."""
-    def __init__(self, memory_dao):
-        self._dao = memory_dao
-        self._operation_history = []
-
-    def get(self, schema_ref, object_name):
-        """Get a single object."""
-        self._check_args(schema_ref=schema_ref, object_name=object_name)
-        return self._dao.get(schema_ref, object_name)
-
-    def exists(self, schema_ref, object_name):
-        """Check if an object exists."""
-        self._check_args(schema_ref=schema_ref, object_name=object_name)
-        return self._dao.exists(schema_ref, object_name)
-
-    def add(self, schema_ref, object_name, object):
-        """Add a single object to the repository."""
-        self._check_args(schema_ref=schema_ref, object_name=object_name, object=object)
-        if self._dao.exists(schema_ref, object_name):
-            raise InMemoryRepositoryAlreadyExistsError(f"An object with data_name '{object_name}' already exists in the repository.")
-        ohe = OperationHistoryEntry(self.timestamp(), self._dao.collection_name, "added", schema_ref=schema_ref, object_name=object_name, has_file=False)
-        self._dao.add(schema_ref, object_name, object)
-        self._operation_history.append(ohe)
-        return ohe
-
-    def remove(self, schema_ref, object_name):
-        """Mark a single object for deletion; remove it from the scope of get and list searches."""
-        self._check_args(schema_ref=schema_ref, object_name=object_name)
-        ohe = OperationHistoryEntry(self.timestamp(), self._dao.collection_name, "removed", schema_ref=schema_ref, object_name=object_name, has_file=False)
-        self._dao.remove(schema_ref, object_name)
-        self._operation_history.append(ohe)
-        return ohe
-
-    def list_marked_for_deletion(self, time_threshold=None):
-        return super().list_marked_for_deletion(time_threshold=time_threshold)
-
-    def undo(self):
-        """Undo most recent CRUD operation."""
-        try:
-            ohe = self._operation_history[-1]
-        except IndexError:
-            raise InMemoryRepositoryRangeError(f"There are no operations in the operations history to undo. self._operations_history == {self._operation_history}")
-        if ohe.operation=="removed":
-            self._dao.restore(schema_ref=ohe.schema_ref, object_name=ohe.object_name)
-        elif ohe.operation=="added":
-            self.delete(schema_ref=ohe.schema_ref, object_name=ohe.object_name)
-        # remove the operation history entry after successfully undoing the operation
-        self._operation_history.pop()
-
-    def undo_all(self):
-        """Undo all CRUD operations in self._operation_history."""
-        while len(self._operation_history) > 0:
-            self.undo()
-
-    def clear_operation_history(self):
-        """Clear the history of CRUD operations."""
-        self._operation_history = []
-
-    def purge(self, time_threshold=None):
-        """Purge (permanently delete) objects marked for deletion."""
-        self._dao.purge(time_threshold)
-
-    def _validate(self, obj):
-        """Validate a single object prior to adding it into the repository."""
-        return obj
-
-
-