eegdash-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eegdash/SignalStore/__init__.py +0 -0
- eegdash/SignalStore/signalstore/__init__.py +3 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/abstract_read_adapter.py +13 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/schema_read_adapter.py +16 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/vocabulary_read_adapter.py +19 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/handmade_records/excel_study_organizer_read_adapter.py +114 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/axona/axona_read_adapter.py +912 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/ReadIntanSpikeFile.py +140 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/intan_read_adapter.py +29 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/__init__.py +0 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/data_to_result.py +62 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/get_bytes_per_data_block.py +36 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/notch_filter.py +50 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/qstring.py +41 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_header.py +135 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_one_data_block.py +45 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/load_intan_rhd_format.py +204 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/__init__.py +0 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/data_to_result.py +60 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/get_bytes_per_data_block.py +37 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/notch_filter.py +50 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/qstring.py +41 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_header.py +153 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_one_data_block.py +47 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/load_intan_rhs_format.py +213 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/neurodata_without_borders/neurodata_without_borders_read_adapter.py +14 -0
- eegdash/SignalStore/signalstore/operations/__init__.py +4 -0
- eegdash/SignalStore/signalstore/operations/handler_executor.py +22 -0
- eegdash/SignalStore/signalstore/operations/handler_factory.py +41 -0
- eegdash/SignalStore/signalstore/operations/handlers/base_handler.py +44 -0
- eegdash/SignalStore/signalstore/operations/handlers/domain/property_model_handlers.py +79 -0
- eegdash/SignalStore/signalstore/operations/handlers/domain/schema_handlers.py +3 -0
- eegdash/SignalStore/signalstore/operations/helpers/abstract_helper.py +17 -0
- eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_extractor.py +33 -0
- eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_rawio.py +165 -0
- eegdash/SignalStore/signalstore/operations/helpers/spikeinterface_helper.py +100 -0
- eegdash/SignalStore/signalstore/operations/helpers/wrappers/neo_wrappers.py +21 -0
- eegdash/SignalStore/signalstore/operations/helpers/wrappers/nwb_wrappers.py +27 -0
- eegdash/SignalStore/signalstore/store/__init__.py +8 -0
- eegdash/SignalStore/signalstore/store/data_access_objects.py +1181 -0
- eegdash/SignalStore/signalstore/store/datafile_adapters.py +131 -0
- eegdash/SignalStore/signalstore/store/repositories.py +928 -0
- eegdash/SignalStore/signalstore/store/store_errors.py +68 -0
- eegdash/SignalStore/signalstore/store/unit_of_work.py +97 -0
- eegdash/SignalStore/signalstore/store/unit_of_work_provider.py +67 -0
- eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_recording.py +1 -0
- eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_sorter.py +1 -0
- eegdash/SignalStore/signalstore/utilities/testing/data_mocks.py +513 -0
- eegdash/SignalStore/signalstore/utilities/tools/dataarrays.py +49 -0
- eegdash/SignalStore/signalstore/utilities/tools/mongo_records.py +25 -0
- eegdash/SignalStore/signalstore/utilities/tools/operation_response.py +78 -0
- eegdash/SignalStore/signalstore/utilities/tools/purge_orchestration_response.py +21 -0
- eegdash/SignalStore/signalstore/utilities/tools/quantities.py +15 -0
- eegdash/SignalStore/signalstore/utilities/tools/strings.py +38 -0
- eegdash/SignalStore/signalstore/utilities/tools/time.py +17 -0
- eegdash/SignalStore/tests/conftest.py +799 -0
- eegdash/SignalStore/tests/data/valid_data/data_arrays/make_fake_data.py +59 -0
- eegdash/SignalStore/tests/unit/store/conftest.py +0 -0
- eegdash/SignalStore/tests/unit/store/test_data_access_objects.py +1235 -0
- eegdash/SignalStore/tests/unit/store/test_repositories.py +1309 -0
- eegdash/SignalStore/tests/unit/store/test_unit_of_work.py +7 -0
- eegdash/SignalStore/tests/unit/test_ci_cd.py +8 -0
- eegdash/__init__.py +1 -0
- eegdash/aws_ingest.py +29 -0
- eegdash/data_utils.py +213 -0
- eegdash/main.py +17 -0
- eegdash/signalstore_data_utils.py +280 -0
- eegdash-0.0.1.dist-info/LICENSE +20 -0
- eegdash-0.0.1.dist-info/METADATA +72 -0
- eegdash-0.0.1.dist-info/RECORD +72 -0
- eegdash-0.0.1.dist-info/WHEEL +5 -0
- eegdash-0.0.1.dist-info/top_level.txt +1 -0
eegdash/SignalStore/signalstore/store/data_access_objects.py
@@ -0,0 +1,1181 @@
from abc import ABC, abstractmethod
from datetime import datetime, timezone
import os
import re
import numpy as np
import json
import traceback
from time import sleep
import xarray as xr
import pymongo

from eegdash.SignalStore.signalstore.store.store_errors import *

from eegdash.SignalStore.signalstore.store.datafile_adapters import AbstractDataFileAdapter, XarrayDataArrayNetCDFAdapter
from concurrent.futures import ThreadPoolExecutor

class AbstractDataAccessObject(ABC):

    @abstractmethod
    def get(self):
        """Get a single object."""
        pass

    @abstractmethod
    def exists(self):
        """Check if an object exists."""
        pass

    @abstractmethod
    def add(self):
        """Add a single object."""
        pass

    @abstractmethod
    def mark_for_deletion(self):
        """Mark a single object for deletion."""
        pass

    @abstractmethod
    def restore(self):
        """Restore the most recent version of a single object."""
        pass

    @abstractmethod
    def list_marked_for_deletion(self):
        """List objects marked for deletion."""
        pass

    @abstractmethod
    def purge(self):
        """Purge (permanently delete) objects marked for deletion."""
        pass

class AbstractQueriableDataAccessObject(AbstractDataAccessObject):
    @abstractmethod
    def find(self):
        """Apply filtering to get multiple objects fitting a description."""
        pass

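The two ABCs above fix the soft-delete contract that every DAO in this module implements. As a quick illustration (a hypothetical class, not part of the package), a concrete subclass must override every abstract method before it can be instantiated:

class NullDAO(AbstractQueriableDataAccessObject):
    """Hypothetical no-op DAO, shown only to illustrate the required interface."""
    def get(self): return None
    def exists(self): return False
    def add(self): return None
    def mark_for_deletion(self): return None
    def restore(self): return None
    def list_marked_for_deletion(self): return []
    def purge(self): return 0
    def find(self): return []

dao = NullDAO()  # omitting any override above would make this raise TypeError
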
# ===================
# Data Access Objects
# ===================
class MongoDAOTypeError(TypeError):
    pass

class MongoDAODocumentNotFoundError(NotFoundError):
    pass

class MongoDAODocumentAlreadyExistsError(AlreadyExistsError):
    pass

class MongoDAORangeError(RangeError):
    pass

class MongoDAOArgumentNameError(ArgumentNameError):
    pass

class MongoDAOUncaughtError(UncaughtError):
    pass

class MongoDAO(AbstractQueriableDataAccessObject):
    """Base class for accessing MongoDB documents.

    It implements the basic CRUD operations in a way that simplifies the
    implementation of specific data access objects for different types of
    documents.
    """
    def __init__(self,
                 client,
                 database_name: str,
                 collection_name: str,
                 index_fields: list
                 ):
        """Initializes the base MongoDB Data Access Object."""
        self._client = client  # MongoDB client
        self._db = client[database_name]  # MongoDB database
        self._collection = self._db[collection_name]  # MongoDB collection
        # index by index_fields
        self._collection_name = collection_name
        self._index_args = set(index_fields)
        index_field_tuples = [(field, 1) for field in index_fields]
        # add version_timestamp and time_of_removal fields to the index
        index_field_tuples.append(('version_timestamp', 1))
        index_field_tuples.append(('time_of_removal', 1))
        try:
            self._collection.create_index(index_field_tuples, unique=True)  # create index
        except pymongo.errors.OperationFailure as e:
            if "user is not allowed to do action" in str(e).lower():
                # Skip index creation for users without write permissions
                pass
            else:
                # Re-raise other operation failures that aren't permission related
                raise
        self._set_argument_types(index_fields)

    def get(self, version_timestamp=0, **kwargs):
        """Gets a document from the repository.
        Arguments:
            version_timestamp {datetime|int} -- The version of the document to get (0 for unversioned).
            **kwargs {dict} -- Only the index fields are allowed as keyword arguments.
        Returns:
            dict -- The document, or None if it is not found.
        """
        self._check_kwargs_are_only_index_args(**kwargs)
        self._check_args(version_timestamp=version_timestamp, **kwargs)
        if isinstance(version_timestamp, datetime):
            version_timestamp = version_timestamp.astimezone(timezone.utc)
        document = self._collection.find_one(
            {
                'time_of_removal': None,
                'version_timestamp': version_timestamp,
                **kwargs
            },
            {'_id': 0})
        if document is None:
            return None
        else:
            return self._deserialize(document)

    def find(self, filter=None, projection=None, **kwargs):
        """Returns a filtered list of documents from the repository.
        Arguments:
            filter {dict} -- The filter to apply to the query.
        Returns:
            list[dict] -- The list of documents.
        """
        self._check_args(filter=filter, projection=projection)
        if filter is None:
            filter = {"time_of_removal": None}
        else:
            filter = filter.copy()  # avoid mutations to the input dict
            filter["time_of_removal"] = None
        if projection is None:
            projection = {'_id': 0}
        else:
            projection = projection.copy()  # avoid mutations to the input dict
            projection['_id'] = 0

        with ThreadPoolExecutor() as executor:
            documents = list(executor.map(self._deserialize, self._collection.find(filter, projection, **kwargs)))
        return documents

    def exists(self, version_timestamp=0, **kwargs):
        self._check_kwargs_are_only_index_args(**kwargs)
        self._check_args(**kwargs)
        return self._collection.find_one(
            {
                'time_of_removal': None,
                'version_timestamp': version_timestamp,
                **kwargs
            }
        ) is not None

    def add(self, document, timestamp, versioning_on=False):
        """Adds a document to the repository.
        Arguments:
            document {dict} -- The document to add.
            timestamp {datetime} -- The timestamp to add the document with.
        Raises:
            MongoDAODocumentAlreadyExistsError -- If the document already exists.
        Returns:
            None
        """
        self._check_args(document=document, timestamp=timestamp)
        document = document.copy()  # avoid mutating the caller's dict
        # stamp the document with a version if versioning is on
        if document.get('version_timestamp') is None or document.get('version_timestamp') == 0:
            if versioning_on:
                document['version_timestamp'] = timestamp
            else:
                document['version_timestamp'] = 0
        # get the index fields from the document
        document_index_args = {key: value for key, value in document.items() if key in self._index_args}
        if self.exists(**document_index_args):
            raise MongoDAODocumentAlreadyExistsError(
                f'Cannot add document with index fields {document_index_args} because it already exists in repository.'
            )
        document['time_of_save'] = timestamp
        document['time_of_removal'] = None
        self._collection.insert_one(self._serialize(document))
        return None

    def mark_for_deletion(self, timestamp, version_timestamp=0, **kwargs):
        """Marks a document for deletion.
        Arguments:
            timestamp {datetime} -- The timestamp to mark the document for deletion with.
            **kwargs {dict} -- Only the index fields are allowed as keyword arguments.
        Raises:
            MongoDAODocumentNotFoundError -- If the document does not exist.
        Returns:
            None
        """
        sleep(0.0000001)  # sleep for a very short time to avoid non-unique time_of_removal values
        self._check_args(timestamp=timestamp, version_timestamp=version_timestamp, **kwargs)
        self._check_kwargs_are_only_index_args(**kwargs)
        document = self.get(**kwargs, version_timestamp=version_timestamp)
        if document is None:
            raise MongoDAODocumentNotFoundError(
                f'Cannot delete document with index fields {kwargs} and version_timestamp {version_timestamp} because it does not exist in repository.'
            )
        else:
            self._collection.update_one(
                {'time_of_removal': None, 'version_timestamp': version_timestamp, **kwargs},
                {'$set': {"time_of_removal": datetime_to_microseconds(timestamp)}})
            return None

    def list_marked_for_deletion(self, time_threshold=None):
        """Returns a list of all deleted documents from the repository."""
        self._check_args(time_threshold=time_threshold)
        if time_threshold is None:
            documents = [self._deserialize(document) for document in self._collection.find(
                {"time_of_removal": {"$ne": None}}, {'_id': 0}, sort=[('time_of_removal', -1)])]
        else:
            documents = [self._deserialize(document) for document in self._collection.find(
                {"time_of_removal": {"$lt": datetime_to_microseconds(time_threshold)}},
                {'_id': 0},
                sort=[('time_of_removal', -1)])]
        return documents

    def restore(self, nth_most_recent=1, **kwargs):
        """Restores a document.
        Arguments:
            nth_most_recent {int} -- The nth most recent version of the deleted document to restore.
            **kwargs {dict} -- Only the index fields are allowed as keyword arguments.
        Raises:
            MongoDAODocumentAlreadyExistsError -- If the document already exists.
            MongoDAORangeError -- If no deleted instance exists or nth_most_recent is out of range.
        Returns:
            None
        """
        self._check_args(nth_most_recent=nth_most_recent, **kwargs)
        self._check_kwargs_are_only_index_args(**kwargs)
        # get all versions of the document with a numeric time_of_removal value,
        # materialized as a list so the cursor is not consumed twice
        documents = list(self._collection.find({'time_of_removal': {'$ne': None}, **kwargs}, {'_id': 0}, sort=[('time_of_removal', 1)]))
        if len(documents) == 0:
            raise MongoDAORangeError(
                f'Cannot restore document with index fields {kwargs}: no deleted instances of {kwargs} were found in repository.'
            )
        # check for an existing document with the same index fields and no time_of_removal value
        document_exists = self.exists(**kwargs)
        if document_exists:
            raise MongoDAODocumentAlreadyExistsError(
                f'Cannot restore document with index fields {kwargs} because it already exists in repository.'
            )
        # pick the nth most recent deleted document (the list is sorted oldest first)
        try:
            nth_document = documents[-nth_most_recent]
            nth_doc_kwargs = {key: value for key, value in nth_document.items() if key in self._index_args}
        except IndexError:
            raise MongoDAORangeError(
                f'Arg nth_most_recent={nth_most_recent} out of range. The record of deleted documents only contains {len(documents)} entries.'
            )
        # restore the document by updating its time_of_removal field to None
        self._collection.update_one({'time_of_removal': nth_document['time_of_removal'], **nth_doc_kwargs}, {'$set': {"time_of_removal": None}})
        return None

    def purge(self, time_threshold=None):
        """Purges deleted documents from the repository older than the time threshold."""
        self._check_args(time_threshold=time_threshold)
        if time_threshold is None:
            result = self._collection.delete_many({"time_of_removal": {"$ne": None}})
        else:
            result = self._collection.delete_many({"time_of_removal": {"$lt": datetime_to_microseconds(time_threshold)}})
        return result.deleted_count

    def _check_args(self, **kwargs):
        for key, value in kwargs.items():
            try:
                arg_types = self._argument_types[key]
            except KeyError:
                raise MongoDAOArgumentNameError(
                    f'Invalid keyword argument name {key}.'
                )
            if not isinstance(value, arg_types) and value is not None:
                raise MongoDAOTypeError(
                    f'Invalid type {type(value)} for argument {key}. Must be one of {arg_types}.'
                )

    def _check_kwargs_are_only_index_args(self, **kwargs):
        keys = set(kwargs.keys())
        index_2 = self._index_args - {'version_timestamp'}  # sometimes we don't use version_timestamp
        if not (self._index_args == keys or index_2 == keys):
            raise MongoDAOArgumentNameError(
                f"Invalid keyword arguments: {keys}.\nRequired arguments: {self._index_args - {'version_timestamp'}}.\nOptional arguments: 'version_timestamp'."
            )

    def _serialize(self, document):
        """Serializes a document object.
        Arguments:
            document {dict} -- The document object to serialize.
        Returns:
            dict -- The serialized document.
        """
        result = document.copy()
        for key, value in document.items():
            serializer = self.property_serializers.get(key)
            if serializer is not None:
                try:
                    result[key] = serializer(value)
                except Exception as e:
                    raise MongoDAOUncaughtError(
                        f'An error occurred while serializing property {key}\n\nof document {document}.\n\nThe error was: {e}'
                    )
        return result

    def _deserialize(self, document):
        """Deserializes a document object.
        Arguments:
            document {dict} -- The document object to deserialize.
        Returns:
            dict -- The deserialized document.
        """
        result = document.copy()
        for key, value in result.items():
            deserializer = self.property_deserializers.get(key)
            if deserializer is not None:
                try:
                    result[key] = deserializer(value)
                except Exception as e:
                    raise MongoDAOUncaughtError(
                        f'An uncaught error occurred while deserializing property: "{key}" from document: \n\n{document}.\n\nThe error was: {e}'
                    )
        return result

    @property
    def property_serializers(self):
        return {
            'time_of_save': datetime_to_microseconds,
            'time_of_removal': datetime_to_microseconds,
            'version_timestamp': self._serialize_version_timestamp,
            'json_schema': dict_to_json_bytes,
        }

    @property
    def property_deserializers(self):
        return {
            'time_of_save': microseconds_to_datetime,
            'time_of_removal': microseconds_to_datetime,
            'version_timestamp': self._deserialize_version_timestamp,
            'json_schema': json_bytes_to_dict,
        }

    @property
    def collection_name(self):
        return self._collection_name

    def _set_argument_types(self, index_fields):
        # set argument types dictionary for checking argument types
        nowtype = type(datetime.now(timezone.utc))
        self._argument_types = {
            'version_timestamp': (nowtype, int),
            'filter': (dict, type(None)),
            'projection': (dict, type(None)),
            'timestamp': (nowtype),
            'time_threshold': (nowtype, type(None)),
            'nth_most_recent': (int),
            'override_existing_document': (bool),
            'not_exist_ok': (bool),
            'document': (dict),
        }
        # set all the index names as argument options with string type
        for field in index_fields:
            if field not in self._argument_types:
                self._argument_types[field] = (str)  # only string type because they can never be None

    def _serialize_version_timestamp(self, value):
        if value == 0 or value is None:
            return 0
        else:
            try:
                return value.astimezone(timezone.utc)
            except AttributeError:
                raise MongoDAOTypeError(
                    f"Invalid type {type(value)} for argument version_timestamp. Must be of type {type(datetime.now(timezone.utc))}."
                )

    def _deserialize_version_timestamp(self, value):
        if value == 0 or value is None:
            return 0
        else:
            try:
                return value.replace(tzinfo=timezone.utc)  # we don't want to shift the time twice, since it's already in UTC (MongoDB only stores UTC time)
            except AttributeError:
                raise MongoDAOTypeError(
                    f"Invalid type {type(value)} for argument version_timestamp. Must be of type {type(datetime.now(timezone.utc))}."
                )

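A minimal usage sketch of MongoDAO (not part of the package), assuming a reachable local MongoDB instance; the database, collection, and index fields below are hypothetical:

from datetime import datetime, timezone
from pymongo import MongoClient

client = MongoClient('mongodb://localhost:27017')  # assumed local instance
dao = MongoDAO(client, database_name='eegdash_demo',
               collection_name='sessions', index_fields=['schema_ref', 'data_name'])

now = datetime.now(timezone.utc)
dao.add({'schema_ref': 'session', 'data_name': 'sub01_run1'}, timestamp=now)
doc = dao.get(schema_ref='session', data_name='sub01_run1')

dao.mark_for_deletion(datetime.now(timezone.utc), schema_ref='session', data_name='sub01_run1')
dao.restore(schema_ref='session', data_name='sub01_run1')  # undo the soft delete
dao.purge()                                                # permanently drop anything still marked
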
# ===================

class FileSystemDAOConfigError(ConfigError):
    pass

class FileSystemDAOTypeError(ArgumentTypeError):
    pass

class FileSystemDAOFileNotFoundError(NotFoundError):
    pass

class FileSystemDAOFileAlreadyExistsError(AlreadyExistsError):
    pass

class FileSystemDAORangeError(RangeError):
    pass

class FileSystemDAOArgumentNameError(ArgumentNameError):
    pass

class FileSystemDAOUncaughtError(UncaughtError):
    pass

class FileSystemDAO(AbstractDataAccessObject):
    def __init__(self, filesystem, project_dir, default_data_adapter=XarrayDataArrayNetCDFAdapter()):
        self._fs = filesystem
        # make sure the project directory exists
        if not self._fs.exists(project_dir):
            self._fs.mkdir(project_dir)
        self._directory = project_dir
        default_data_adapter.set_filesystem(self._fs)
        self._default_data_adapter = default_data_adapter

    def get(self, schema_ref, data_name, version_timestamp=0, nth_most_recent=1, data_adapter=None):
        """Gets an object from the repository.
        Arguments:
            schema_ref {str} -- The type of object to get.
            data_name {str} -- The name of the object to get.
            version_timestamp {datetime|int} -- The version_timestamp of the object to get.
        Raises:
            FileSystemDAOFileNotFoundError -- If only a mismatched file is found.
        Returns:
            The object, or None if it does not exist.
        """
        self._check_args(
            schema_ref=schema_ref,
            data_name=data_name,
            nth_most_recent=nth_most_recent,
            version_timestamp=version_timestamp,
            data_adapter=data_adapter
        )
        if data_adapter is None:
            data_adapter = self._default_data_adapter
        else:
            data_adapter.set_filesystem(self._fs)
        path = self._get_file_path(schema_ref, data_name, version_timestamp, nth_most_recent, data_adapter)
        if path is None:
            return None
        data_object = data_adapter.read_file(path)
        data_object = self._deserialize(data_object)
        return data_object

    def _get_file_path(self, schema_ref, data_name, version_timestamp, nth_most_recent, data_adapter):
        if data_adapter is None:
            data_adapter = self._default_data_adapter
        else:
            data_adapter.set_filesystem(self._fs)
        path = self.make_filepath(schema_ref, data_name, version_timestamp, data_adapter)
        if not self._fs.exists(path):
            # if the version_timestamp was specified, that's the only version we want to get;
            # if it doesn't exist, we check if the precision was too high
            if isinstance(version_timestamp, datetime):
                # try searching for the most recent version that matches up to millisecond precision
                ms_vts = str(datetime_to_microseconds(version_timestamp))[:-3]
                # find any file matching __version_{ms_vts} in the filename
                pattern = self._directory + '/' + self.make_base_filename(schema_ref, data_name) + f'__version_{ms_vts}[0-9][0-9][0-9]' + data_adapter.file_extension
                glob = self._fs.glob(pattern)
                # filter out the paths that have a time_of_removal value, then sort by version
                paths = list(filter(lambda path: '_time_of_removal_' not in path, glob))
                paths.sort()
                if len(paths) == 0:
                    wrong_file_pattern = self._directory + '/' + self.make_base_filename(schema_ref, data_name) + f'__version_{ms_vts}[0-9][0-9][0-9]*'
                    wrong_file_glob = self._fs.glob(wrong_file_pattern)
                    bad_paths = list(filter(lambda path: '_time_of_removal_' not in path, wrong_file_glob))
                    if len(bad_paths) == 0:
                        return None
                    else:
                        raise FileSystemDAOFileNotFoundError(
                            f"Cannot find a file fitting any pattern like {pattern}. However, there may be a file with a different file extension matching the pattern. The following files were found: {bad_paths}."
                        )
                else:
                    path = paths[0]
            # if the version_timestamp was 0 (not specified) then we get the nth_most_recent not deleted one
            else:
                basename = self.make_base_filename(schema_ref, data_name)
                pattern = self._directory + '/' + basename + '*_version_*' + data_adapter.file_extension
                glob = self._fs.glob(pattern)
                # filter out the paths that have a time_of_removal value
                paths = list(filter(lambda path: '_time_of_removal_' not in path, glob))
                if len(paths) == 0:
                    return None
                else:
                    # get the most recent version
                    paths.sort()
                    try:
                        path = paths[-nth_most_recent]
                    except IndexError:
                        return None
        return path

    def exists(self, schema_ref, data_name, version_timestamp=0, data_adapter=None):
        """Checks if an object exists in the repository.
        Arguments:
            schema_ref {str} -- The type of object to check.
            data_name {str} -- The name of the object to check.
            version_timestamp {datetime|int} -- The version of the object to check.
        Returns:
            bool -- True if the object exists, else False.
        """
        self._check_args(schema_ref=schema_ref, data_name=data_name, version_timestamp=version_timestamp, data_adapter=data_adapter)
        # try getting the object
        try:
            return self.get(schema_ref, data_name, version_timestamp, data_adapter=data_adapter) is not None
        except FileSystemDAOFileNotFoundError:
            raise FileSystemDAOFileNotFoundError(
                f"An error occurred while checking if the object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} exists in the repository. Traceback was: {traceback.format_exc()}"
            )

    def n_versions(self, schema_ref, data_name):
        """Returns the number of versions of an object in the repository."""
        self._check_args(schema_ref=schema_ref, data_name=data_name)
        pattern = self.make_base_filename(schema_ref, data_name) + '*'
        globstring = self._directory + '/' + pattern
        glob = self._fs.glob(globstring)
        # filter out the paths that have a time_of_removal value
        paths = filter(lambda path: '_time_of_removal_' not in path, glob)
        return len(list(paths))

    def add(self, data_object, data_adapter=None):
        """Adds an object to the repository.
        Arguments:
            data_object -- The object to add.
            data_adapter -- The data adapter to use.
                If None, the default data adapter is used.
                The default data adapter is given to the constructor.
        Raises:
            FileSystemDAOFileAlreadyExistsError -- If the object already exists.
        Returns:
            None
        """
        self._check_args(data_adapter=data_adapter)
        if data_adapter is None:
            data_adapter = self._default_data_adapter
        else:
            data_adapter.set_filesystem(self._fs)
        # separately check the object's type using the data adapter
        if not isinstance(data_object, data_adapter.data_object_type):
            raise FileSystemDAOTypeError(
                f"Type mismatch: Received {type(data_object).__name__}, but expected {data_adapter.data_object_type.__name__}. Ensure 'data_object' matches the type required by the current 'data_adapter'. If you're using the default data adapter, it may not be compatible with 'data_object'. Consider specifying a different data adapter that accepts {type(data_object).__name__}. Current data_adapter type: {type(data_adapter).__name__}."
            )
        if data_object.attrs.get('version_timestamp') is None:
            data_object.attrs['version_timestamp'] = 0
        idkwargs = data_adapter.get_id_kwargs(data_object)  # (schema_ref, data_name, version_timestamp)
        path = self.make_filepath(**idkwargs, data_adapter=data_adapter)

        if self.exists(**idkwargs, data_adapter=data_adapter):
            raise FileSystemDAOFileAlreadyExistsError(
                f'Cannot add object with path "{path}" because it already exists in repository.'
            )
        data_object = self._serialize(data_object)
        data_adapter.write_file(path=path, data_object=data_object)
        self._deserialize(data_object)  # undo the serialization in case the object is mutated
        return None

    def mark_for_deletion(self, schema_ref, data_name, time_of_removal, version_timestamp=0, data_adapter=None):
        """Marks an object for deletion.
        Arguments:
            schema_ref {str} -- The type of object to mark for deletion.
            data_name {str} -- The name of the object to mark for deletion.
            time_of_removal {datetime} -- The timestamp to mark the object for deletion with.
            version_timestamp {datetime|int} -- The version_timestamp of the object to mark for deletion.
            data_adapter {AbstractDataFileAdapter} -- The data adapter to use.
                If None, the default data adapter is used.
                The default data adapter is given to the constructor.
        Raises:
            FileSystemDAOFileNotFoundError -- If the object does not exist.
        Returns:
            None
        """
        sleep(0.0000001)  # to prevent non-unique time_of_removal values
        self._check_args(time_of_removal=time_of_removal,
                         schema_ref=schema_ref,
                         data_name=data_name,
                         version_timestamp=version_timestamp,
                         data_adapter=data_adapter)
        if data_adapter is None:
            data_adapter = self._default_data_adapter
        else:
            data_adapter.set_filesystem(self._fs)
        path = self._get_file_path(schema_ref, data_name, version_timestamp, 1, data_adapter)
        if path is None:
            raise FileSystemDAOFileNotFoundError(
                f'Cannot remove object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} because it does not exist in repository. The path would have been {self.make_filepath(schema_ref, data_name, version_timestamp, data_adapter)} if this error had not occurred. The exists method returned False.'
            )
        # insert __time_of_removal_{time_of_removal} into the filename before the file extension
        if data_adapter.file_extension != '':
            new_path = path.replace(data_adapter.file_extension, f'__time_of_removal_{datetime_to_microseconds(time_of_removal)}{data_adapter.file_extension}')
        else:
            new_path = path + f'__time_of_removal_{datetime_to_microseconds(time_of_removal)}'
        if self._fs.exists(new_path):
            raise FileSystemDAOFileAlreadyExistsError(
                f'Cannot mark object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} as marked for deletion because the path {new_path} already exists in repository. The time_of_removal may need to be updated to a more recent time.'
            )
        # move the file (or directory) to its "removed" name
        try:
            self._fs.mv(path1=str(path), path2=str(new_path), recursive=True)
        except Exception as e:
            trace = traceback.format_exc()
            raise FileSystemDAOUncaughtError(
                f'An error occurred while renaming the object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} as marked for deletion. The old path was {path} and the new (trash) path was going to be {new_path}. The error was: {e} and the traceback was \n\n{trace}'
            )
        return None

    def list_marked_for_deletion(self, time_threshold=None):
        """Returns a list of all deleted objects from the repository.
        Arguments:
            time_threshold {datetime} -- The time threshold.
        Returns:
            list[str] -- The paths of the deleted objects.
        """
        self._check_args(
            time_threshold=time_threshold,
        )
        glob_pattern = self._directory + '/*_time_of_removal_*'
        paths = self._fs.glob(glob_pattern)
        if time_threshold is None:
            result = list(paths)
        else:
            result = []
            for path in paths:
                tor = self._get_time_of_removal_from_path(path)
                if tor <= time_threshold:
                    result.append(path)
        return result

    def restore(self, schema_ref, data_name, version_timestamp=0, nth_most_recent=1, data_adapter=None):
        """Restores an object.
        Arguments:
            schema_ref {str} -- The type of object to restore.
            data_name {str} -- The name of the object to restore.
            version_timestamp {datetime|int} -- The version of the object to restore.
            nth_most_recent {int} -- The nth most recent deleted version of the object to restore.
            data_adapter {AbstractDataFileAdapter} -- The data adapter to use.
        Raises:
            FileSystemDAOFileAlreadyExistsError -- If the object already exists.
            FileSystemDAORangeError -- If no deleted instance exists or nth_most_recent is out of range.
        Returns:
            None
        """
        self._check_args(schema_ref=schema_ref,
                         data_name=data_name,
                         version_timestamp=version_timestamp,
                         nth_most_recent=nth_most_recent,
                         data_adapter=data_adapter)
        if not nth_most_recent > 0:
            raise FileSystemDAORangeError(
                f'Arg nth_most_recent={nth_most_recent} out of range. Must be greater than 0.'
            )
        if data_adapter is None:
            data_adapter = self._default_data_adapter
        else:
            data_adapter.set_filesystem(self._fs)
        # get the nth most recent version of the object with a numeric time_of_removal value
        basefilename = self.make_base_filename(schema_ref, data_name, version_timestamp)
        pattern = self._directory + "/" + basefilename + '__time_of_removal_*' + data_adapter.file_extension
        glob = self._fs.glob(pattern)
        paths = list(sorted(glob))
        if len(paths) == 0:
            raise FileSystemDAORangeError(
                f'Cannot restore object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp}: no deleted instances of {schema_ref}, {data_name}, and {version_timestamp} were found in repository.'
            )
        # check for an existing object with the same data_name and no time_of_removal value
        object_exists = self.exists(schema_ref, data_name, version_timestamp, data_adapter)
        if object_exists:
            raise FileSystemDAOFileAlreadyExistsError(
                f'Cannot restore object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} because it already exists in repository.'
            )
        # pick the nth most recent deleted path (the list is sorted oldest first)
        try:
            nth_path = paths[-nth_most_recent]
        except IndexError:
            raise FileSystemDAORangeError(
                f'Arg nth_most_recent={nth_most_recent} out of range. The record of deleted objects only contains {len(paths)} entries.'
            )
        # restore the object by moving it back to its original path
        new_path = self.make_filepath(schema_ref, data_name, version_timestamp, data_adapter)
        try:
            self._fs.mv(str(nth_path), str(new_path), recursive=True)
        except Exception as e:
            raise FileSystemDAOUncaughtError(
                f'An error occurred while restoring object with schema_ref: {schema_ref}, data_name: {data_name}, and version_timestamp: {version_timestamp} from the trash. The error was: {e}'
            )
        return None

    def purge(self, time_threshold=None):
        """Purges deleted objects from the repository older than the time threshold.
        Arguments:
            time_threshold {datetime} -- The time threshold.
        Returns:
            int -- The number of deleted objects purged.
        """
        self._check_args(time_threshold=time_threshold)
        paths = self.list_marked_for_deletion(time_threshold)
        count = len(paths)
        for path in paths:
            self._fs.rm(path, recursive=True)
        return count

    def make_filepath(self, schema_ref, data_name, version_timestamp=0, data_adapter=None, time_of_removal=None):
        """Returns the filepath for a data array."""
        if data_adapter is None:
            data_adapter = self._default_data_adapter
        else:
            data_adapter.set_filesystem(self._fs)
        basename = self.make_base_filename(schema_ref, data_name, version_timestamp)
        if time_of_removal is not None:
            basename += f"__time_of_removal_{datetime_to_microseconds(time_of_removal)}"
        filename = f"/{basename}{data_adapter.file_extension}"
        return self._directory + filename

    def make_base_filename(self, schema_ref, data_name, version_timestamp=0):
        """Returns the base filename for a data array."""
        basename = f"{schema_ref}__{data_name}"
        if version_timestamp != 0:
            basename += f"__version_{datetime_to_microseconds(version_timestamp)}"
        return basename

    def _serialize(self, data_object):
        """Serializes a data object's attrs into file-safe values.
        Arguments:
            data_object -- The data object to serialize.
        Returns:
            The serialized data object.
        """
        attrs = data_object.attrs.copy()
        for key, value in attrs.items():
            if isinstance(value, bool):
                attrs[key] = str(value)
            if isinstance(value, type(None)):
                attrs[key] = 'None'
            if isinstance(value, dict):
                attrs[key] = json.dumps(value)
            if isinstance(value, list):
                attrs[key] = json.dumps(value)
            if key == 'version_timestamp' and isinstance(value, type(None)):
                attrs[key] = 0
        data_object.attrs = attrs
        return data_object

    def _deserialize(self, data_object):
        """Deserializes a data object's attrs back into Python values.
        Arguments:
            data_object -- The data object to deserialize.
        Returns:
            The deserialized data object.
        """
        attrs = data_object.attrs.copy()
        for key, value in attrs.items():
            if isinstance(value, str):
                if value == 'True':
                    attrs[key] = True
                elif value == 'False':
                    attrs[key] = False
                elif value == 'None':
                    attrs[key] = None
                elif value.startswith('{'):
                    attrs[key] = json.loads(value)
                # check if value is a list by looking for brackets and commas
                elif value.startswith('[') and value.endswith(']') and ',' in value:
                    attrs[key] = json.loads(value)
                else:
                    try:
                        attrs[key] = int(value)
                    except ValueError:
                        pass
            elif isinstance(value, np.ndarray):
                attrs[key] = value.tolist()
        data_object.attrs = attrs
        return data_object

    def _get_time_of_removal_from_path(self, path):
        """Returns the time of removal from a path."""
        filename = os.path.basename(path)
        match = re.search(r'__time_of_removal_(\d+)', filename)
        if match is None:
            raise FileSystemDAOUncaughtError(
                f'An error occurred while parsing the time_of_removal from path {path}.'
            )
        return microseconds_to_datetime(int(match.group(1)))

    def _check_args(self, **kwargs):
        for key, value in kwargs.items():
            try:
                arg_types = self._argument_types[key]
            except KeyError:
                raise FileSystemDAOArgumentNameError(
                    f'Invalid keyword argument name {key}.'
                )
            if not isinstance(value, arg_types):
                raise FileSystemDAOTypeError(
                    f'Invalid type {type(value)} for argument {key}. Must be one of {arg_types}.'
                )

    @property
    def _argument_types(self):
        nonetype = type(None)
        nowtype = type(datetime.now(timezone.utc))
        return {
            'schema_ref': (str),
            'data_name': (str),
            'version_timestamp': (nowtype, int),
            'time_of_removal': (nowtype, nonetype),
            'nth_most_recent': (int),
            'time_threshold': (nowtype, nonetype),
            'data_adapter': (AbstractDataFileAdapter, nonetype),
        }

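A usage sketch of FileSystemDAO (not part of the package) with a local fsspec filesystem. It assumes the default adapter's file_extension is '.nc' and that get_id_kwargs reads schema_ref/data_name from the array's attrs; both are plausible from the code above but are assumptions here. Files follow the scheme {schema_ref}__{data_name}[__version_{microseconds}][__time_of_removal_{microseconds}]{ext}:

import fsspec
import xarray as xr
from datetime import datetime, timezone

fs = fsspec.filesystem('file')   # local filesystem backend
dao = FileSystemDAO(fs, project_dir='/tmp/eegdash_demo')

da = xr.DataArray([1.0, 2.0, 3.0], dims=['time'], name='demo')
da.attrs.update({'schema_ref': 'eeg', 'data_name': 'sub01_run1'})  # hypothetical id attrs

dao.add(da)                       # writes e.g. /tmp/eegdash_demo/eeg__sub01_run1.nc
loaded = dao.get('eeg', 'sub01_run1')
dao.mark_for_deletion('eeg', 'sub01_run1', time_of_removal=datetime.now(timezone.utc))
dao.restore('eeg', 'sub01_run1')  # moves the file back out of the "trash"
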
# ===================

class InMemoryObjectDAOObjectNotFoundError(NotFoundError):
    pass

class InMemoryObjectDAOObjectAlreadyExistsError(Exception):
    pass

class InMemoryObjectDAOTypeError(TypeError):
    pass

class InMemoryObjectDAORangeError(IndexError):
    pass

class InMemoryObjectDAOArgumentNameError(ArgumentNameError):
    pass

class InMemoryObjectDAOUncaughtError(UncaughtError):
    pass

class InMemoryObjectDAO(AbstractQueriableDataAccessObject):
    def __init__(self, memory_store: dict):
        self._collection = memory_store
        self._collection['objects'] = {}
        self._collection['tags'] = {}
        self._collection['removed'] = {}

    @property
    def collection(self):
        return self._collection

    def get(self, tag: str):
        """Gets an object from the repository.
        Arguments:
            tag {str} -- The tag of the object to get.
        Returns:
            The object, or None if the tag is unknown.
        """
        self._check_args(tag=tag)
        id = self._collection['tags'].get(tag)
        if id is None:
            return None
        else:
            return self._collection['objects'].get(id)

    def exists(self, tag: str):
        """Checks if an object exists in the repository.
        Arguments:
            tag {str} -- The tag of the object to check.
        Returns:
            bool -- True if the object exists, else False.
        """
        self._check_args(tag=tag)
        return self.get(tag) is not None

    def find(self, key_filter=None, value_filter=None, filter_relation: str='and'):
        """Returns a filtered dict of entries from the repository.
        Arguments:
            key_filter {function} -- Predicate applied to entry keys.
            value_filter {function} -- Predicate applied to entry values.
            filter_relation {str} -- How to combine the two predicates ('and', 'or', 'xor', 'nand', 'nor', 'xnor').
        Returns:
            dict -- The filtered entries.
        """
        self._check_args(key_filter=key_filter, value_filter=value_filter, filter_relation=filter_relation)
        if key_filter is None and value_filter is None:
            filter_relation = 'none'
        elif key_filter is None and value_filter is not None:
            filter_relation = 'value'
        elif key_filter is not None and value_filter is None:
            filter_relation = 'key'
        elif key_filter is not None and value_filter is not None:
            filter_relation = filter_relation.lower()
        item_filter_options = {
            'none': lambda item: True,
            'key': lambda item: key_filter(item[0]),
            'value': lambda item: value_filter(item[1]),
            'and': lambda item: key_filter(item[0]) and value_filter(item[1]),
            'or': lambda item: key_filter(item[0]) or value_filter(item[1]),
            'xor': lambda item: key_filter(item[0]) ^ value_filter(item[1]),
            'nand': lambda item: not (key_filter(item[0]) and value_filter(item[1])),
            'nor': lambda item: not (key_filter(item[0]) or value_filter(item[1])),
            'xnor': lambda item: not (key_filter(item[0]) ^ value_filter(item[1])),
        }
        item_filter = item_filter_options[filter_relation]
        filtered_collection = filter(item_filter, self._collection.items())
        return dict(filtered_collection)

    def add(self, object, tag: str):
        """Adds an object to the repository.
        Arguments:
            object {Any} -- The object to add.
            tag {str} -- The tag to add the object with.
        Raises:
            InMemoryObjectDAOObjectAlreadyExistsError -- If the object already exists.
        Returns:
            None
        """
        self._check_args(tag=tag)
        if self.exists(tag):
            raise InMemoryObjectDAOObjectAlreadyExistsError(
                f'Cannot add object with tag {tag} and id {id(object)} because it already exists in object repository.'
            )
        # check if tag is already in use
        if tag in self._collection['tags']:
            raise InMemoryObjectDAOObjectAlreadyExistsError(
                f'Cannot add object with tag {tag} because the tag already exists in object repository. Use a different tag.'
            )
        # check if id(object) is already in use
        if id(object) in self._collection['objects']:
            raise InMemoryObjectDAOObjectAlreadyExistsError(
                f'Cannot add object with id {id(object)} because it already exists in object repository. This tag is not taken, but you must use a different object.'
            )
        self._collection['objects'][id(object)] = object
        self._collection['tags'][tag] = id(object)
        return None

    def mark_for_deletion(self, tag, time_of_removal: datetime):
        """Marks an object for deletion.
        Arguments:
            tag {str} -- The tag of the object to mark for deletion.
            time_of_removal {datetime} -- The timestamp to mark the object for deletion with.
        Raises:
            InMemoryObjectDAOObjectNotFoundError -- If the object does not exist.
        Returns:
            None
        """
        sleep(0.0000001)  # to prevent non-unique timestamps
        self._check_args(tag=tag, time_of_removal=time_of_removal)
        if not self.exists(tag):
            raise InMemoryObjectDAOObjectNotFoundError(
                f'Cannot delete object with tag {tag} because it does not exist in object repository.'
            )
        # check that no other entry was removed with the exact same timestamp
        for entry in self._collection['removed'].values():
            if entry['time_of_removal'] == time_of_removal:
                raise InMemoryObjectDAOUncaughtError(
                    f"Cannot mark object with tag {tag} for deletion because {entry['tag']} has already been marked for deletion with the exact same timestamp ({time_of_removal}). If you received this error, something is wrong with the package. Please reach out to the package maintainer."
                )
        # Add the removal entry to the removed dict, keyed by tag
        self._collection['removed'][tag] = {'tag': tag, 'time_of_removal': time_of_removal, 'id': self._collection['tags'][tag]}
        # Remove the tag; the object itself stays in 'objects' until purged
        del self._collection['tags'][tag]
        return None

    def list_marked_for_deletion(self, time_threshold: datetime=None):
        """Returns a time-sorted list of deleted objects from the repository that are older than the time threshold.
        Arguments:
            time_threshold {datetime} -- The time threshold.
        Returns:
            list[dict] -- The list of removal entries.
        """
        self._check_args(time_threshold=time_threshold)
        if time_threshold is None:
            return list(sorted(self._collection['removed'].values(), key=lambda x: x['time_of_removal']))
        else:
            return list(sorted(filter(lambda x: x['time_of_removal'] <= time_threshold, self._collection['removed'].values()), key=lambda x: x['time_of_removal']))

    def restore(self, tag):
        """Restores the object registered under a tag from the queue of removed objects.
        Arguments:
            tag {str} -- The tag of the object to restore.
        Raises:
            InMemoryObjectDAOObjectNotFoundError -- If the object does not exist.
            InMemoryObjectDAOObjectAlreadyExistsError -- If the object already exists.
        """
        self._check_args(tag=tag)
        if self.exists(tag):
            raise InMemoryObjectDAOObjectAlreadyExistsError(
                f"Cannot restore object with tag {tag} because it already exists in object repository."
            )
        elif self._collection['removed'].get(tag) is None:
            raise InMemoryObjectDAOObjectNotFoundError(
                f"Cannot restore object with tag {tag} because it is not present in the queue of removed objects."
            )
        # move the object id from the removed dict back to the tags dict
        self._collection['tags'][tag] = self._collection['removed'][tag]['id']
        # remove the object id from the removed dict
        del self._collection['removed'][tag]
        return None

    def purge(self, time_threshold: datetime=None) -> int:
        """Purges deleted objects from the repository older than the time threshold.
        Arguments:
            time_threshold {datetime} -- The time threshold.
        Returns:
            int -- The number of deleted objects purged.
        """
        self._check_args(time_threshold=time_threshold)
        to_delete = self.list_marked_for_deletion(time_threshold)
        count = len(to_delete)
        for entry in to_delete:
            del self._collection['objects'][entry['id']]
            del self._collection['removed'][entry['tag']]
        return count

    def _get_schema_name(self, object):
        # extract a human readable typestring without all the <>'s and stuff
        schema_ref = str(type(object))
        schema_ref = schema_ref.split("'")[1].lower()
        if '.' in schema_ref:
            schema_ref = schema_ref.split('.')[-1]
        return schema_ref

    def _check_args(self, **kwargs):
        for key, value in kwargs.items():
            try:
                arg_types = self._arg_types[key]
            except KeyError:
                raise InMemoryObjectDAOArgumentNameError(
                    f'Invalid argument name {key}. The only valid argument names are {self._arg_types.keys()}.'
                )
            # check if value is one of the allowed types
            if not isinstance(value, arg_types):
                raise InMemoryObjectDAOTypeError(
                    f'Invalid type {type(value)} for argument {key}. Must be one of {arg_types}.'
                )

    @property
    def _arg_types(self):
        nonetype = type(None)
        functiontype = type(lambda x: x)
        return {
            'tag': (str),
            'time_of_removal': (datetime),
            'nth_most_recent': (int),
            'time_threshold': (datetime, nonetype),
            'key_filter': (functiontype, nonetype),
            'value_filter': (functiontype, nonetype),
            'filter_relation': (str),
        }

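A brief sketch of the in-memory DAO (not part of the package): live objects are tracked under 'tags' and soft-deleted ones under 'removed' inside the caller-supplied dict:

from datetime import datetime, timezone

store = {}
dao = InMemoryObjectDAO(store)

payload = {'subject': 'sub01'}
dao.add(payload, tag='session-1')
assert dao.get('session-1') is payload

dao.mark_for_deletion('session-1', datetime.now(timezone.utc))
assert dao.get('session-1') is None  # tag is gone; object is kept in store['objects']
dao.restore('session-1')             # tag points at the object again
dao.mark_for_deletion('session-1', datetime.now(timezone.utc))
print(dao.purge())                   # -> 1, the object is dropped for good
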
# ===================
# Helper Functions
# ===================

# Microseconds

def datetime_to_microseconds(timestamp: datetime) -> int:
    """Converts a datetime object to microseconds, with microsecond precision.
    Arguments:
        timestamp {datetime} -- The timestamp to convert.
    Returns:
        int -- The timestamp in microseconds (full precision).
    """
    if isinstance(timestamp, str):
        timestamp = string_to_datetime(timestamp)
    if timestamp is None:
        return None
    elif timestamp == 0:
        return None
    try:
        return int(timestamp.astimezone(timezone.utc).timestamp() * 1000000)
    except Exception as e:
        raise TypeError(f'Invalid type {type(timestamp)} for argument timestamp == {timestamp}. Must be datetime\n\ntraceback: {e}')

def microseconds_to_datetime(timestamp: int) -> datetime:
    """Converts microseconds to a datetime object.
    Arguments:
        timestamp {int} -- The timestamp in microseconds (full precision).
    Returns:
        datetime -- The timestamp as a datetime object.
    """
    if timestamp is None:
        return None
    elif timestamp == 0:
        return 0
    try:
        return datetime.fromtimestamp(float(timestamp) / 1000000, tz=timezone.utc)
    except Exception as e:
        raise TypeError(f'Invalid type {type(timestamp)} for argument timestamp = {timestamp}. Must be int\n\ntraceback: {e}')

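These two helpers are inverses on timezone-aware datetimes, with 0 and None passed through as "unversioned" sentinels rather than converted. A quick sketch:

from datetime import datetime, timezone

ts = datetime(2024, 6, 1, 12, 0, 0, 500000, tzinfo=timezone.utc)
micros = datetime_to_microseconds(ts)       # 1717243200500000
assert microseconds_to_datetime(micros) == ts
assert datetime_to_microseconds(0) is None  # sentinel passthrough
assert microseconds_to_datetime(0) == 0
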
# JSON

class PropertySerializerArgumentTypeError(TypeError):
    pass

def dict_to_json_bytes(dictionary: dict) -> bytes:
    """Converts a dictionary to UTF-8 encoded JSON bytes.
    Arguments:
        dictionary {dict} -- The dictionary to convert.
    Returns:
        bytes -- The JSON-encoded bytes.
    """
    if dictionary is None:
        return None
    if not isinstance(dictionary, dict):
        raise PropertySerializerArgumentTypeError(f'Invalid dict_to_json_bytes property serializer argument; argument type: {type(dictionary)}. Must be dict.')
    return json.dumps(dictionary).encode('utf-8')

def json_bytes_to_dict(json_string: bytes) -> dict:
    """Converts UTF-8 encoded JSON bytes to a dictionary.
    Arguments:
        json_string {bytes} -- The JSON bytes to convert.
    Returns:
        dict -- The dictionary.
    """
    if json_string is None:
        return None
    if not isinstance(json_string, bytes):
        raise TypeError(f'Invalid type {type(json_string)} for argument json_string. Must be bytes.')
    return json.loads(json_string.decode('utf-8'))

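A round trip through the JSON property serializers, as used for the 'json_schema' property above:

schema = {'type': 'object', 'required': ['data_name']}
blob = dict_to_json_bytes(schema)        # b'{"type": "object", "required": ["data_name"]}'
assert json_bytes_to_dict(blob) == schema
assert dict_to_json_bytes(None) is None  # None passes through in both directions
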
# String

def datetime_to_string(timestamp: datetime) -> str:
    """Converts a datetime object to a string with microsecond precision.
    Arguments:
        timestamp {datetime} -- The timestamp to convert.
    Returns:
        str -- The timestamp as a string.
    """
    if timestamp == 0:
        return '0'
    return timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %z')

def string_to_datetime(timestamp: str) -> datetime:
    """Converts a string to a datetime object with microsecond precision.
    Arguments:
        timestamp {str} -- The timestamp to convert.
    Returns:
        datetime -- The timestamp as a datetime object.
    """
    if timestamp == '0':
        return 0
    return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S.%f %z')
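The string helpers round-trip exactly, with '0' and 0 as the sentinel pair:

from datetime import datetime, timezone

ts = datetime(2024, 6, 1, 12, 0, 0, 500000, tzinfo=timezone.utc)
s = datetime_to_string(ts)               # '2024-06-01 12:00:00.500000 +0000'
assert string_to_datetime(s) == ts
assert datetime_to_string(0) == '0' and string_to_datetime('0') == 0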