nuthatch 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nuthatch might be problematic. Click here for more details.
- nuthatch/__init__.py +14 -0
- nuthatch/backend.py +301 -0
- nuthatch/backends/__init__.py +8 -0
- nuthatch/backends/basic.py +28 -0
- nuthatch/backends/delta.py +46 -0
- nuthatch/backends/parquet.py +130 -0
- nuthatch/backends/sql.py +147 -0
- nuthatch/backends/terracotta.py +199 -0
- nuthatch/backends/zarr.py +207 -0
- nuthatch/cache.py +529 -0
- nuthatch/cli.py +174 -0
- nuthatch/config.py +94 -0
- nuthatch/memoizer.py +67 -0
- nuthatch/nuthatch.py +498 -0
- nuthatch/processor.py +89 -0
- nuthatch/processors/__init__.py +6 -0
- nuthatch/processors/timeseries.py +157 -0
- nuthatch-0.1.0.dist-info/METADATA +38 -0
- nuthatch-0.1.0.dist-info/RECORD +21 -0
- nuthatch-0.1.0.dist-info/WHEEL +4 -0
- nuthatch-0.1.0.dist-info/entry_points.txt +2 -0
nuthatch/cache.py
ADDED
|
@@ -0,0 +1,529 @@
|
|
|
1
|
+
from deltalake import DeltaTable, write_deltalake, QueryBuilder
|
|
2
|
+
from os.path import join
|
|
3
|
+
import copy
|
|
4
|
+
import git
|
|
5
|
+
import getpass
|
|
6
|
+
import datetime
|
|
7
|
+
from .backend import get_backend_by_name
|
|
8
|
+
from .config import get_config
|
|
9
|
+
import pyarrow as pa
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import sqlalchemy
|
|
12
|
+
|
|
13
|
+
class Cache():
    """The cache class is the main class that manages the cache.

    It is responsible for:
    - Instantiating the correct backend
    - Managing the metadata in the metadata database or delta table
    - Writing and reading data to the backend
    """
    # Config keys that must ALL be present for the database metadata store to
    # be selected in __init__ (missing any of them falls back to delta).
    database_parameters = ["driver", "host", "port", "database", "username", "password"]
    # Every configuration key this class understands.
    config_parameters = ['filesystem', 'filesystem_options', 'metadata_location'] + database_parameters
    # Name under which this component's own configuration is looked up.
    backend_name = "cache_metadata"
    # NOTE(review): class-level mutable caches shared by ALL Cache instances
    # in the process, keyed by backend_location. Not obviously thread-safe —
    # confirm callers are single-threaded.
    delta_tables = {}
    delta_table_configs = {}
|
|
27
|
+
def __init__(self, config, cache_key, namespace, args, backend_location, requested_backend, backend_kwargs):
    """Set up the metadata store (delta table or SQL database) and the data backend.

    Args:
        config (dict): Metadata-store configuration (see ``config_parameters``).
        cache_key (str): Key identifying the cached entry (may be falsy for
            pattern-based use, e.g. ``list``).
        namespace (str): Namespace the entry belongs to.
        args: Opaque arguments forwarded to the backend constructor.
        backend_location (str): Location used to look up backend configuration
            and to key the class-level delta-table cache.
        requested_backend (str): Explicit backend name, or falsy to recover the
            backend from existing metadata.
        backend_kwargs (dict): Extra keyword arguments for the backend;
            deep-copied before being handed to the backend.
    """
    self.cache_key = cache_key
    self.config = config
    self.namespace = namespace
    self.location = backend_location
    self.args = args
    self. backend_kwargs = backend_kwargs

    # Either instantiate a delta table or a postgres table.
    # Delta is chosen when metadata_location says 'filesystem', when a
    # filesystem is configured without metadata_location, or when any
    # database parameter is missing.
    if (('metadata_location' in config and config['metadata_location'] == 'filesystem') or
            ('metadata_location' not in config and 'filesystem' in config) or
            (any(param not in config for param in self.__class__.database_parameters))):
        # This is a delta/filesystem type.
        # NOTE(review): if a database parameter is missing but 'filesystem'
        # is also absent, the lookup below raises KeyError — confirm intended.
        base_path = self.config['filesystem']
        table_path = join(base_path, 'nuthatch_metadata.delta')

        options = None
        if 'filesystem_options' in self.config:
            options = self.config['filesystem_options']
            # Storage options must be strings for the delta-rs API.
            for key, value in options.items():
                options[key] = str(value)

        self.store = 'delta'
        # Reuse a cached DeltaTable handle only when it was opened with an
        # identical config.
        if (backend_location in self.__class__.delta_tables and
                self.__class__.delta_table_configs[backend_location] == config):
            self.dt = self.__class__.delta_tables[backend_location]
        else:
            # Instantiate the metadata store here so that _get_backend_from_metadata() works
            if not DeltaTable.is_deltatable(table_path, storage_options=options):
                print("Instantiating empty delta table.")
                DeltaTable.create(table_path,
                                  schema=pa.schema(
                                      [pa.field("cache_key", pa.string()), pa.field("backend", pa.string()),
                                       pa.field("namespace", pa.string()), pa.field("state", pa.string()),
                                       pa.field("last_modified", pa.int64()), pa.field("commit_hash", pa.string()),
                                       pa.field("user", pa.string()), pa.field("path", pa.string())]
                                  ),
                                  storage_options=options,
                                  partition_by="cache_key")

            self.dt = DeltaTable(table_path, storage_options=options)
            self.__class__.delta_tables[backend_location] = self.dt
            self.__class__.delta_table_configs[backend_location] = config
    else:
        # This is a database type
        database_url = sqlalchemy.URL.create(self.config['driver'],
                                             username = self.config['username'],
                                             password = self.config['password'],
                                             host = self.config['host'],
                                             port = self.config['port'],
                                             database = self.config['database'])
        self.engine = sqlalchemy.create_engine(database_url)
        metadata = sqlalchemy.MetaData()

        # Mirror of the delta-table schema, as a SQL table.
        self.db_table = sqlalchemy.Table(
            'nuthatch_metadata', metadata,
            sqlalchemy.Column('cache_key', sqlalchemy.String),
            sqlalchemy.Column('backend', sqlalchemy.String),
            sqlalchemy.Column('namespace', sqlalchemy.String),
            sqlalchemy.Column('state', sqlalchemy.String),
            sqlalchemy.Column('last_modified', sqlalchemy.BigInteger),
            sqlalchemy.Column('commit_hash', sqlalchemy.String),
            sqlalchemy.Column('user', sqlalchemy.String),
            sqlalchemy.Column('path', sqlalchemy.String)
        )

        metadata.create_all(self.engine, checkfirst=True)
        self.store = 'database'

    self.backend = None
    self.backend_name = None

    # Resolve the backend class: an explicit request wins; otherwise fall
    # back to whatever backend the existing metadata recorded for this key.
    backend_class = None
    if requested_backend:
        backend_class = get_backend_by_name(requested_backend)
        self.backend_name = requested_backend
    elif self.cache_key:
        stored_backend = self._get_backend_from_metadata()
        if stored_backend:
            backend_class = get_backend_by_name(stored_backend)
            self.backend_name = stored_backend

    if backend_class and self.cache_key:
        backend_config = get_config(location=backend_location, requested_parameters=backend_class.config_parameters,
                                    backend_name=backend_class.backend_name)
        if backend_config:
            self.backend = backend_class(backend_config, cache_key, namespace, args, copy.deepcopy(backend_kwargs))
|
|
114
|
+
|
|
115
|
+
def _delta_check_exists(self, state=None, include_backend=False):
    """Check if the metadata exists in the delta table.

    Args:
        state (str, optional): The state of the metadata to check for.
        include_backend (bool, optional): Whether to include the backend in the check.

    Returns:
        bool: True if the metadata exists in the delta table, False otherwise.
    """
    # NOTE(review): the query is assembled by string interpolation; the values
    # come from inside the application, but a quote character in cache_key /
    # namespace / state would break (or inject into) the query.
    base = f"""select * from metadata where cache_key = '{self.cache_key}' AND namespace = '{self.namespace}'"""
    if include_backend:
        base += f" AND backend = '{self.backend_name}'"
    if state:
        base += f" AND state = '{state}'"

    rows = QueryBuilder().register('metadata', self.dt).execute(base).read_all()

    # Simplified from an explicit if/else over len(rows).
    return len(rows) > 0
|
|
137
|
+
|
|
138
|
+
def _sql_check_exists(self, state=None, include_backend=False):
    """Check if the metadata exists in the database.

    Args:
        state (str, optional): The state of the metadata to check for.
        include_backend (bool, optional): Whether to include the backend in the check.

    Returns:
        bool: True if the metadata exists in the database, False otherwise.
    """
    statement = sqlalchemy.select(sqlalchemy.func.count(self.db_table.c.cache_key))\
        .where(self.db_table.c.cache_key == self.cache_key)\
        .where(self.db_table.c.namespace == self.namespace)
    if state:
        statement = statement.where(self.db_table.c.state == state)
    if include_backend:
        statement = statement.where(self.db_table.c.backend == self.backend_name)

    with self.engine.connect() as conn:
        result = conn.execute(statement)
        # Simplified from an explicit if/else over the COUNT(*) value.
        return result.fetchone()[0] > 0
|
|
161
|
+
|
|
162
|
+
def _check_row_exists(self, state=None, include_backend=False):
|
|
163
|
+
"""Check if the metadata exists in the database or delta table.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
state (str, optional): The state of the metadata to check for.
|
|
167
|
+
include_backend (bool, optional): Whether to include the backend in the check.
|
|
168
|
+
|
|
169
|
+
Returns:
|
|
170
|
+
bool: True if the metadata exists in the database or delta table, False otherwise.
|
|
171
|
+
"""
|
|
172
|
+
if self.store == 'delta':
|
|
173
|
+
return self._delta_check_exists(state, include_backend)
|
|
174
|
+
else:
|
|
175
|
+
return self._sql_check_exists(state, include_backend)
|
|
176
|
+
|
|
177
|
+
def _sql_get_row(self, select, include_backend=False):
|
|
178
|
+
"""Get a row from the database.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
select (str): The column to select.
|
|
182
|
+
include_backend (bool, optional): Whether to include the backend in the check.
|
|
183
|
+
|
|
184
|
+
Returns:
|
|
185
|
+
The row from the database.
|
|
186
|
+
"""
|
|
187
|
+
if isinstance(select, str):
|
|
188
|
+
select = [select]
|
|
189
|
+
|
|
190
|
+
statement = sqlalchemy.select(self.db_table.c[*select]).where(self.db_table.c.namespace == self.namespace)\
|
|
191
|
+
.where(self.db_table.c.cache_key.like(self.cache_key))
|
|
192
|
+
|
|
193
|
+
if include_backend:
|
|
194
|
+
statement = statement.where(self.db_table.c.backend == self.backend_name)
|
|
195
|
+
|
|
196
|
+
with self.engine.connect() as conn:
|
|
197
|
+
rows = conn.execute(statement)
|
|
198
|
+
return rows.mappings().all()
|
|
199
|
+
|
|
200
|
+
def _delta_get_row(self, select, include_backend=False):
    """Fetch metadata rows from the delta table.

    Args:
        select (str or list): Column name(s) to select.
        include_backend (bool, optional): Also filter on the current backend.

    Returns:
        list: Matching rows as plain dicts.
    """
    columns = [select] if isinstance(select, str) else select
    quoted_columns = ', '.join(f'"{c}"' for c in columns)

    # cache_key uses LIKE so glob-derived patterns (see list()) match.
    query = f"""select {quoted_columns} from metadata where namespace = '{self.namespace}' AND
                cache_key LIKE '{self.cache_key}'"""
    if include_backend:
        query += f" AND backend = '{self.backend_name}'"

    result = QueryBuilder().register('metadata', self.dt).execute(query).read_all()

    return result.to_struct_array().to_pylist()
|
|
221
|
+
|
|
222
|
+
def _get_row(self, select, include_backend=False):
|
|
223
|
+
"""Get a row from the database or delta table.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
select (str): The column to select.
|
|
227
|
+
include_backend (bool, optional): Whether to include the backend in the check.
|
|
228
|
+
|
|
229
|
+
Returns:
|
|
230
|
+
The row from the database or delta table.
|
|
231
|
+
"""
|
|
232
|
+
if self.store == 'delta':
|
|
233
|
+
return self._delta_get_row(select, include_backend)
|
|
234
|
+
else:
|
|
235
|
+
return self._sql_get_row(select, include_backend)
|
|
236
|
+
|
|
237
|
+
def list(self, cache_key):
|
|
238
|
+
#convert cache_key glob to valid sql pattern matching
|
|
239
|
+
if not self.cache_key:
|
|
240
|
+
cache_key = cache_key.replace('*', '%')
|
|
241
|
+
cache_key = cache_key.replace('?', '_')
|
|
242
|
+
self.cache_key = cache_key
|
|
243
|
+
|
|
244
|
+
if self.backend_name:
|
|
245
|
+
include_backend = True
|
|
246
|
+
else:
|
|
247
|
+
include_backend = False
|
|
248
|
+
|
|
249
|
+
return self._get_row(['cache_key',
|
|
250
|
+
'namespace',
|
|
251
|
+
'backend',
|
|
252
|
+
'state',
|
|
253
|
+
'last_modified',
|
|
254
|
+
'user',
|
|
255
|
+
'commit_hash',
|
|
256
|
+
'path'], include_backend=include_backend)
|
|
257
|
+
|
|
258
|
+
def is_null(self):
|
|
259
|
+
"""Check if the metadata is null.
|
|
260
|
+
|
|
261
|
+
Returns:
|
|
262
|
+
bool: True if the metadata is null, False otherwise.
|
|
263
|
+
"""
|
|
264
|
+
return self._check_row_exists(state='null', include_backend=False)
|
|
265
|
+
|
|
266
|
+
def set_null(self):
|
|
267
|
+
"""Set the metadata to null."""
|
|
268
|
+
self._update_metadata_state(state='null')
|
|
269
|
+
|
|
270
|
+
def delete_null(self):
|
|
271
|
+
"""Delete the metadata that is null."""
|
|
272
|
+
# Deleting a null is really just deleting a metadata
|
|
273
|
+
self._delete_metadata(null=True)
|
|
274
|
+
|
|
275
|
+
def _delete_metadata(self, null=False):
    """Delete the metadata from the database or delta table.

    Args:
        null (bool, optional): Whether to delete the metadata that is null
            (state = 'null'); otherwise deletes rows for the current backend.

    Raises:
        RuntimeError: In the delta store, when deleting non-null metadata
            without a backend name set.
    """
    if self.store == 'delta':
        if null:
            self.dt.delete(predicate=f"cache_key = '{self.cache_key}' AND namespace = '{self.namespace}' AND state = 'null'")
        else:
            if self.backend_name:
                self.dt.delete(predicate=f"cache_key = '{self.cache_key}' AND namespace = '{self.namespace}' AND backend = '{self.backend_name}'")
            else:
                raise RuntimeError("Can only delete non-null metadata with a valid backend")
    else:
        statement = sqlalchemy.delete(self.db_table).where(self.db_table.c.cache_key == self.cache_key)\
            .where(self.db_table.c.namespace == self.namespace)
        if null:
            statement = statement.where(self.db_table.c.state == 'null')
        else:
            # NOTE(review): unlike the delta branch, a missing backend_name is
            # not rejected here — the filter then compares backend to None.
            # Confirm this asymmetry is intended.
            statement = statement.where(self.db_table.c.backend == self.backend_name)
        with self.engine.connect() as conn:
            conn.execute(statement)
            conn.commit()
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _metadata_confirmed(self):
|
|
302
|
+
"""Check if the metadata is confirmed.
|
|
303
|
+
|
|
304
|
+
Returns:
|
|
305
|
+
bool: True if the metadata is confirmed, False otherwise.
|
|
306
|
+
"""
|
|
307
|
+
if not self.backend:
|
|
308
|
+
return False
|
|
309
|
+
|
|
310
|
+
return self._check_row_exists(state='confirmed', include_backend=True)
|
|
311
|
+
|
|
312
|
+
def _metadata_exists(self):
|
|
313
|
+
"""Check if the metadata exists.
|
|
314
|
+
|
|
315
|
+
Returns:
|
|
316
|
+
bool: True if the metadata exists, False otherwise.
|
|
317
|
+
"""
|
|
318
|
+
return self._check_row_exists(state=None, include_backend=True)
|
|
319
|
+
|
|
320
|
+
def _get_backend_from_metadata(self):
|
|
321
|
+
"""Get the backend from the metadata.
|
|
322
|
+
|
|
323
|
+
Returns:
|
|
324
|
+
The backend from the metadata.
|
|
325
|
+
"""
|
|
326
|
+
rows = self._get_row('backend', include_backend=False)
|
|
327
|
+
if len(rows) == 0:
|
|
328
|
+
return None
|
|
329
|
+
else:
|
|
330
|
+
return rows[0]['backend']
|
|
331
|
+
|
|
332
|
+
def last_modified(self):
|
|
333
|
+
"""Get the last modified time of the metadata.
|
|
334
|
+
|
|
335
|
+
Returns:
|
|
336
|
+
The last modified time of the metadata.
|
|
337
|
+
"""
|
|
338
|
+
rows = self._get_row('last_modified', include_backend=True)
|
|
339
|
+
if len(rows) == 0:
|
|
340
|
+
return None
|
|
341
|
+
else:
|
|
342
|
+
return rows[0]['last_modified']
|
|
343
|
+
|
|
344
|
+
def _set_metadata_pending(self):
|
|
345
|
+
"""Set the metadata to pending."""
|
|
346
|
+
self._update_metadata_state(state='pending')
|
|
347
|
+
|
|
348
|
+
def _commit_metadata(self):
|
|
349
|
+
"""Commit the metadata."""
|
|
350
|
+
self._update_metadata_state(state='confirmed')
|
|
351
|
+
|
|
352
|
+
def _update_metadata_state(self, state=None):
    """Update the state of the metadata.

    If metadata doesn't exist, it will be created. Alongside the state,
    records the current git commit hash, the user, the backend file path,
    and a microsecond UTC timestamp.

    Args:
        state (str, optional): The state to update the metadata to
            (e.g. 'pending', 'confirmed', 'null').
    """
    # BUGFIX: git.Repo raises InvalidGitRepositoryError (it never returns a
    # falsy value) when no repository encloses the working directory, so the
    # original `if repo: ... else: sha = 'no_git_repo'` else-branch was
    # unreachable and the call crashed outside a repo. Guard with try/except.
    try:
        repo = git.Repo(search_parent_directories=True)
        sha = repo.head.object.hexsha
    except Exception:
        sha = 'no_git_repo'

    path = 'None'
    if self.backend:
        path = self.backend.get_file_path()

    if self.store == 'delta':
        if self._metadata_exists():
            values = {'state': state, 'last_modified': datetime.datetime.now(datetime.timezone.utc).timestamp() * 1000000,
                      'commit_hash': sha, 'user': getpass.getuser(), 'path': path}
            if state == 'null':
                # A null state applies to the whole cache_key regardless of backend.
                self.dt.update(predicate=f"cache_key = '{self.cache_key}' AND namespace = '{self.namespace}'",
                               new_values=values)
            else:
                self.dt.update(predicate=f"cache_key = '{self.cache_key}' AND namespace = '{self.namespace}' AND backend = '{self.backend_name}'",
                               new_values=values)
        else:
            # No row yet: append a fresh one to the delta table.
            df = pd.DataFrame({'cache_key': [self.cache_key],
                               'namespace': [str(self.namespace)],
                               'backend': [self.backend_name],
                               'commit_hash': [sha],
                               'user': [getpass.getuser()],
                               'path': [path],
                               'state': [state],
                               'last_modified': [datetime.datetime.now(datetime.timezone.utc).timestamp() * 1000000]})

            write_deltalake(self.dt, df, mode='append')
    else:
        if self._metadata_exists():
            statement = sqlalchemy.update(self.db_table).where(self.db_table.c.cache_key == self.cache_key)\
                .where(self.db_table.c.namespace == self.namespace)

            # A null state applies to the whole cache_key regardless of backend.
            if state != 'null':
                statement = statement.where(self.db_table.c.backend == self.backend_name)

            statement = statement.values(state=state, last_modified=datetime.datetime.now(datetime.timezone.utc).timestamp()*1000000,
                                         commit_hash=sha, user=getpass.getuser(), path=path)
        else:
            statement = sqlalchemy.insert(self.db_table).values(state=state,
                                                                last_modified=datetime.datetime.now(datetime.timezone.utc).timestamp()*1000000,
                                                                commit_hash=sha, user=getpass.getuser(), path=path,
                                                                backend=self.backend_name,
                                                                cache_key=self.cache_key,
                                                                namespace=self.namespace)

        with self.engine.connect() as conn:
            conn.execute(statement)
            conn.commit()
|
|
412
|
+
|
|
413
|
+
def get_backend(self):
|
|
414
|
+
"""Get the backend name.
|
|
415
|
+
|
|
416
|
+
Returns:
|
|
417
|
+
The backend name.
|
|
418
|
+
"""
|
|
419
|
+
return self.backend.backend_name
|
|
420
|
+
|
|
421
|
+
def exists(self):
|
|
422
|
+
"""Check if the metadata exists, is confirmed, and the data exists in the backend.
|
|
423
|
+
|
|
424
|
+
Returns:
|
|
425
|
+
bool: True if the metadata exists, False otherwise.
|
|
426
|
+
"""
|
|
427
|
+
if self._metadata_confirmed() and not self.backend:
|
|
428
|
+
raise RuntimeError("If metadata exists then there should be a backend. Inconsistent state error.")
|
|
429
|
+
elif not self.backend:
|
|
430
|
+
# We just aren't initialized yet
|
|
431
|
+
return False
|
|
432
|
+
elif self._metadata_confirmed() and self.backend.exists():
|
|
433
|
+
# Both exists!
|
|
434
|
+
return True
|
|
435
|
+
elif self._metadata_confirmed() and not self.backend.exists():
|
|
436
|
+
# The data doesn't exist in the backend, so delete the metadata
|
|
437
|
+
self._delete_metadata()
|
|
438
|
+
return False
|
|
439
|
+
elif not self._metadata_confirmed():
|
|
440
|
+
return False
|
|
441
|
+
else:
|
|
442
|
+
raise ValueError("Inconsistent and unknown cache state.")
|
|
443
|
+
|
|
444
|
+
def write(self, ds, upsert=False, primary_keys=None):
|
|
445
|
+
"""Write data to the backend.
|
|
446
|
+
|
|
447
|
+
First we set the metadata to pending, then we write the data to the backend,
|
|
448
|
+
and then we commit the metadata.
|
|
449
|
+
|
|
450
|
+
Args:
|
|
451
|
+
ds (any): The data to write to the backend.
|
|
452
|
+
upsert (bool, optional): Whether to upsert the data.
|
|
453
|
+
primary_keys (list, optional): The primary keys to use for upsert.
|
|
454
|
+
"""
|
|
455
|
+
if self.backend:
|
|
456
|
+
self._set_metadata_pending()
|
|
457
|
+
self.backend.write(ds, upsert, primary_keys)
|
|
458
|
+
self._commit_metadata()
|
|
459
|
+
else:
|
|
460
|
+
raise RuntimeError("Cannot not write to an uninitialized backend")
|
|
461
|
+
|
|
462
|
+
def read(self, engine=None):
|
|
463
|
+
"""Read data from the backend.
|
|
464
|
+
|
|
465
|
+
Args:
|
|
466
|
+
engine (str or type, optional): The data processing engine to use for
|
|
467
|
+
reading data from the backend.
|
|
468
|
+
|
|
469
|
+
Returns:
|
|
470
|
+
The data from the backend.
|
|
471
|
+
"""
|
|
472
|
+
if self.backend:
|
|
473
|
+
return self.backend.read(engine)
|
|
474
|
+
else:
|
|
475
|
+
raise RuntimeError("Cannot not read from an uninitialized backend")
|
|
476
|
+
|
|
477
|
+
def delete(self):
|
|
478
|
+
"""Delete the metadata and the data from the backend."""
|
|
479
|
+
if self.backend and self.backend.exists():
|
|
480
|
+
self.backend.delete()
|
|
481
|
+
|
|
482
|
+
self._delete_metadata()
|
|
483
|
+
|
|
484
|
+
def get_file_path(self):
|
|
485
|
+
"""Get the file path of the data in the backend.
|
|
486
|
+
|
|
487
|
+
Returns:
|
|
488
|
+
The file path of the data in the backend.
|
|
489
|
+
"""
|
|
490
|
+
if self.backend:
|
|
491
|
+
return self.backend.get_file_path()
|
|
492
|
+
else:
|
|
493
|
+
raise RuntimeError("Cannot not get file path for an uninitialized backend")
|
|
494
|
+
|
|
495
|
+
def sync(self, from_cache):
    """Sync the data from one cache to another.

    Copies from ``from_cache`` into this cache when the source exists and is
    strictly newer, or when this cache has no entry at all (in which case a
    backend matching the source's is set up first). No-op otherwise.

    Args:
        from_cache (Cache): The cache to sync data from.

    Raises:
        RuntimeError: When no backend configuration can be found at this
            cache's location for the source's backend.
    """
    if self.exists():
        if from_cache.exists():
            # If we both exist, copy only if our last_modified is before the
            # other cache's.
            if self.last_modified() < from_cache.last_modified():
                self._set_metadata_pending()
                self.backend.sync(from_cache.backend)
                self._commit_metadata()
            else:
                return
        else:
            # If it's not in the from-cache either, don't sync.
            return
    else:
        if from_cache.exists():
            # We don't exist at all. Set up a backend matching the source's
            # and write.
            backend_class = get_backend_by_name(from_cache.backend_name)
            backend_config = get_config(location=self.location, requested_parameters=backend_class.config_parameters,
                                        backend_name=backend_class.backend_name)
            if backend_config:
                self.backend = backend_class(backend_config, self.cache_key, self.namespace, self.args, copy.deepcopy(self.backend_kwargs))
                self.backend_name = backend_class.backend_name
            else:
                raise RuntimeError("Error finding backend config for syncing.")

            self._set_metadata_pending()
            self.backend.sync(from_cache.backend)
            self._commit_metadata()
        else:
            return
|