pastastore 1.10.2__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pastastore/__init__.py +5 -1
- pastastore/base.py +739 -270
- pastastore/connectors.py +217 -800
- pastastore/datasets.py +23 -29
- pastastore/extensions/__init__.py +7 -3
- pastastore/extensions/hpd.py +39 -17
- pastastore/plotting.py +71 -38
- pastastore/store.py +191 -184
- pastastore/typing.py +12 -0
- pastastore/util.py +321 -88
- pastastore/validator.py +474 -0
- pastastore/version.py +1 -2
- pastastore/yaml_interface.py +37 -39
- {pastastore-1.10.2.dist-info → pastastore-1.11.0.dist-info}/METADATA +14 -8
- pastastore-1.11.0.dist-info/RECORD +30 -0
- tests/conftest.py +4 -9
- tests/test_001_import.py +2 -1
- tests/test_002_connectors.py +40 -3
- tests/test_003_pastastore.py +57 -28
- tests/test_005_maps_plots.py +12 -0
- tests/test_006_benchmark.py +1 -1
- tests/test_007_hpdextension.py +46 -8
- pastastore-1.10.2.dist-info/RECORD +0 -28
- {pastastore-1.10.2.dist-info → pastastore-1.11.0.dist-info}/WHEEL +0 -0
- {pastastore-1.10.2.dist-info → pastastore-1.11.0.dist-info}/licenses/LICENSE +0 -0
- {pastastore-1.10.2.dist-info → pastastore-1.11.0.dist-info}/top_level.txt +0 -0
pastastore/connectors.py
CHANGED
|
@@ -4,650 +4,39 @@ import json
|
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
6
|
import warnings
|
|
7
|
-
from collections.abc import Iterable
|
|
8
7
|
from concurrent.futures import ProcessPoolExecutor
|
|
9
8
|
from copy import deepcopy
|
|
10
9
|
from functools import partial
|
|
10
|
+
from pathlib import Path
|
|
11
11
|
|
|
12
12
|
# import weakref
|
|
13
13
|
from typing import Callable, Dict, List, Optional, Tuple, Union
|
|
14
14
|
|
|
15
15
|
import pandas as pd
|
|
16
|
-
import pastas as ps
|
|
17
|
-
from numpy import isin
|
|
18
|
-
from packaging.version import parse as parse_version
|
|
19
|
-
from pandas.testing import assert_series_equal
|
|
20
16
|
from pastas.io.pas import PastasEncoder, pastas_hook
|
|
21
17
|
from tqdm.auto import tqdm
|
|
22
18
|
from tqdm.contrib.concurrent import process_map
|
|
23
19
|
|
|
24
20
|
from pastastore.base import BaseConnector, ModelAccessor
|
|
25
|
-
from pastastore.
|
|
26
|
-
from pastastore.
|
|
21
|
+
from pastastore.typing import AllLibs, FrameOrSeriesUnion, TimeSeriesLibs
|
|
22
|
+
from pastastore.util import _custom_warning, metadata_from_json, series_from_json
|
|
23
|
+
from pastastore.validator import Validator
|
|
27
24
|
|
|
28
|
-
FrameorSeriesUnion = Union[pd.DataFrame, pd.Series]
|
|
29
25
|
warnings.showwarning = _custom_warning
|
|
30
26
|
|
|
31
27
|
logger = logging.getLogger(__name__)
|
|
32
28
|
|
|
29
|
+
# Global connector for multiprocessing workaround
|
|
30
|
+
# This is required for connectors (like ArcticDBConnector) that cannot be pickled.
|
|
31
|
+
# The initializer function in _parallel() sets this global variable in each worker
|
|
32
|
+
# process, allowing unpicklable connectors to be used with multiprocessing.
|
|
33
|
+
# See: https://docs.python.org/3/library/concurrent.futures.html#processpoolexecutor
|
|
34
|
+
# Note: Using simple None type to avoid circular import issues
|
|
35
|
+
conn = None
|
|
33
36
|
|
|
34
|
-
class ConnectorUtil:
|
|
35
|
-
"""Mix-in class for general Connector helper functions.
|
|
36
37
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
"""
|
|
40
|
-
|
|
41
|
-
def _parse_names(
|
|
42
|
-
self,
|
|
43
|
-
names: Optional[Union[list, str]] = None,
|
|
44
|
-
libname: Optional[str] = "oseries",
|
|
45
|
-
) -> list:
|
|
46
|
-
"""Parse names kwarg, returns iterable with name(s) (internal method).
|
|
47
|
-
|
|
48
|
-
Parameters
|
|
49
|
-
----------
|
|
50
|
-
names : Union[list, str], optional
|
|
51
|
-
str or list of str or None or 'all' (last two options
|
|
52
|
-
retrieves all names)
|
|
53
|
-
libname : str, optional
|
|
54
|
-
name of library, default is 'oseries'
|
|
55
|
-
|
|
56
|
-
Returns
|
|
57
|
-
-------
|
|
58
|
-
list
|
|
59
|
-
list of names
|
|
60
|
-
"""
|
|
61
|
-
if not isinstance(names, str) and isinstance(names, Iterable):
|
|
62
|
-
return names
|
|
63
|
-
elif isinstance(names, str) and names != "all":
|
|
64
|
-
return [names]
|
|
65
|
-
elif names is None or names == "all":
|
|
66
|
-
if libname == "oseries":
|
|
67
|
-
return self.oseries_names
|
|
68
|
-
elif libname == "stresses":
|
|
69
|
-
return self.stresses_names
|
|
70
|
-
elif libname == "models":
|
|
71
|
-
return self.model_names
|
|
72
|
-
elif libname == "oseries_models":
|
|
73
|
-
return self.oseries_with_models
|
|
74
|
-
else:
|
|
75
|
-
raise ValueError(f"No library '{libname}'!")
|
|
76
|
-
else:
|
|
77
|
-
raise NotImplementedError(f"Cannot parse 'names': {names}")
|
|
78
|
-
|
|
79
|
-
@staticmethod
|
|
80
|
-
def _meta_list_to_frame(metalist: list, names: list):
|
|
81
|
-
"""Convert list of metadata dictionaries to DataFrame.
|
|
82
|
-
|
|
83
|
-
Parameters
|
|
84
|
-
----------
|
|
85
|
-
metalist : list
|
|
86
|
-
list of metadata dictionaries
|
|
87
|
-
names : list
|
|
88
|
-
list of names corresponding to data in metalist
|
|
89
|
-
|
|
90
|
-
Returns
|
|
91
|
-
-------
|
|
92
|
-
pandas.DataFrame
|
|
93
|
-
DataFrame containing overview of metadata
|
|
94
|
-
"""
|
|
95
|
-
# convert to dataframe
|
|
96
|
-
if len(metalist) > 1:
|
|
97
|
-
meta = pd.DataFrame(metalist)
|
|
98
|
-
if len({"x", "y"}.difference(meta.columns)) == 0:
|
|
99
|
-
meta["x"] = meta["x"].astype(float)
|
|
100
|
-
meta["y"] = meta["y"].astype(float)
|
|
101
|
-
elif len(metalist) == 1:
|
|
102
|
-
meta = pd.DataFrame(metalist)
|
|
103
|
-
elif len(metalist) == 0:
|
|
104
|
-
meta = pd.DataFrame()
|
|
105
|
-
|
|
106
|
-
meta.index = names
|
|
107
|
-
meta.index.name = "name"
|
|
108
|
-
return meta
|
|
109
|
-
|
|
110
|
-
def _parse_model_dict(self, mdict: dict, update_ts_settings: bool = False):
|
|
111
|
-
"""Parse dictionary describing pastas models (internal method).
|
|
112
|
-
|
|
113
|
-
Parameters
|
|
114
|
-
----------
|
|
115
|
-
mdict : dict
|
|
116
|
-
dictionary describing pastas.Model
|
|
117
|
-
update_ts_settings : bool, optional
|
|
118
|
-
update stored tmin and tmax in time series settings
|
|
119
|
-
based on time series loaded from store.
|
|
120
|
-
|
|
121
|
-
Returns
|
|
122
|
-
-------
|
|
123
|
-
ml : pastas.Model
|
|
124
|
-
time series analysis model
|
|
125
|
-
"""
|
|
126
|
-
PASFILE_LEQ_022 = parse_version(
|
|
127
|
-
mdict["file_info"]["pastas_version"]
|
|
128
|
-
) <= parse_version("0.22.0")
|
|
129
|
-
|
|
130
|
-
# oseries
|
|
131
|
-
if "series" not in mdict["oseries"]:
|
|
132
|
-
name = str(mdict["oseries"]["name"])
|
|
133
|
-
if name not in self.oseries.index:
|
|
134
|
-
msg = "oseries '{}' not present in library".format(name)
|
|
135
|
-
raise LookupError(msg)
|
|
136
|
-
mdict["oseries"]["series"] = self.get_oseries(name).squeeze()
|
|
137
|
-
# update tmin/tmax from time series
|
|
138
|
-
if update_ts_settings:
|
|
139
|
-
mdict["oseries"]["settings"]["tmin"] = mdict["oseries"]["series"].index[
|
|
140
|
-
0
|
|
141
|
-
]
|
|
142
|
-
mdict["oseries"]["settings"]["tmax"] = mdict["oseries"]["series"].index[
|
|
143
|
-
-1
|
|
144
|
-
]
|
|
145
|
-
|
|
146
|
-
# StressModel, WellModel
|
|
147
|
-
for ts in mdict["stressmodels"].values():
|
|
148
|
-
if "stress" in ts.keys():
|
|
149
|
-
# WellModel
|
|
150
|
-
classkey = "stressmodel" if PASFILE_LEQ_022 else "class"
|
|
151
|
-
if ts[classkey] == "WellModel":
|
|
152
|
-
for stress in ts["stress"]:
|
|
153
|
-
if "series" not in stress:
|
|
154
|
-
name = str(stress["name"])
|
|
155
|
-
if name in self.stresses.index:
|
|
156
|
-
stress["series"] = self.get_stresses(name).squeeze()
|
|
157
|
-
# update tmin/tmax from time series
|
|
158
|
-
if update_ts_settings:
|
|
159
|
-
stress["settings"]["tmin"] = stress["series"].index[
|
|
160
|
-
0
|
|
161
|
-
]
|
|
162
|
-
stress["settings"]["tmax"] = stress["series"].index[
|
|
163
|
-
-1
|
|
164
|
-
]
|
|
165
|
-
# StressModel
|
|
166
|
-
else:
|
|
167
|
-
for stress in ts["stress"] if PASFILE_LEQ_022 else [ts["stress"]]:
|
|
168
|
-
if "series" not in stress:
|
|
169
|
-
name = str(stress["name"])
|
|
170
|
-
if name in self.stresses.index:
|
|
171
|
-
stress["series"] = self.get_stresses(name).squeeze()
|
|
172
|
-
# update tmin/tmax from time series
|
|
173
|
-
if update_ts_settings:
|
|
174
|
-
stress["settings"]["tmin"] = stress["series"].index[
|
|
175
|
-
0
|
|
176
|
-
]
|
|
177
|
-
stress["settings"]["tmax"] = stress["series"].index[
|
|
178
|
-
-1
|
|
179
|
-
]
|
|
180
|
-
|
|
181
|
-
# RechargeModel, TarsoModel
|
|
182
|
-
if ("prec" in ts.keys()) and ("evap" in ts.keys()):
|
|
183
|
-
for stress in [ts["prec"], ts["evap"]]:
|
|
184
|
-
if "series" not in stress:
|
|
185
|
-
name = str(stress["name"])
|
|
186
|
-
if name in self.stresses.index:
|
|
187
|
-
stress["series"] = self.get_stresses(name).squeeze()
|
|
188
|
-
# update tmin/tmax from time series
|
|
189
|
-
if update_ts_settings:
|
|
190
|
-
stress["settings"]["tmin"] = stress["series"].index[0]
|
|
191
|
-
stress["settings"]["tmax"] = stress["series"].index[-1]
|
|
192
|
-
else:
|
|
193
|
-
msg = "stress '{}' not present in library".format(name)
|
|
194
|
-
raise KeyError(msg)
|
|
195
|
-
|
|
196
|
-
# hack for pcov w dtype object (when filled with NaNs on store?)
|
|
197
|
-
if "fit" in mdict:
|
|
198
|
-
if "pcov" in mdict["fit"]:
|
|
199
|
-
pcov = mdict["fit"]["pcov"]
|
|
200
|
-
if pcov.dtypes.apply(lambda dtyp: isinstance(dtyp, object)).any():
|
|
201
|
-
mdict["fit"]["pcov"] = pcov.astype(float)
|
|
202
|
-
|
|
203
|
-
# check pastas version vs pas-file version
|
|
204
|
-
file_version = mdict["file_info"]["pastas_version"]
|
|
205
|
-
|
|
206
|
-
# check file version and pastas version
|
|
207
|
-
# if file<0.23 and pastas>=1.0 --> error
|
|
208
|
-
PASTAS_GT_023 = parse_version(ps.__version__) > parse_version("0.23.1")
|
|
209
|
-
if PASFILE_LEQ_022 and PASTAS_GT_023:
|
|
210
|
-
raise UserWarning(
|
|
211
|
-
f"This file was created with Pastas v{file_version} "
|
|
212
|
-
f"and cannot be loaded with Pastas v{ps.__version__} Please load and "
|
|
213
|
-
"save the file with Pastas 0.23 first to update the file "
|
|
214
|
-
"format."
|
|
215
|
-
)
|
|
216
|
-
|
|
217
|
-
try:
|
|
218
|
-
# pastas>=0.15.0
|
|
219
|
-
ml = ps.io.base._load_model(mdict)
|
|
220
|
-
except AttributeError:
|
|
221
|
-
# pastas<0.15.0
|
|
222
|
-
ml = ps.io.base.load_model(mdict)
|
|
223
|
-
return ml
|
|
224
|
-
|
|
225
|
-
@staticmethod
|
|
226
|
-
def _validate_input_series(series):
|
|
227
|
-
"""Check if series is pandas.DataFrame or pandas.Series.
|
|
228
|
-
|
|
229
|
-
Parameters
|
|
230
|
-
----------
|
|
231
|
-
series : object
|
|
232
|
-
object to validate
|
|
233
|
-
|
|
234
|
-
Raises
|
|
235
|
-
------
|
|
236
|
-
TypeError
|
|
237
|
-
if object is not of type pandas.DataFrame or pandas.Series
|
|
238
|
-
"""
|
|
239
|
-
if not (isinstance(series, pd.DataFrame) or isinstance(series, pd.Series)):
|
|
240
|
-
raise TypeError("Please provide pandas.DataFrame or pandas.Series!")
|
|
241
|
-
if isinstance(series, pd.DataFrame):
|
|
242
|
-
if series.columns.size > 1:
|
|
243
|
-
raise ValueError("Only DataFrames with one column are supported!")
|
|
244
|
-
|
|
245
|
-
@staticmethod
|
|
246
|
-
def _set_series_name(series, name):
|
|
247
|
-
"""Set series name to match user defined name in store.
|
|
248
|
-
|
|
249
|
-
Parameters
|
|
250
|
-
----------
|
|
251
|
-
series : pandas.Series or pandas.DataFrame
|
|
252
|
-
set name for this time series
|
|
253
|
-
name : str
|
|
254
|
-
name of the time series (used in the pastastore)
|
|
255
|
-
"""
|
|
256
|
-
if isinstance(series, pd.Series):
|
|
257
|
-
series.name = name
|
|
258
|
-
# empty string on index name causes trouble when reading
|
|
259
|
-
# data from ArcticDB: TODO: check if still an issue?
|
|
260
|
-
if series.index.name == "":
|
|
261
|
-
series.index.name = None
|
|
262
|
-
|
|
263
|
-
if isinstance(series, pd.DataFrame):
|
|
264
|
-
series.columns = [name]
|
|
265
|
-
# check for hydropandas objects which are instances of DataFrame but
|
|
266
|
-
# do have a name attribute
|
|
267
|
-
if hasattr(series, "name"):
|
|
268
|
-
series.name = name
|
|
269
|
-
return series
|
|
270
|
-
|
|
271
|
-
@staticmethod
|
|
272
|
-
def _check_stressmodels_supported(ml):
|
|
273
|
-
supported_stressmodels = [
|
|
274
|
-
"StressModel",
|
|
275
|
-
"StressModel2",
|
|
276
|
-
"RechargeModel",
|
|
277
|
-
"WellModel",
|
|
278
|
-
"TarsoModel",
|
|
279
|
-
"Constant",
|
|
280
|
-
"LinearTrend",
|
|
281
|
-
"StepModel",
|
|
282
|
-
]
|
|
283
|
-
if isinstance(ml, ps.Model):
|
|
284
|
-
smtyps = [sm._name for sm in ml.stressmodels.values()]
|
|
285
|
-
elif isinstance(ml, dict):
|
|
286
|
-
classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
|
|
287
|
-
smtyps = [sm[classkey] for sm in ml["stressmodels"].values()]
|
|
288
|
-
check = isin(smtyps, supported_stressmodels)
|
|
289
|
-
if not all(check):
|
|
290
|
-
unsupported = set(smtyps) - set(supported_stressmodels)
|
|
291
|
-
raise NotImplementedError(
|
|
292
|
-
"PastaStore does not support storing models with the "
|
|
293
|
-
f"following stressmodels: {unsupported}"
|
|
294
|
-
)
|
|
295
|
-
|
|
296
|
-
@staticmethod
|
|
297
|
-
def _check_model_series_names_for_store(ml):
|
|
298
|
-
prec_evap_model = ["RechargeModel", "TarsoModel"]
|
|
299
|
-
|
|
300
|
-
if isinstance(ml, ps.Model):
|
|
301
|
-
series_names = [
|
|
302
|
-
istress.series.name
|
|
303
|
-
for sm in ml.stressmodels.values()
|
|
304
|
-
for istress in sm.stress
|
|
305
|
-
]
|
|
306
|
-
|
|
307
|
-
elif isinstance(ml, dict):
|
|
308
|
-
# non RechargeModel, Tarsomodel, WellModel stressmodels
|
|
309
|
-
classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
|
|
310
|
-
if PASTAS_LEQ_022:
|
|
311
|
-
series_names = [
|
|
312
|
-
istress["name"]
|
|
313
|
-
for sm in ml["stressmodels"].values()
|
|
314
|
-
if sm[classkey] not in (prec_evap_model + ["WellModel"])
|
|
315
|
-
for istress in sm["stress"]
|
|
316
|
-
]
|
|
317
|
-
else:
|
|
318
|
-
series_names = [
|
|
319
|
-
sm["stress"]["name"]
|
|
320
|
-
for sm in ml["stressmodels"].values()
|
|
321
|
-
if sm[classkey] not in (prec_evap_model + ["WellModel"])
|
|
322
|
-
]
|
|
323
|
-
|
|
324
|
-
# WellModel
|
|
325
|
-
if isin(
|
|
326
|
-
["WellModel"],
|
|
327
|
-
[i[classkey] for i in ml["stressmodels"].values()],
|
|
328
|
-
).any():
|
|
329
|
-
series_names += [
|
|
330
|
-
istress["name"]
|
|
331
|
-
for sm in ml["stressmodels"].values()
|
|
332
|
-
if sm[classkey] in ["WellModel"]
|
|
333
|
-
for istress in sm["stress"]
|
|
334
|
-
]
|
|
335
|
-
|
|
336
|
-
# RechargeModel, TarsoModel
|
|
337
|
-
if isin(
|
|
338
|
-
prec_evap_model,
|
|
339
|
-
[i[classkey] for i in ml["stressmodels"].values()],
|
|
340
|
-
).any():
|
|
341
|
-
series_names += [
|
|
342
|
-
istress["name"]
|
|
343
|
-
for sm in ml["stressmodels"].values()
|
|
344
|
-
if sm[classkey] in prec_evap_model
|
|
345
|
-
for istress in [sm["prec"], sm["evap"]]
|
|
346
|
-
]
|
|
347
|
-
|
|
348
|
-
else:
|
|
349
|
-
raise TypeError("Expected pastas.Model or dict!")
|
|
350
|
-
if len(series_names) - len(set(series_names)) > 0:
|
|
351
|
-
msg = (
|
|
352
|
-
"There are multiple stresses series with the same name! "
|
|
353
|
-
"Each series name must be unique for the PastaStore!"
|
|
354
|
-
)
|
|
355
|
-
raise ValueError(msg)
|
|
356
|
-
|
|
357
|
-
def _check_oseries_in_store(self, ml: Union[ps.Model, dict]):
|
|
358
|
-
"""Check if Model oseries are contained in PastaStore (internal method).
|
|
359
|
-
|
|
360
|
-
Parameters
|
|
361
|
-
----------
|
|
362
|
-
ml : Union[ps.Model, dict]
|
|
363
|
-
pastas Model
|
|
364
|
-
"""
|
|
365
|
-
if isinstance(ml, ps.Model):
|
|
366
|
-
name = ml.oseries.name
|
|
367
|
-
elif isinstance(ml, dict):
|
|
368
|
-
name = str(ml["oseries"]["name"])
|
|
369
|
-
else:
|
|
370
|
-
raise TypeError("Expected pastas.Model or dict!")
|
|
371
|
-
if name not in self.oseries.index:
|
|
372
|
-
msg = (
|
|
373
|
-
f"Cannot add model because oseries '{name}' is not contained in store."
|
|
374
|
-
)
|
|
375
|
-
raise LookupError(msg)
|
|
376
|
-
# expensive check
|
|
377
|
-
if self.CHECK_MODEL_SERIES_VALUES and isinstance(ml, ps.Model):
|
|
378
|
-
s_org = self.get_oseries(name).squeeze().dropna()
|
|
379
|
-
if PASTAS_LEQ_022:
|
|
380
|
-
so = ml.oseries.series_original
|
|
381
|
-
else:
|
|
382
|
-
so = ml.oseries._series_original
|
|
383
|
-
try:
|
|
384
|
-
assert_series_equal(
|
|
385
|
-
so.dropna(),
|
|
386
|
-
s_org,
|
|
387
|
-
atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
|
|
388
|
-
rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
|
|
389
|
-
)
|
|
390
|
-
except AssertionError as e:
|
|
391
|
-
raise ValueError(
|
|
392
|
-
f"Cannot add model because model oseries '{name}'"
|
|
393
|
-
" is different from stored oseries! See stacktrace for differences."
|
|
394
|
-
) from e
|
|
395
|
-
|
|
396
|
-
def _check_stresses_in_store(self, ml: Union[ps.Model, dict]):
|
|
397
|
-
"""Check if stresses time series are contained in PastaStore (internal method).
|
|
398
|
-
|
|
399
|
-
Parameters
|
|
400
|
-
----------
|
|
401
|
-
ml : Union[ps.Model, dict]
|
|
402
|
-
pastas Model
|
|
403
|
-
"""
|
|
404
|
-
prec_evap_model = ["RechargeModel", "TarsoModel"]
|
|
405
|
-
if isinstance(ml, ps.Model):
|
|
406
|
-
for sm in ml.stressmodels.values():
|
|
407
|
-
if sm._name in prec_evap_model:
|
|
408
|
-
stresses = [sm.prec, sm.evap]
|
|
409
|
-
else:
|
|
410
|
-
stresses = sm.stress
|
|
411
|
-
for s in stresses:
|
|
412
|
-
if str(s.name) not in self.stresses.index:
|
|
413
|
-
msg = (
|
|
414
|
-
f"Cannot add model because stress '{s.name}' "
|
|
415
|
-
"is not contained in store."
|
|
416
|
-
)
|
|
417
|
-
raise LookupError(msg)
|
|
418
|
-
if self.CHECK_MODEL_SERIES_VALUES:
|
|
419
|
-
s_org = self.get_stresses(s.name).squeeze()
|
|
420
|
-
if PASTAS_LEQ_022:
|
|
421
|
-
so = s.series_original
|
|
422
|
-
else:
|
|
423
|
-
so = s._series_original
|
|
424
|
-
try:
|
|
425
|
-
assert_series_equal(
|
|
426
|
-
so,
|
|
427
|
-
s_org,
|
|
428
|
-
atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
|
|
429
|
-
rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
|
|
430
|
-
)
|
|
431
|
-
except AssertionError as e:
|
|
432
|
-
raise ValueError(
|
|
433
|
-
f"Cannot add model because model stress "
|
|
434
|
-
f"'{s.name}' is different from stored stress! "
|
|
435
|
-
"See stacktrace for differences."
|
|
436
|
-
) from e
|
|
437
|
-
elif isinstance(ml, dict):
|
|
438
|
-
for sm in ml["stressmodels"].values():
|
|
439
|
-
classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
|
|
440
|
-
if sm[classkey] in prec_evap_model:
|
|
441
|
-
stresses = [sm["prec"], sm["evap"]]
|
|
442
|
-
elif sm[classkey] in ["WellModel"]:
|
|
443
|
-
stresses = sm["stress"]
|
|
444
|
-
else:
|
|
445
|
-
stresses = sm["stress"] if PASTAS_LEQ_022 else [sm["stress"]]
|
|
446
|
-
for s in stresses:
|
|
447
|
-
if str(s["name"]) not in self.stresses.index:
|
|
448
|
-
msg = (
|
|
449
|
-
f"Cannot add model because stress '{s['name']}' "
|
|
450
|
-
"is not contained in store."
|
|
451
|
-
)
|
|
452
|
-
raise LookupError(msg)
|
|
453
|
-
else:
|
|
454
|
-
raise TypeError("Expected pastas.Model or dict!")
|
|
455
|
-
|
|
456
|
-
def _stored_series_to_json(
|
|
457
|
-
self,
|
|
458
|
-
libname: str,
|
|
459
|
-
names: Optional[Union[list, str]] = None,
|
|
460
|
-
squeeze: bool = True,
|
|
461
|
-
progressbar: bool = False,
|
|
462
|
-
):
|
|
463
|
-
"""Write stored series to JSON.
|
|
464
|
-
|
|
465
|
-
Parameters
|
|
466
|
-
----------
|
|
467
|
-
libname : str
|
|
468
|
-
library name
|
|
469
|
-
names : Optional[Union[list, str]], optional
|
|
470
|
-
names of series, by default None
|
|
471
|
-
squeeze : bool, optional
|
|
472
|
-
return single entry as json string instead
|
|
473
|
-
of list, by default True
|
|
474
|
-
progressbar : bool, optional
|
|
475
|
-
show progressbar, by default False
|
|
476
|
-
|
|
477
|
-
Returns
|
|
478
|
-
-------
|
|
479
|
-
files : list or str
|
|
480
|
-
list of series converted to JSON string or single string
|
|
481
|
-
if single entry is returned and squeeze is True
|
|
482
|
-
"""
|
|
483
|
-
names = self._parse_names(names, libname=libname)
|
|
484
|
-
files = []
|
|
485
|
-
for n in tqdm(names, desc=libname) if progressbar else names:
|
|
486
|
-
s = self._get_series(libname, n, progressbar=False)
|
|
487
|
-
if isinstance(s, pd.Series):
|
|
488
|
-
s = s.to_frame()
|
|
489
|
-
try:
|
|
490
|
-
sjson = s.to_json(orient="columns")
|
|
491
|
-
except ValueError as e:
|
|
492
|
-
msg = (
|
|
493
|
-
f"DatetimeIndex of '{n}' probably contains NaT "
|
|
494
|
-
"or duplicate timestamps!"
|
|
495
|
-
)
|
|
496
|
-
raise ValueError(msg) from e
|
|
497
|
-
files.append(sjson)
|
|
498
|
-
if len(files) == 1 and squeeze:
|
|
499
|
-
return files[0]
|
|
500
|
-
else:
|
|
501
|
-
return files
|
|
502
|
-
|
|
503
|
-
def _stored_metadata_to_json(
|
|
504
|
-
self,
|
|
505
|
-
libname: str,
|
|
506
|
-
names: Optional[Union[list, str]] = None,
|
|
507
|
-
squeeze: bool = True,
|
|
508
|
-
progressbar: bool = False,
|
|
509
|
-
):
|
|
510
|
-
"""Write metadata from stored series to JSON.
|
|
511
|
-
|
|
512
|
-
Parameters
|
|
513
|
-
----------
|
|
514
|
-
libname : str
|
|
515
|
-
library containing series
|
|
516
|
-
names : Optional[Union[list, str]], optional
|
|
517
|
-
names to parse, by default None
|
|
518
|
-
squeeze : bool, optional
|
|
519
|
-
return single entry as json string instead of list, by default True
|
|
520
|
-
progressbar : bool, optional
|
|
521
|
-
show progressbar, by default False
|
|
522
|
-
|
|
523
|
-
Returns
|
|
524
|
-
-------
|
|
525
|
-
files : list or str
|
|
526
|
-
list of json string
|
|
527
|
-
"""
|
|
528
|
-
names = self._parse_names(names, libname=libname)
|
|
529
|
-
files = []
|
|
530
|
-
for n in tqdm(names, desc=libname) if progressbar else names:
|
|
531
|
-
meta = self.get_metadata(libname, n, as_frame=False)
|
|
532
|
-
meta_json = json.dumps(meta, cls=PastasEncoder, indent=4)
|
|
533
|
-
files.append(meta_json)
|
|
534
|
-
if len(files) == 1 and squeeze:
|
|
535
|
-
return files[0]
|
|
536
|
-
else:
|
|
537
|
-
return files
|
|
538
|
-
|
|
539
|
-
def _series_to_archive(
|
|
540
|
-
self,
|
|
541
|
-
archive,
|
|
542
|
-
libname: str,
|
|
543
|
-
names: Optional[Union[list, str]] = None,
|
|
544
|
-
progressbar: bool = True,
|
|
545
|
-
):
|
|
546
|
-
"""Write DataFrame or Series to zipfile (internal method).
|
|
547
|
-
|
|
548
|
-
Parameters
|
|
549
|
-
----------
|
|
550
|
-
archive : zipfile.ZipFile
|
|
551
|
-
reference to an archive to write data to
|
|
552
|
-
libname : str
|
|
553
|
-
name of the library to write to zipfile
|
|
554
|
-
names : str or list of str, optional
|
|
555
|
-
names of the time series to write to archive, by default None,
|
|
556
|
-
which writes all time series to archive
|
|
557
|
-
progressbar : bool, optional
|
|
558
|
-
show progressbar, by default True
|
|
559
|
-
"""
|
|
560
|
-
names = self._parse_names(names, libname=libname)
|
|
561
|
-
for n in tqdm(names, desc=libname) if progressbar else names:
|
|
562
|
-
sjson = self._stored_series_to_json(
|
|
563
|
-
libname, names=n, progressbar=False, squeeze=True
|
|
564
|
-
)
|
|
565
|
-
meta_json = self._stored_metadata_to_json(
|
|
566
|
-
libname, names=n, progressbar=False, squeeze=True
|
|
567
|
-
)
|
|
568
|
-
archive.writestr(f"{libname}/{n}.pas", sjson)
|
|
569
|
-
archive.writestr(f"{libname}/{n}_meta.pas", meta_json)
|
|
570
|
-
|
|
571
|
-
def _models_to_archive(self, archive, names=None, progressbar=True):
|
|
572
|
-
"""Write pastas.Model to zipfile (internal method).
|
|
573
|
-
|
|
574
|
-
Parameters
|
|
575
|
-
----------
|
|
576
|
-
archive : zipfile.ZipFile
|
|
577
|
-
reference to an archive to write data to
|
|
578
|
-
names : str or list of str, optional
|
|
579
|
-
names of the models to write to archive, by default None,
|
|
580
|
-
which writes all models to archive
|
|
581
|
-
progressbar : bool, optional
|
|
582
|
-
show progressbar, by default True
|
|
583
|
-
"""
|
|
584
|
-
names = self._parse_names(names, libname="models")
|
|
585
|
-
for n in tqdm(names, desc="models") if progressbar else names:
|
|
586
|
-
m = self.get_models(n, return_dict=True)
|
|
587
|
-
jsondict = json.dumps(m, cls=PastasEncoder, indent=4)
|
|
588
|
-
archive.writestr(f"models/{n}.pas", jsondict)
|
|
589
|
-
|
|
590
|
-
@staticmethod
|
|
591
|
-
def _series_from_json(fjson: str, squeeze: bool = True):
|
|
592
|
-
"""Load time series from JSON.
|
|
593
|
-
|
|
594
|
-
Parameters
|
|
595
|
-
----------
|
|
596
|
-
fjson : str
|
|
597
|
-
path to file
|
|
598
|
-
squeeze : bool, optional
|
|
599
|
-
squeeze time series object to obtain pandas Series
|
|
600
|
-
|
|
601
|
-
Returns
|
|
602
|
-
-------
|
|
603
|
-
s : pd.DataFrame
|
|
604
|
-
DataFrame containing time series
|
|
605
|
-
"""
|
|
606
|
-
s = pd.read_json(fjson, orient="columns", precise_float=True, dtype=False)
|
|
607
|
-
if not isinstance(s.index, pd.DatetimeIndex):
|
|
608
|
-
s.index = pd.to_datetime(s.index, unit="ms")
|
|
609
|
-
s = s.sort_index() # needed for some reason ...
|
|
610
|
-
if squeeze:
|
|
611
|
-
return s.squeeze(axis="columns")
|
|
612
|
-
return s
|
|
613
|
-
|
|
614
|
-
@staticmethod
|
|
615
|
-
def _metadata_from_json(fjson: str):
|
|
616
|
-
"""Load metadata dictionary from JSON.
|
|
617
|
-
|
|
618
|
-
Parameters
|
|
619
|
-
----------
|
|
620
|
-
fjson : str
|
|
621
|
-
path to file
|
|
622
|
-
|
|
623
|
-
Returns
|
|
624
|
-
-------
|
|
625
|
-
meta : dict
|
|
626
|
-
dictionary containing metadata
|
|
627
|
-
"""
|
|
628
|
-
with open(fjson, "r") as f:
|
|
629
|
-
meta = json.load(f)
|
|
630
|
-
return meta
|
|
631
|
-
|
|
632
|
-
def _get_model_orphans(self):
|
|
633
|
-
"""Get models whose oseries no longer exist in database.
|
|
634
|
-
|
|
635
|
-
Returns
|
|
636
|
-
-------
|
|
637
|
-
dict
|
|
638
|
-
dictionary with oseries names as keys and lists of model names
|
|
639
|
-
as values
|
|
640
|
-
"""
|
|
641
|
-
d = {}
|
|
642
|
-
for mlnam in tqdm(self.model_names, desc="Identifying model orphans"):
|
|
643
|
-
mdict = self.get_models(mlnam, return_dict=True)
|
|
644
|
-
onam = mdict["oseries"]["name"]
|
|
645
|
-
if onam not in self.oseries_names:
|
|
646
|
-
if onam in d:
|
|
647
|
-
d[onam] = d[onam].append(mlnam)
|
|
648
|
-
else:
|
|
649
|
-
d[onam] = [mlnam]
|
|
650
|
-
return d
|
|
38
|
+
class ParallelUtil:
|
|
39
|
+
"""Mix-in class for storing parallelizable methods."""
|
|
651
40
|
|
|
652
41
|
@staticmethod
|
|
653
42
|
def _solve_model(
|
|
@@ -675,11 +64,11 @@ class ConnectorUtil:
|
|
|
675
64
|
arguments are passed to the solve method.
|
|
676
65
|
"""
|
|
677
66
|
if connector is not None:
|
|
678
|
-
|
|
67
|
+
_conn = connector
|
|
679
68
|
else:
|
|
680
|
-
|
|
69
|
+
_conn = globals()["conn"]
|
|
681
70
|
|
|
682
|
-
ml =
|
|
71
|
+
ml = _conn.get_models(ml_name)
|
|
683
72
|
m_kwargs = {}
|
|
684
73
|
for key, value in kwargs.items():
|
|
685
74
|
if isinstance(value, pd.Series):
|
|
@@ -693,14 +82,14 @@ class ConnectorUtil:
|
|
|
693
82
|
|
|
694
83
|
try:
|
|
695
84
|
ml.solve(report=report, **m_kwargs)
|
|
696
|
-
except Exception as e:
|
|
85
|
+
except Exception as e: # pylint: disable=broad-except
|
|
697
86
|
if ignore_solve_errors:
|
|
698
|
-
warning = "Solve error ignored for '
|
|
87
|
+
warning = f"Solve error ignored for '{ml.name}': {e}"
|
|
699
88
|
logger.warning(warning)
|
|
700
89
|
else:
|
|
701
90
|
raise e
|
|
702
|
-
|
|
703
|
-
|
|
91
|
+
# store the updated model back in the database
|
|
92
|
+
_conn.add_model(ml, overwrite=True)
|
|
704
93
|
|
|
705
94
|
@staticmethod
|
|
706
95
|
def _get_statistics(
|
|
@@ -717,13 +106,14 @@ class ConnectorUtil:
|
|
|
717
106
|
of the apply method.
|
|
718
107
|
"""
|
|
719
108
|
if connector is not None:
|
|
720
|
-
|
|
109
|
+
_conn = connector
|
|
721
110
|
else:
|
|
722
|
-
|
|
111
|
+
_conn = globals()["conn"]
|
|
723
112
|
|
|
724
|
-
ml =
|
|
113
|
+
ml = _conn.get_model(name)
|
|
725
114
|
series = pd.Series(index=statistics, dtype=float)
|
|
726
115
|
for stat in statistics:
|
|
116
|
+
# Note: ml.stats is part of pastas.Model public API
|
|
727
117
|
series.loc[stat] = getattr(ml.stats, stat)(**kwargs)
|
|
728
118
|
return series
|
|
729
119
|
|
|
@@ -739,15 +129,18 @@ class ConnectorUtil:
|
|
|
739
129
|
min(32, os.cpu_count() + 4) if max_workers is None else max_workers
|
|
740
130
|
)
|
|
741
131
|
if chunksize is None:
|
|
742
|
-
|
|
132
|
+
# 14 chunks per worker balances overhead vs granularity
|
|
133
|
+
# from stackoverflow link posted in docstring.
|
|
134
|
+
CHUNKS_PER_WORKER = 14
|
|
135
|
+
num_chunks = max_workers * CHUNKS_PER_WORKER
|
|
743
136
|
chunksize = max(njobs // num_chunks, 1)
|
|
744
137
|
return max_workers, chunksize
|
|
745
138
|
|
|
746
139
|
|
|
747
|
-
class ArcticDBConnector(BaseConnector,
|
|
140
|
+
class ArcticDBConnector(BaseConnector, ParallelUtil):
|
|
748
141
|
"""ArcticDBConnector object using ArcticDB to store data."""
|
|
749
142
|
|
|
750
|
-
|
|
143
|
+
_conn_type = "arcticdb"
|
|
751
144
|
|
|
752
145
|
def __init__(self, name: str, uri: str, verbose: bool = True):
|
|
753
146
|
"""Create an ArcticDBConnector object using ArcticDB to store data.
|
|
@@ -759,39 +152,48 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
|
|
|
759
152
|
uri : str
|
|
760
153
|
URI connection string (e.g. 'lmdb://<your path here>')
|
|
761
154
|
verbose : bool, optional
|
|
762
|
-
whether to
|
|
155
|
+
whether to log messages when database is initialized, by default True
|
|
763
156
|
"""
|
|
764
157
|
try:
|
|
765
158
|
import arcticdb
|
|
766
159
|
|
|
767
160
|
except ModuleNotFoundError as e:
|
|
768
|
-
|
|
161
|
+
logger.error("Please install arcticdb with `pip install arcticdb`!")
|
|
769
162
|
raise e
|
|
163
|
+
super().__init__()
|
|
770
164
|
self.uri = uri
|
|
771
165
|
self.name = name
|
|
772
166
|
|
|
167
|
+
# initialize validator class to check inputs
|
|
168
|
+
self._validator = Validator(self)
|
|
169
|
+
|
|
170
|
+
# create libraries
|
|
773
171
|
self.libs: dict = {}
|
|
774
172
|
self.arc = arcticdb.Arctic(uri)
|
|
775
173
|
self._initialize(verbose=verbose)
|
|
776
174
|
self.models = ModelAccessor(self)
|
|
777
175
|
# for older versions of PastaStore, if oseries_models library is empty
|
|
778
176
|
# populate oseries - models database
|
|
779
|
-
self.
|
|
177
|
+
self._update_time_series_model_links()
|
|
780
178
|
# write pstore file to store database info that can be used to load pstore
|
|
781
179
|
if "lmdb" in self.uri:
|
|
782
180
|
self.write_pstore_config_file()
|
|
783
181
|
|
|
784
182
|
def _initialize(self, verbose: bool = True) -> None:
|
|
785
183
|
"""Initialize the libraries (internal method)."""
|
|
184
|
+
if "lmdb" in self.uri.lower(): # only check for LMDB
|
|
185
|
+
self.validator.check_config_connector_type(
|
|
186
|
+
Path(self.uri.split("://")[1]) / self.name
|
|
187
|
+
)
|
|
786
188
|
for libname in self._default_library_names:
|
|
787
189
|
if self._library_name(libname) not in self.arc.list_libraries():
|
|
788
190
|
self.arc.create_library(self._library_name(libname))
|
|
789
191
|
else:
|
|
790
192
|
if verbose:
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
193
|
+
logger.info(
|
|
194
|
+
"ArcticDBConnector: library '%s' already exists. "
|
|
195
|
+
"Linking to existing library.",
|
|
196
|
+
self._library_name(libname),
|
|
795
197
|
)
|
|
796
198
|
self.libs[libname] = self._get_library(libname)
|
|
797
199
|
|
|
@@ -809,20 +211,21 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
|
|
|
809
211
|
"uri": self.uri,
|
|
810
212
|
}
|
|
811
213
|
if path is None and "lmdb" in self.uri:
|
|
812
|
-
path = self.uri.split("://")[1]
|
|
214
|
+
path = Path(self.uri.split("://")[1])
|
|
813
215
|
elif path is None and "lmdb" not in self.uri:
|
|
814
216
|
raise ValueError("Please provide a path to write the pastastore file!")
|
|
815
217
|
|
|
816
|
-
with open(
|
|
817
|
-
|
|
218
|
+
with (path / self.name / f"{self.name}.pastastore").open(
|
|
219
|
+
"w",
|
|
220
|
+
encoding="utf-8",
|
|
818
221
|
) as f:
|
|
819
222
|
json.dump(config, f)
|
|
820
223
|
|
|
821
|
-
def _library_name(self, libname:
|
|
224
|
+
def _library_name(self, libname: AllLibs) -> str:
|
|
822
225
|
"""Get full library name according to ArcticDB (internal method)."""
|
|
823
226
|
return ".".join([self.name, libname])
|
|
824
227
|
|
|
825
|
-
def _get_library(self, libname:
|
|
228
|
+
def _get_library(self, libname: AllLibs):
|
|
826
229
|
"""Get ArcticDB library handle.
|
|
827
230
|
|
|
828
231
|
Parameters
|
|
@@ -836,13 +239,15 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
|
|
|
836
239
|
handle to the library
|
|
837
240
|
"""
|
|
838
241
|
# get library handle
|
|
839
|
-
|
|
840
|
-
|
|
242
|
+
if libname in self.libs:
|
|
243
|
+
return self.libs[libname]
|
|
244
|
+
else:
|
|
245
|
+
return self.arc.get_library(self._library_name(libname))
|
|
841
246
|
|
|
842
247
|
def _add_item(
|
|
843
248
|
self,
|
|
844
|
-
libname:
|
|
845
|
-
item: Union[
|
|
249
|
+
libname: AllLibs,
|
|
250
|
+
item: Union[FrameOrSeriesUnion, Dict],
|
|
846
251
|
name: str,
|
|
847
252
|
metadata: Optional[Dict] = None,
|
|
848
253
|
**_,
|
|
@@ -861,6 +266,10 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
|
|
|
861
266
|
dictionary containing metadata, by default None
|
|
862
267
|
"""
|
|
863
268
|
lib = self._get_library(libname)
|
|
269
|
+
|
|
270
|
+
# check file name for illegal characters
|
|
271
|
+
name = self.validator.check_filename_illegal_chars(libname, name)
|
|
272
|
+
|
|
864
273
|
# only normalizable datatypes can be written with write, else use write_pickle
|
|
865
274
|
# normalizable: Series, DataFrames, Numpy Arrays
|
|
866
275
|
if isinstance(item, (dict, list)):
|
|
@@ -868,7 +277,7 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
|
|
|
868
277
|
else:
|
|
869
278
|
lib.write(name, item, metadata=metadata)
|
|
870
279
|
|
|
871
|
-
def _get_item(self, libname:
|
|
280
|
+
def _get_item(self, libname: AllLibs, name: str) -> Union[FrameOrSeriesUnion, Dict]:
|
|
872
281
|
"""Retrieve item from library (internal method).
|
|
873
282
|
|
|
874
283
|
Parameters
|
|
@@ -886,7 +295,7 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
|
|
|
886
295
|
lib = self._get_library(libname)
|
|
887
296
|
return lib.read(name).data
|
|
888
297
|
|
|
889
|
-
def _del_item(self, libname:
|
|
298
|
+
def _del_item(self, libname: AllLibs, name: str, force: bool = False) -> None:
|
|
890
299
|
"""Delete items (series or models) (internal method).
|
|
891
300
|
|
|
892
301
|
Parameters
|
|
@@ -895,11 +304,15 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
|
|
|
895
304
|
name of library to delete item from
|
|
896
305
|
name : str
|
|
897
306
|
name of item to delete
|
|
307
|
+
force : bool, optional
|
|
308
|
+
force deletion even if series is used in models, by default False
|
|
898
309
|
"""
|
|
899
310
|
lib = self._get_library(libname)
|
|
311
|
+
if self.validator.PROTECT_SERIES_IN_MODELS and not force:
|
|
312
|
+
self.validator.check_series_in_models(libname, name)
|
|
900
313
|
lib.delete(name)
|
|
901
314
|
|
|
902
|
-
def _get_metadata(self, libname:
|
|
315
|
+
def _get_metadata(self, libname: TimeSeriesLibs, name: str) -> dict:
|
|
903
316
|
"""Retrieve metadata for an item (internal method).
|
|
904
317
|
|
|
905
318
|
Parameters
|
|
@@ -931,6 +344,20 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
|
|
|
931
344
|
|
|
932
345
|
Does not return results, so function must store results in database.
|
|
933
346
|
|
|
347
|
+
Note
|
|
348
|
+
----
|
|
349
|
+
ArcticDB connection objects cannot be pickled, which is required for
|
|
350
|
+
multiprocessing. This implementation uses an initializer function that
|
|
351
|
+
creates a new ArcticDBConnector instance in each worker process and stores
|
|
352
|
+
it in the global `conn` variable. User-provided functions can access this
|
|
353
|
+
connector via the global `conn` variable.
|
|
354
|
+
|
|
355
|
+
This is the standard Python multiprocessing pattern for unpicklable objects.
|
|
356
|
+
See: https://docs.python.org/3/library/concurrent.futures.html#processpoolexecutor
|
|
357
|
+
|
|
358
|
+
For a connector that supports direct method passing (no global variable
|
|
359
|
+
required), use PasConnector instead.
|
|
360
|
+
|
|
934
361
|
Parameters
|
|
935
362
|
----------
|
|
936
363
|
func : function
|
|
@@ -948,13 +375,13 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
|
|
|
948
375
|
desc : str, optional
|
|
949
376
|
description for progressbar, by default ""
|
|
950
377
|
"""
|
|
951
|
-
max_workers, chunksize =
|
|
378
|
+
max_workers, chunksize = self._get_max_workers_and_chunksize(
|
|
952
379
|
max_workers, len(names), chunksize
|
|
953
380
|
)
|
|
954
381
|
|
|
955
382
|
def initializer(*args):
|
|
956
|
-
global
|
|
957
|
-
conn = ArcticDBConnector(*args)
|
|
383
|
+
# assign to module-level variable without using 'global' statement
|
|
384
|
+
globals()["conn"] = ArcticDBConnector(*args)
|
|
958
385
|
|
|
959
386
|
initargs = (self.name, self.uri, False)
|
|
960
387
|
|
|
@@ -981,49 +408,26 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
|
|
|
981
408
|
)
|
|
982
409
|
return result
|
|
983
410
|
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
"""List of oseries names.
|
|
987
|
-
|
|
988
|
-
Returns
|
|
989
|
-
-------
|
|
990
|
-
list
|
|
991
|
-
list of oseries in library
|
|
992
|
-
"""
|
|
993
|
-
return self._get_library("oseries").list_symbols()
|
|
994
|
-
|
|
995
|
-
@property
|
|
996
|
-
def stresses_names(self):
|
|
997
|
-
"""List of stresses names.
|
|
411
|
+
def _list_symbols(self, libname: AllLibs) -> List[str]:
|
|
412
|
+
"""List symbols in a library (internal method).
|
|
998
413
|
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
"""
|
|
1004
|
-
return self._get_library("stresses").list_symbols()
|
|
1005
|
-
|
|
1006
|
-
@property
|
|
1007
|
-
def model_names(self):
|
|
1008
|
-
"""List of model names.
|
|
414
|
+
Parameters
|
|
415
|
+
----------
|
|
416
|
+
libname : str
|
|
417
|
+
name of the library
|
|
1009
418
|
|
|
1010
419
|
Returns
|
|
1011
420
|
-------
|
|
1012
421
|
list
|
|
1013
|
-
list of
|
|
422
|
+
list of symbols in the library
|
|
1014
423
|
"""
|
|
1015
|
-
return self._get_library(
|
|
424
|
+
return self._get_library(libname).list_symbols()
|
|
1016
425
|
|
|
1017
|
-
@property
|
|
1018
|
-
def oseries_with_models(self):
|
|
1019
|
-
"""List of oseries with models."""
|
|
1020
|
-
return self._get_library("oseries_models").list_symbols()
|
|
1021
426
|
|
|
1022
|
-
|
|
1023
|
-
class DictConnector(BaseConnector, ConnectorUtil):
|
|
427
|
+
class DictConnector(BaseConnector, ParallelUtil):
|
|
1024
428
|
"""DictConnector object that stores timeseries and models in dictionaries."""
|
|
1025
429
|
|
|
1026
|
-
|
|
430
|
+
_conn_type = "dict"
|
|
1027
431
|
|
|
1028
432
|
def __init__(self, name: str = "pastas_db"):
|
|
1029
433
|
"""Create DictConnector object that stores data in dictionaries.
|
|
@@ -1033,17 +437,19 @@ class DictConnector(BaseConnector, ConnectorUtil):
|
|
|
1033
437
|
name : str, optional
|
|
1034
438
|
user-specified name of the connector
|
|
1035
439
|
"""
|
|
440
|
+
super().__init__()
|
|
1036
441
|
self.name = name
|
|
1037
442
|
|
|
1038
443
|
# create empty dictionaries for series and models
|
|
1039
444
|
for val in self._default_library_names:
|
|
1040
445
|
setattr(self, "lib_" + val, {})
|
|
446
|
+
self._validator = Validator(self)
|
|
1041
447
|
self.models = ModelAccessor(self)
|
|
1042
448
|
# for older versions of PastaStore, if oseries_models library is empty
|
|
1043
449
|
# populate oseries - models database
|
|
1044
|
-
self.
|
|
450
|
+
self._update_time_series_model_links()
|
|
1045
451
|
|
|
1046
|
-
def _get_library(self, libname:
|
|
452
|
+
def _get_library(self, libname: AllLibs):
|
|
1047
453
|
"""Get reference to dictionary holding data.
|
|
1048
454
|
|
|
1049
455
|
Parameters
|
|
@@ -1061,7 +467,7 @@ class DictConnector(BaseConnector, ConnectorUtil):
|
|
|
1061
467
|
def _add_item(
|
|
1062
468
|
self,
|
|
1063
469
|
libname: str,
|
|
1064
|
-
item: Union[
|
|
470
|
+
item: Union[FrameOrSeriesUnion, Dict],
|
|
1065
471
|
name: str,
|
|
1066
472
|
metadata: Optional[Dict] = None,
|
|
1067
473
|
**_,
|
|
@@ -1080,12 +486,16 @@ class DictConnector(BaseConnector, ConnectorUtil):
|
|
|
1080
486
|
dictionary containing metadata, by default None
|
|
1081
487
|
"""
|
|
1082
488
|
lib = self._get_library(libname)
|
|
1083
|
-
|
|
489
|
+
|
|
490
|
+
# check file name for illegal characters
|
|
491
|
+
name = self.validator.check_filename_illegal_chars(libname, name)
|
|
492
|
+
|
|
493
|
+
if libname in ["models", "oseries_models", "stresses_models"]:
|
|
1084
494
|
lib[name] = item
|
|
1085
495
|
else:
|
|
1086
496
|
lib[name] = (metadata, item)
|
|
1087
497
|
|
|
1088
|
-
def _get_item(self, libname:
|
|
498
|
+
def _get_item(self, libname: AllLibs, name: str) -> Union[FrameOrSeriesUnion, Dict]:
|
|
1089
499
|
"""Retrieve item from database (internal method).
|
|
1090
500
|
|
|
1091
501
|
Parameters
|
|
@@ -1098,16 +508,18 @@ class DictConnector(BaseConnector, ConnectorUtil):
|
|
|
1098
508
|
Returns
|
|
1099
509
|
-------
|
|
1100
510
|
item : Union[FrameorSeriesUnion, Dict]
|
|
1101
|
-
time series or model dictionary
|
|
511
|
+
time series or model dictionary, modifying the returned object will not
|
|
512
|
+
affect the stored data, like in a real database
|
|
1102
513
|
"""
|
|
1103
514
|
lib = self._get_library(libname)
|
|
1104
|
-
|
|
515
|
+
# deepcopy calls are needed to ensure users cannot change "stored" items
|
|
516
|
+
if libname in ["models", "oseries_models", "stresses_models"]:
|
|
1105
517
|
item = deepcopy(lib[name])
|
|
1106
518
|
else:
|
|
1107
519
|
item = deepcopy(lib[name][1])
|
|
1108
520
|
return item
|
|
1109
521
|
|
|
1110
|
-
def _del_item(self, libname:
|
|
522
|
+
def _del_item(self, libname: AllLibs, name: str, force: bool = False) -> None:
|
|
1111
523
|
"""Delete items (series or models) (internal method).
|
|
1112
524
|
|
|
1113
525
|
Parameters
|
|
@@ -1116,11 +528,16 @@ class DictConnector(BaseConnector, ConnectorUtil):
|
|
|
1116
528
|
name of library to delete item from
|
|
1117
529
|
name : str
|
|
1118
530
|
name of item to delete
|
|
531
|
+
force : bool, optional
|
|
532
|
+
if True, force delete item and do not perform check if series
|
|
533
|
+
is used in a model, by default False
|
|
1119
534
|
"""
|
|
535
|
+
if self.validator.PROTECT_SERIES_IN_MODELS and not force:
|
|
536
|
+
self.validator.check_series_in_models(libname, name)
|
|
1120
537
|
lib = self._get_library(libname)
|
|
1121
538
|
_ = lib.pop(name)
|
|
1122
539
|
|
|
1123
|
-
def _get_metadata(self, libname:
|
|
540
|
+
def _get_metadata(self, libname: TimeSeriesLibs, name: str) -> dict:
|
|
1124
541
|
"""Read metadata (internal method).
|
|
1125
542
|
|
|
1126
543
|
Parameters
|
|
@@ -1140,40 +557,40 @@ class DictConnector(BaseConnector, ConnectorUtil):
|
|
|
1140
557
|
return imeta
|
|
1141
558
|
|
|
1142
559
|
def _parallel(self, *args, **kwargs) -> None:
|
|
560
|
+
"""Parallel implementation method.
|
|
561
|
+
|
|
562
|
+
Raises
|
|
563
|
+
------
|
|
564
|
+
NotImplementedError
|
|
565
|
+
DictConnector uses in-memory storage that cannot be shared across
|
|
566
|
+
processes. Use PasConnector or ArcticDBConnector for parallel operations.
|
|
567
|
+
"""
|
|
1143
568
|
raise NotImplementedError(
|
|
1144
569
|
"DictConnector does not support parallel processing,"
|
|
1145
570
|
" use PasConnector or ArcticDBConnector."
|
|
1146
571
|
)
|
|
1147
572
|
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
"""List of oseries names."""
|
|
1151
|
-
lib = self._get_library("oseries")
|
|
1152
|
-
return list(lib.keys())
|
|
1153
|
-
|
|
1154
|
-
@property
|
|
1155
|
-
def stresses_names(self):
|
|
1156
|
-
"""List of stresses names."""
|
|
1157
|
-
lib = self._get_library("stresses")
|
|
1158
|
-
return list(lib.keys())
|
|
573
|
+
def _list_symbols(self, libname: AllLibs) -> List[str]:
|
|
574
|
+
"""List symbols in a library (internal method).
|
|
1159
575
|
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
return list(lib.keys())
|
|
576
|
+
Parameters
|
|
577
|
+
----------
|
|
578
|
+
libname : str
|
|
579
|
+
name of the library
|
|
1165
580
|
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
581
|
+
Returns
|
|
582
|
+
-------
|
|
583
|
+
list
|
|
584
|
+
list of symbols in the library
|
|
585
|
+
"""
|
|
586
|
+
lib = self._get_library(libname)
|
|
1170
587
|
return list(lib.keys())
|
|
1171
588
|
|
|
1172
589
|
|
|
1173
|
-
class PasConnector(BaseConnector,
|
|
590
|
+
class PasConnector(BaseConnector, ParallelUtil):
|
|
1174
591
|
"""PasConnector object that stores time series and models as JSON files on disk."""
|
|
1175
592
|
|
|
1176
|
-
|
|
593
|
+
_conn_type = "pas"
|
|
1177
594
|
|
|
1178
595
|
def __init__(self, name: str, path: str, verbose: bool = True):
|
|
1179
596
|
"""Create PasConnector object that stores data as JSON files on disk.
|
|
@@ -1190,47 +607,52 @@ class PasConnector(BaseConnector, ConnectorUtil):
|
|
|
1190
607
|
verbose : bool, optional
|
|
1191
608
|
whether to print message when database is initialized, by default True
|
|
1192
609
|
"""
|
|
610
|
+
# super().__init__()
|
|
1193
611
|
self.name = name
|
|
1194
|
-
self.parentdir = path
|
|
1195
|
-
self.path =
|
|
1196
|
-
self.relpath = os.path.relpath(self.
|
|
612
|
+
self.parentdir = Path(path)
|
|
613
|
+
self.path = (self.parentdir / self.name).absolute()
|
|
614
|
+
self.relpath = os.path.relpath(self.parentdir)
|
|
615
|
+
self._validator = Validator(self)
|
|
1197
616
|
self._initialize(verbose=verbose)
|
|
1198
617
|
self.models = ModelAccessor(self)
|
|
1199
618
|
# for older versions of PastaStore, if oseries_models library is empty
|
|
1200
619
|
# populate oseries_models library
|
|
1201
|
-
self.
|
|
620
|
+
self._update_time_series_model_links()
|
|
1202
621
|
# write pstore file to store database info that can be used to load pstore
|
|
1203
622
|
self._write_pstore_config_file()
|
|
1204
623
|
|
|
1205
624
|
def _initialize(self, verbose: bool = True) -> None:
|
|
1206
625
|
"""Initialize the libraries (internal method)."""
|
|
626
|
+
self.validator.check_config_connector_type(self.path)
|
|
1207
627
|
for val in self._default_library_names:
|
|
1208
|
-
libdir =
|
|
1209
|
-
if not
|
|
628
|
+
libdir = self.path / val
|
|
629
|
+
if not libdir.exists():
|
|
1210
630
|
if verbose:
|
|
1211
|
-
|
|
1212
|
-
|
|
631
|
+
logger.info(
|
|
632
|
+
"PasConnector: library '%s' created in '%s'", val, libdir
|
|
633
|
+
)
|
|
634
|
+
libdir.mkdir(parents=True, exist_ok=False)
|
|
1213
635
|
else:
|
|
1214
636
|
if verbose:
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
637
|
+
logger.info(
|
|
638
|
+
"PasConnector: library '%s' already exists. "
|
|
639
|
+
"Linking to existing directory: '%s'",
|
|
640
|
+
val,
|
|
641
|
+
libdir,
|
|
1218
642
|
)
|
|
1219
|
-
setattr(self, f"lib_{val}",
|
|
643
|
+
setattr(self, f"lib_{val}", self.path / val)
|
|
1220
644
|
|
|
1221
645
|
def _write_pstore_config_file(self):
|
|
1222
646
|
"""Write pstore configuration file to store database info."""
|
|
1223
647
|
config = {
|
|
1224
648
|
"connector_type": self.conn_type,
|
|
1225
649
|
"name": self.name,
|
|
1226
|
-
"path":
|
|
650
|
+
"path": str(self.parentdir.absolute()),
|
|
1227
651
|
}
|
|
1228
|
-
with open(
|
|
1229
|
-
os.path.join(self.path, f"{self.name}.pastastore"), "w", encoding="utf-8"
|
|
1230
|
-
) as f:
|
|
652
|
+
with (self.path / f"{self.name}.pastastore").open("w", encoding="utf-8") as f:
|
|
1231
653
|
json.dump(config, f)
|
|
1232
654
|
|
|
1233
|
-
def _get_library(self, libname:
|
|
655
|
+
def _get_library(self, libname: AllLibs) -> Path:
|
|
1234
656
|
"""Get path to directory holding data.
|
|
1235
657
|
|
|
1236
658
|
Parameters
|
|
@@ -1243,12 +665,12 @@ class PasConnector(BaseConnector, ConnectorUtil):
|
|
|
1243
665
|
lib : str
|
|
1244
666
|
path to library
|
|
1245
667
|
"""
|
|
1246
|
-
return getattr(self, "lib_" + libname)
|
|
668
|
+
return Path(getattr(self, "lib_" + libname))
|
|
1247
669
|
|
|
1248
670
|
def _add_item(
|
|
1249
671
|
self,
|
|
1250
672
|
libname: str,
|
|
1251
|
-
item: Union[
|
|
673
|
+
item: Union[FrameOrSeriesUnion, Dict],
|
|
1252
674
|
name: str,
|
|
1253
675
|
metadata: Optional[Dict] = None,
|
|
1254
676
|
**_,
|
|
@@ -1268,33 +690,41 @@ class PasConnector(BaseConnector, ConnectorUtil):
|
|
|
1268
690
|
"""
|
|
1269
691
|
lib = self._get_library(libname)
|
|
1270
692
|
|
|
693
|
+
# check file name for illegal characters
|
|
694
|
+
name = self.validator.check_filename_illegal_chars(libname, name)
|
|
695
|
+
|
|
1271
696
|
# time series
|
|
1272
697
|
if isinstance(item, pd.Series):
|
|
1273
698
|
item = item.to_frame()
|
|
1274
699
|
if isinstance(item, pd.DataFrame):
|
|
1275
700
|
sjson = item.to_json(orient="columns")
|
|
1276
|
-
|
|
1277
|
-
|
|
701
|
+
if name.endswith("_meta"):
|
|
702
|
+
raise ValueError(
|
|
703
|
+
"Time series name cannot end with '_meta'. "
|
|
704
|
+
"Please use a different name for your time series."
|
|
705
|
+
)
|
|
706
|
+
fname = lib / f"{name}.pas"
|
|
707
|
+
with fname.open("w", encoding="utf-8") as f:
|
|
1278
708
|
f.write(sjson)
|
|
1279
709
|
if metadata is not None:
|
|
1280
710
|
mjson = json.dumps(metadata, cls=PastasEncoder, indent=4)
|
|
1281
|
-
fname_meta =
|
|
1282
|
-
with open(
|
|
711
|
+
fname_meta = lib / f"{name}_meta.pas"
|
|
712
|
+
with fname_meta.open("w", encoding="utf-8") as m:
|
|
1283
713
|
m.write(mjson)
|
|
1284
714
|
# pastas model dict
|
|
1285
715
|
elif isinstance(item, dict):
|
|
1286
716
|
jsondict = json.dumps(item, cls=PastasEncoder, indent=4)
|
|
1287
|
-
fmodel =
|
|
1288
|
-
with open(
|
|
717
|
+
fmodel = lib / f"{name}.pas"
|
|
718
|
+
with fmodel.open("w", encoding="utf-8") as fm:
|
|
1289
719
|
fm.write(jsondict)
|
|
1290
|
-
# oseries_models list
|
|
720
|
+
# oseries_models or stresses_models list
|
|
1291
721
|
elif isinstance(item, list):
|
|
1292
722
|
jsondict = json.dumps(item)
|
|
1293
|
-
fname =
|
|
1294
|
-
with open(
|
|
723
|
+
fname = lib / f"{name}.pas"
|
|
724
|
+
with fname.open("w", encoding="utf-8") as fm:
|
|
1295
725
|
fm.write(jsondict)
|
|
1296
726
|
|
|
1297
|
-
def _get_item(self, libname:
|
|
727
|
+
def _get_item(self, libname: AllLibs, name: str) -> Union[FrameOrSeriesUnion, Dict]:
|
|
1298
728
|
"""Retrieve item (internal method).
|
|
1299
729
|
|
|
1300
730
|
Parameters
|
|
@@ -1310,24 +740,24 @@ class PasConnector(BaseConnector, ConnectorUtil):
|
|
|
1310
740
|
time series or model dictionary
|
|
1311
741
|
"""
|
|
1312
742
|
lib = self._get_library(libname)
|
|
1313
|
-
fjson =
|
|
1314
|
-
if not
|
|
743
|
+
fjson = lib / f"{name}.pas"
|
|
744
|
+
if not fjson.exists():
|
|
1315
745
|
msg = f"Item '{name}' not in '{libname}' library."
|
|
1316
746
|
raise FileNotFoundError(msg)
|
|
1317
747
|
# model
|
|
1318
748
|
if libname == "models":
|
|
1319
|
-
with open(
|
|
749
|
+
with fjson.open("r", encoding="utf-8") as ml_json:
|
|
1320
750
|
item = json.load(ml_json, object_hook=pastas_hook)
|
|
1321
751
|
# list of models per oseries
|
|
1322
|
-
elif libname
|
|
1323
|
-
with open(
|
|
752
|
+
elif libname in ["oseries_models", "stresses_models"]:
|
|
753
|
+
with fjson.open("r", encoding="utf-8") as f:
|
|
1324
754
|
item = json.load(f)
|
|
1325
755
|
# time series
|
|
1326
756
|
else:
|
|
1327
|
-
item =
|
|
757
|
+
item = series_from_json(fjson)
|
|
1328
758
|
return item
|
|
1329
759
|
|
|
1330
|
-
def _del_item(self, libname:
|
|
760
|
+
def _del_item(self, libname: AllLibs, name: str, force: bool = False) -> None:
|
|
1331
761
|
"""Delete items (series or models) (internal method).
|
|
1332
762
|
|
|
1333
763
|
Parameters
|
|
@@ -1336,18 +766,23 @@ class PasConnector(BaseConnector, ConnectorUtil):
|
|
|
1336
766
|
name of library to delete item from
|
|
1337
767
|
name : str
|
|
1338
768
|
name of item to delete
|
|
769
|
+
force : bool, optional
|
|
770
|
+
if True, force delete item and do not perform check if series
|
|
771
|
+
is used in a model, by default False
|
|
1339
772
|
"""
|
|
1340
773
|
lib = self._get_library(libname)
|
|
1341
|
-
|
|
774
|
+
if self.validator.PROTECT_SERIES_IN_MODELS and not force:
|
|
775
|
+
self.validator.check_series_in_models(libname, name)
|
|
776
|
+
(lib / f"{name}.pas").unlink()
|
|
1342
777
|
# remove metadata for time series
|
|
1343
|
-
if libname
|
|
778
|
+
if libname in ["oseries", "stresses"]:
|
|
1344
779
|
try:
|
|
1345
|
-
|
|
780
|
+
(lib / f"{name}_meta.pas").unlink()
|
|
1346
781
|
except FileNotFoundError:
|
|
1347
782
|
# Nothing to delete
|
|
1348
783
|
pass
|
|
1349
784
|
|
|
1350
|
-
def _get_metadata(self, libname:
|
|
785
|
+
def _get_metadata(self, libname: TimeSeriesLibs, name: str) -> dict:
|
|
1351
786
|
"""Read metadata (internal method).
|
|
1352
787
|
|
|
1353
788
|
Parameters
|
|
@@ -1363,9 +798,9 @@ class PasConnector(BaseConnector, ConnectorUtil):
|
|
|
1363
798
|
dictionary containing metadata
|
|
1364
799
|
"""
|
|
1365
800
|
lib = self._get_library(libname)
|
|
1366
|
-
mjson =
|
|
1367
|
-
if
|
|
1368
|
-
imeta =
|
|
801
|
+
mjson = lib / f"{name}_meta.pas"
|
|
802
|
+
if mjson.is_file():
|
|
803
|
+
imeta = metadata_from_json(mjson)
|
|
1369
804
|
else:
|
|
1370
805
|
imeta = {}
|
|
1371
806
|
return imeta
|
|
@@ -1399,7 +834,7 @@ class PasConnector(BaseConnector, ConnectorUtil):
|
|
|
1399
834
|
desc : str, optional
|
|
1400
835
|
description for progressbar, by default ""
|
|
1401
836
|
"""
|
|
1402
|
-
max_workers, chunksize =
|
|
837
|
+
max_workers, chunksize = self._get_max_workers_and_chunksize(
|
|
1403
838
|
max_workers, len(names), chunksize
|
|
1404
839
|
)
|
|
1405
840
|
|
|
@@ -1422,36 +857,18 @@ class PasConnector(BaseConnector, ConnectorUtil):
|
|
|
1422
857
|
)
|
|
1423
858
|
return result
|
|
1424
859
|
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
lib
|
|
1440
|
-
return [
|
|
1441
|
-
i[:-4]
|
|
1442
|
-
for i in os.listdir(lib)
|
|
1443
|
-
if i.endswith(".pas")
|
|
1444
|
-
if not i.endswith("_meta.pas")
|
|
1445
|
-
]
|
|
1446
|
-
|
|
1447
|
-
@property
|
|
1448
|
-
def model_names(self):
|
|
1449
|
-
"""List of model names."""
|
|
1450
|
-
lib = self._get_library("models")
|
|
1451
|
-
return [i[:-4] for i in os.listdir(lib) if i.endswith(".pas")]
|
|
1452
|
-
|
|
1453
|
-
@property
|
|
1454
|
-
def oseries_with_models(self):
|
|
1455
|
-
"""List of oseries with models."""
|
|
1456
|
-
lib = self._get_library("oseries_models")
|
|
1457
|
-
return [i[:-4] for i in os.listdir(lib) if i.endswith(".pas")]
|
|
860
|
+
def _list_symbols(self, libname: AllLibs) -> List[str]:
|
|
861
|
+
"""List symbols in a library (internal method).
|
|
862
|
+
|
|
863
|
+
Parameters
|
|
864
|
+
----------
|
|
865
|
+
libname : str
|
|
866
|
+
name of the library
|
|
867
|
+
|
|
868
|
+
Returns
|
|
869
|
+
-------
|
|
870
|
+
list
|
|
871
|
+
list of symbols in the library
|
|
872
|
+
"""
|
|
873
|
+
lib = self._get_library(libname)
|
|
874
|
+
return [i.stem for i in lib.glob("*.pas") if not i.stem.endswith("_meta")]
|