pastastore 1.10.2__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +10 -97
- pastastore/__init__.py +5 -1
- pastastore/base.py +875 -272
- pastastore/connectors.py +359 -816
- pastastore/datasets.py +23 -33
- pastastore/extensions/__init__.py +7 -3
- pastastore/extensions/hpd.py +39 -17
- pastastore/plotting.py +71 -38
- pastastore/store.py +205 -186
- pastastore/styling.py +4 -2
- pastastore/typing.py +12 -0
- pastastore/util.py +322 -88
- pastastore/validator.py +524 -0
- pastastore/version.py +2 -3
- pastastore/yaml_interface.py +37 -39
- {pastastore-1.10.2.dist-info → pastastore-1.12.0.dist-info}/METADATA +17 -11
- pastastore-1.12.0.dist-info/RECORD +31 -0
- {pastastore-1.10.2.dist-info → pastastore-1.12.0.dist-info}/WHEEL +1 -1
- tests/conftest.py +156 -59
- tests/test_001_import.py +2 -1
- tests/test_002_connectors.py +40 -3
- tests/test_003_pastastore.py +60 -29
- tests/test_005_maps_plots.py +12 -0
- tests/test_006_benchmark.py +1 -1
- tests/test_007_hpdextension.py +46 -8
- tests/test_009_parallel.py +393 -0
- pastastore-1.10.2.dist-info/RECORD +0 -28
- {pastastore-1.10.2.dist-info → pastastore-1.12.0.dist-info}/licenses/LICENSE +0 -0
- {pastastore-1.10.2.dist-info → pastastore-1.12.0.dist-info}/top_level.txt +0 -0
pastastore/connectors.py
CHANGED
@@ -4,650 +4,40 @@ import json
 import logging
 import os
 import warnings
-from collections.abc import Iterable
 from concurrent.futures import ProcessPoolExecutor
 from copy import deepcopy
 from functools import partial
+from multiprocessing import Manager
+from pathlib import Path
 
 # import weakref
 from typing import Callable, Dict, List, Optional, Tuple, Union
 
 import pandas as pd
-import pastas as ps
-from numpy import isin
-from packaging.version import parse as parse_version
-from pandas.testing import assert_series_equal
 from pastas.io.pas import PastasEncoder, pastas_hook
 from tqdm.auto import tqdm
 from tqdm.contrib.concurrent import process_map
 
 from pastastore.base import BaseConnector, ModelAccessor
-from pastastore.
-from pastastore.
+from pastastore.typing import AllLibs, FrameOrSeriesUnion, TimeSeriesLibs
+from pastastore.util import _custom_warning, metadata_from_json, series_from_json
+from pastastore.validator import Validator
 
-FrameorSeriesUnion = Union[pd.DataFrame, pd.Series]
 warnings.showwarning = _custom_warning
 
 logger = logging.getLogger(__name__)
 
+# Global connector for multiprocessing workaround
+# This is required for connectors (like ArcticDBConnector) that cannot be pickled.
+# The initializer function in _parallel() sets this global variable in each worker
+# process, allowing unpicklable connectors to be used with multiprocessing.
+# See: https://docs.python.org/3/library/concurrent.futures.html#processpoolexecutor
+# Note: Using simple None type to avoid circular import issues
+conn = None
 
-class ConnectorUtil:
-    """Mix-in class for general Connector helper functions.
 
-
-
-    """
-
-    def _parse_names(
-        self,
-        names: Optional[Union[list, str]] = None,
-        libname: Optional[str] = "oseries",
-    ) -> list:
-        """Parse names kwarg, returns iterable with name(s) (internal method).
-
-        Parameters
-        ----------
-        names : Union[list, str], optional
-            str or list of str or None or 'all' (last two options
-            retrieves all names)
-        libname : str, optional
-            name of library, default is 'oseries'
-
-        Returns
-        -------
-        list
-            list of names
-        """
-        if not isinstance(names, str) and isinstance(names, Iterable):
-            return names
-        elif isinstance(names, str) and names != "all":
-            return [names]
-        elif names is None or names == "all":
-            if libname == "oseries":
-                return self.oseries_names
-            elif libname == "stresses":
-                return self.stresses_names
-            elif libname == "models":
-                return self.model_names
-            elif libname == "oseries_models":
-                return self.oseries_with_models
-            else:
-                raise ValueError(f"No library '{libname}'!")
-        else:
-            raise NotImplementedError(f"Cannot parse 'names': {names}")
-
-    @staticmethod
-    def _meta_list_to_frame(metalist: list, names: list):
-        """Convert list of metadata dictionaries to DataFrame.
-
-        Parameters
-        ----------
-        metalist : list
-            list of metadata dictionaries
-        names : list
-            list of names corresponding to data in metalist
-
-        Returns
-        -------
-        pandas.DataFrame
-            DataFrame containing overview of metadata
-        """
-        # convert to dataframe
-        if len(metalist) > 1:
-            meta = pd.DataFrame(metalist)
-            if len({"x", "y"}.difference(meta.columns)) == 0:
-                meta["x"] = meta["x"].astype(float)
-                meta["y"] = meta["y"].astype(float)
-        elif len(metalist) == 1:
-            meta = pd.DataFrame(metalist)
-        elif len(metalist) == 0:
-            meta = pd.DataFrame()
-
-        meta.index = names
-        meta.index.name = "name"
-        return meta
-
-    def _parse_model_dict(self, mdict: dict, update_ts_settings: bool = False):
-        """Parse dictionary describing pastas models (internal method).
-
-        Parameters
-        ----------
-        mdict : dict
-            dictionary describing pastas.Model
-        update_ts_settings : bool, optional
-            update stored tmin and tmax in time series settings
-            based on time series loaded from store.
-
-        Returns
-        -------
-        ml : pastas.Model
-            time series analysis model
-        """
-        PASFILE_LEQ_022 = parse_version(
-            mdict["file_info"]["pastas_version"]
-        ) <= parse_version("0.22.0")
-
-        # oseries
-        if "series" not in mdict["oseries"]:
-            name = str(mdict["oseries"]["name"])
-            if name not in self.oseries.index:
-                msg = "oseries '{}' not present in library".format(name)
-                raise LookupError(msg)
-            mdict["oseries"]["series"] = self.get_oseries(name).squeeze()
-            # update tmin/tmax from time series
-            if update_ts_settings:
-                mdict["oseries"]["settings"]["tmin"] = mdict["oseries"]["series"].index[
-                    0
-                ]
-                mdict["oseries"]["settings"]["tmax"] = mdict["oseries"]["series"].index[
-                    -1
-                ]
-
-        # StressModel, WellModel
-        for ts in mdict["stressmodels"].values():
-            if "stress" in ts.keys():
-                # WellModel
-                classkey = "stressmodel" if PASFILE_LEQ_022 else "class"
-                if ts[classkey] == "WellModel":
-                    for stress in ts["stress"]:
-                        if "series" not in stress:
-                            name = str(stress["name"])
-                            if name in self.stresses.index:
-                                stress["series"] = self.get_stresses(name).squeeze()
-                                # update tmin/tmax from time series
-                                if update_ts_settings:
-                                    stress["settings"]["tmin"] = stress["series"].index[
-                                        0
-                                    ]
-                                    stress["settings"]["tmax"] = stress["series"].index[
-                                        -1
-                                    ]
-                # StressModel
-                else:
-                    for stress in ts["stress"] if PASFILE_LEQ_022 else [ts["stress"]]:
-                        if "series" not in stress:
-                            name = str(stress["name"])
-                            if name in self.stresses.index:
-                                stress["series"] = self.get_stresses(name).squeeze()
-                                # update tmin/tmax from time series
-                                if update_ts_settings:
-                                    stress["settings"]["tmin"] = stress["series"].index[
-                                        0
-                                    ]
-                                    stress["settings"]["tmax"] = stress["series"].index[
-                                        -1
-                                    ]
-
-            # RechargeModel, TarsoModel
-            if ("prec" in ts.keys()) and ("evap" in ts.keys()):
-                for stress in [ts["prec"], ts["evap"]]:
-                    if "series" not in stress:
-                        name = str(stress["name"])
-                        if name in self.stresses.index:
-                            stress["series"] = self.get_stresses(name).squeeze()
-                            # update tmin/tmax from time series
-                            if update_ts_settings:
-                                stress["settings"]["tmin"] = stress["series"].index[0]
-                                stress["settings"]["tmax"] = stress["series"].index[-1]
-                        else:
-                            msg = "stress '{}' not present in library".format(name)
-                            raise KeyError(msg)
-
-        # hack for pcov w dtype object (when filled with NaNs on store?)
-        if "fit" in mdict:
-            if "pcov" in mdict["fit"]:
-                pcov = mdict["fit"]["pcov"]
-                if pcov.dtypes.apply(lambda dtyp: isinstance(dtyp, object)).any():
-                    mdict["fit"]["pcov"] = pcov.astype(float)
-
-        # check pastas version vs pas-file version
-        file_version = mdict["file_info"]["pastas_version"]
-
-        # check file version and pastas version
-        # if file<0.23 and pastas>=1.0 --> error
-        PASTAS_GT_023 = parse_version(ps.__version__) > parse_version("0.23.1")
-        if PASFILE_LEQ_022 and PASTAS_GT_023:
-            raise UserWarning(
-                f"This file was created with Pastas v{file_version} "
-                f"and cannot be loaded with Pastas v{ps.__version__} Please load and "
-                "save the file with Pastas 0.23 first to update the file "
-                "format."
-            )
-
-        try:
-            # pastas>=0.15.0
-            ml = ps.io.base._load_model(mdict)
-        except AttributeError:
-            # pastas<0.15.0
-            ml = ps.io.base.load_model(mdict)
-        return ml
-
-    @staticmethod
-    def _validate_input_series(series):
-        """Check if series is pandas.DataFrame or pandas.Series.
-
-        Parameters
-        ----------
-        series : object
-            object to validate
-
-        Raises
-        ------
-        TypeError
-            if object is not of type pandas.DataFrame or pandas.Series
-        """
-        if not (isinstance(series, pd.DataFrame) or isinstance(series, pd.Series)):
-            raise TypeError("Please provide pandas.DataFrame or pandas.Series!")
-        if isinstance(series, pd.DataFrame):
-            if series.columns.size > 1:
-                raise ValueError("Only DataFrames with one column are supported!")
-
-    @staticmethod
-    def _set_series_name(series, name):
-        """Set series name to match user defined name in store.
-
-        Parameters
-        ----------
-        series : pandas.Series or pandas.DataFrame
-            set name for this time series
-        name : str
-            name of the time series (used in the pastastore)
-        """
-        if isinstance(series, pd.Series):
-            series.name = name
-            # empty string on index name causes trouble when reading
-            # data from ArcticDB: TODO: check if still an issue?
-            if series.index.name == "":
-                series.index.name = None
-
-        if isinstance(series, pd.DataFrame):
-            series.columns = [name]
-            # check for hydropandas objects which are instances of DataFrame but
-            # do have a name attribute
-            if hasattr(series, "name"):
-                series.name = name
-        return series
-
-    @staticmethod
-    def _check_stressmodels_supported(ml):
-        supported_stressmodels = [
-            "StressModel",
-            "StressModel2",
-            "RechargeModel",
-            "WellModel",
-            "TarsoModel",
-            "Constant",
-            "LinearTrend",
-            "StepModel",
-        ]
-        if isinstance(ml, ps.Model):
-            smtyps = [sm._name for sm in ml.stressmodels.values()]
-        elif isinstance(ml, dict):
-            classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
-            smtyps = [sm[classkey] for sm in ml["stressmodels"].values()]
-        check = isin(smtyps, supported_stressmodels)
-        if not all(check):
-            unsupported = set(smtyps) - set(supported_stressmodels)
-            raise NotImplementedError(
-                "PastaStore does not support storing models with the "
-                f"following stressmodels: {unsupported}"
-            )
-
-    @staticmethod
-    def _check_model_series_names_for_store(ml):
-        prec_evap_model = ["RechargeModel", "TarsoModel"]
-
-        if isinstance(ml, ps.Model):
-            series_names = [
-                istress.series.name
-                for sm in ml.stressmodels.values()
-                for istress in sm.stress
-            ]
-
-        elif isinstance(ml, dict):
-            # non RechargeModel, Tarsomodel, WellModel stressmodels
-            classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
-            if PASTAS_LEQ_022:
-                series_names = [
-                    istress["name"]
-                    for sm in ml["stressmodels"].values()
-                    if sm[classkey] not in (prec_evap_model + ["WellModel"])
-                    for istress in sm["stress"]
-                ]
-            else:
-                series_names = [
-                    sm["stress"]["name"]
-                    for sm in ml["stressmodels"].values()
-                    if sm[classkey] not in (prec_evap_model + ["WellModel"])
-                ]
-
-            # WellModel
-            if isin(
-                ["WellModel"],
-                [i[classkey] for i in ml["stressmodels"].values()],
-            ).any():
-                series_names += [
-                    istress["name"]
-                    for sm in ml["stressmodels"].values()
-                    if sm[classkey] in ["WellModel"]
-                    for istress in sm["stress"]
-                ]
-
-            # RechargeModel, TarsoModel
-            if isin(
-                prec_evap_model,
-                [i[classkey] for i in ml["stressmodels"].values()],
-            ).any():
-                series_names += [
-                    istress["name"]
-                    for sm in ml["stressmodels"].values()
-                    if sm[classkey] in prec_evap_model
-                    for istress in [sm["prec"], sm["evap"]]
-                ]
-
-        else:
-            raise TypeError("Expected pastas.Model or dict!")
-        if len(series_names) - len(set(series_names)) > 0:
-            msg = (
-                "There are multiple stresses series with the same name! "
-                "Each series name must be unique for the PastaStore!"
-            )
-            raise ValueError(msg)
-
-    def _check_oseries_in_store(self, ml: Union[ps.Model, dict]):
-        """Check if Model oseries are contained in PastaStore (internal method).
-
-        Parameters
-        ----------
-        ml : Union[ps.Model, dict]
-            pastas Model
-        """
-        if isinstance(ml, ps.Model):
-            name = ml.oseries.name
-        elif isinstance(ml, dict):
-            name = str(ml["oseries"]["name"])
-        else:
-            raise TypeError("Expected pastas.Model or dict!")
-        if name not in self.oseries.index:
-            msg = (
-                f"Cannot add model because oseries '{name}' is not contained in store."
-            )
-            raise LookupError(msg)
-        # expensive check
-        if self.CHECK_MODEL_SERIES_VALUES and isinstance(ml, ps.Model):
-            s_org = self.get_oseries(name).squeeze().dropna()
-            if PASTAS_LEQ_022:
-                so = ml.oseries.series_original
-            else:
-                so = ml.oseries._series_original
-            try:
-                assert_series_equal(
-                    so.dropna(),
-                    s_org,
-                    atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
-                    rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
-                )
-            except AssertionError as e:
-                raise ValueError(
-                    f"Cannot add model because model oseries '{name}'"
-                    " is different from stored oseries! See stacktrace for differences."
-                ) from e
-
-    def _check_stresses_in_store(self, ml: Union[ps.Model, dict]):
-        """Check if stresses time series are contained in PastaStore (internal method).
-
-        Parameters
-        ----------
-        ml : Union[ps.Model, dict]
-            pastas Model
-        """
-        prec_evap_model = ["RechargeModel", "TarsoModel"]
-        if isinstance(ml, ps.Model):
-            for sm in ml.stressmodels.values():
-                if sm._name in prec_evap_model:
-                    stresses = [sm.prec, sm.evap]
-                else:
-                    stresses = sm.stress
-                for s in stresses:
-                    if str(s.name) not in self.stresses.index:
-                        msg = (
-                            f"Cannot add model because stress '{s.name}' "
-                            "is not contained in store."
-                        )
-                        raise LookupError(msg)
-                    if self.CHECK_MODEL_SERIES_VALUES:
-                        s_org = self.get_stresses(s.name).squeeze()
-                        if PASTAS_LEQ_022:
-                            so = s.series_original
-                        else:
-                            so = s._series_original
-                        try:
-                            assert_series_equal(
-                                so,
-                                s_org,
-                                atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
-                                rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
-                            )
-                        except AssertionError as e:
-                            raise ValueError(
-                                f"Cannot add model because model stress "
-                                f"'{s.name}' is different from stored stress! "
-                                "See stacktrace for differences."
-                            ) from e
-        elif isinstance(ml, dict):
-            for sm in ml["stressmodels"].values():
-                classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
-                if sm[classkey] in prec_evap_model:
-                    stresses = [sm["prec"], sm["evap"]]
-                elif sm[classkey] in ["WellModel"]:
-                    stresses = sm["stress"]
-                else:
-                    stresses = sm["stress"] if PASTAS_LEQ_022 else [sm["stress"]]
-                for s in stresses:
-                    if str(s["name"]) not in self.stresses.index:
-                        msg = (
-                            f"Cannot add model because stress '{s['name']}' "
-                            "is not contained in store."
-                        )
-                        raise LookupError(msg)
-        else:
-            raise TypeError("Expected pastas.Model or dict!")
-
-    def _stored_series_to_json(
-        self,
-        libname: str,
-        names: Optional[Union[list, str]] = None,
-        squeeze: bool = True,
-        progressbar: bool = False,
-    ):
-        """Write stored series to JSON.
-
-        Parameters
-        ----------
-        libname : str
-            library name
-        names : Optional[Union[list, str]], optional
-            names of series, by default None
-        squeeze : bool, optional
-            return single entry as json string instead
-            of list, by default True
-        progressbar : bool, optional
-            show progressbar, by default False
-
-        Returns
-        -------
-        files : list or str
-            list of series converted to JSON string or single string
-            if single entry is returned and squeeze is True
-        """
-        names = self._parse_names(names, libname=libname)
-        files = []
-        for n in tqdm(names, desc=libname) if progressbar else names:
-            s = self._get_series(libname, n, progressbar=False)
-            if isinstance(s, pd.Series):
-                s = s.to_frame()
-            try:
-                sjson = s.to_json(orient="columns")
-            except ValueError as e:
-                msg = (
-                    f"DatetimeIndex of '{n}' probably contains NaT "
-                    "or duplicate timestamps!"
-                )
-                raise ValueError(msg) from e
-            files.append(sjson)
-        if len(files) == 1 and squeeze:
-            return files[0]
-        else:
-            return files
-
-    def _stored_metadata_to_json(
-        self,
-        libname: str,
-        names: Optional[Union[list, str]] = None,
-        squeeze: bool = True,
-        progressbar: bool = False,
-    ):
-        """Write metadata from stored series to JSON.
-
-        Parameters
-        ----------
-        libname : str
-            library containing series
-        names : Optional[Union[list, str]], optional
-            names to parse, by default None
-        squeeze : bool, optional
-            return single entry as json string instead of list, by default True
-        progressbar : bool, optional
-            show progressbar, by default False
-
-        Returns
-        -------
-        files : list or str
-            list of json string
-        """
-        names = self._parse_names(names, libname=libname)
-        files = []
-        for n in tqdm(names, desc=libname) if progressbar else names:
-            meta = self.get_metadata(libname, n, as_frame=False)
-            meta_json = json.dumps(meta, cls=PastasEncoder, indent=4)
-            files.append(meta_json)
-        if len(files) == 1 and squeeze:
-            return files[0]
-        else:
-            return files
-
-    def _series_to_archive(
-        self,
-        archive,
-        libname: str,
-        names: Optional[Union[list, str]] = None,
-        progressbar: bool = True,
-    ):
-        """Write DataFrame or Series to zipfile (internal method).
-
-        Parameters
-        ----------
-        archive : zipfile.ZipFile
-            reference to an archive to write data to
-        libname : str
-            name of the library to write to zipfile
-        names : str or list of str, optional
-            names of the time series to write to archive, by default None,
-            which writes all time series to archive
-        progressbar : bool, optional
-            show progressbar, by default True
-        """
-        names = self._parse_names(names, libname=libname)
-        for n in tqdm(names, desc=libname) if progressbar else names:
-            sjson = self._stored_series_to_json(
-                libname, names=n, progressbar=False, squeeze=True
-            )
-            meta_json = self._stored_metadata_to_json(
-                libname, names=n, progressbar=False, squeeze=True
-            )
-            archive.writestr(f"{libname}/{n}.pas", sjson)
-            archive.writestr(f"{libname}/{n}_meta.pas", meta_json)
-
-    def _models_to_archive(self, archive, names=None, progressbar=True):
-        """Write pastas.Model to zipfile (internal method).
-
-        Parameters
-        ----------
-        archive : zipfile.ZipFile
-            reference to an archive to write data to
-        names : str or list of str, optional
-            names of the models to write to archive, by default None,
-            which writes all models to archive
-        progressbar : bool, optional
-            show progressbar, by default True
-        """
-        names = self._parse_names(names, libname="models")
-        for n in tqdm(names, desc="models") if progressbar else names:
-            m = self.get_models(n, return_dict=True)
-            jsondict = json.dumps(m, cls=PastasEncoder, indent=4)
-            archive.writestr(f"models/{n}.pas", jsondict)
-
-    @staticmethod
-    def _series_from_json(fjson: str, squeeze: bool = True):
-        """Load time series from JSON.
-
-        Parameters
-        ----------
-        fjson : str
-            path to file
-        squeeze : bool, optional
-            squeeze time series object to obtain pandas Series
-
-        Returns
-        -------
-        s : pd.DataFrame
-            DataFrame containing time series
-        """
-        s = pd.read_json(fjson, orient="columns", precise_float=True, dtype=False)
-        if not isinstance(s.index, pd.DatetimeIndex):
-            s.index = pd.to_datetime(s.index, unit="ms")
-        s = s.sort_index()  # needed for some reason ...
-        if squeeze:
-            return s.squeeze(axis="columns")
-        return s
-
-    @staticmethod
-    def _metadata_from_json(fjson: str):
-        """Load metadata dictionary from JSON.
-
-        Parameters
-        ----------
-        fjson : str
-            path to file
-
-        Returns
-        -------
-        meta : dict
-            dictionary containing metadata
-        """
-        with open(fjson, "r") as f:
-            meta = json.load(f)
-        return meta
-
-    def _get_model_orphans(self):
-        """Get models whose oseries no longer exist in database.
-
-        Returns
-        -------
-        dict
-            dictionary with oseries names as keys and lists of model names
-            as values
-        """
-        d = {}
-        for mlnam in tqdm(self.model_names, desc="Identifying model orphans"):
-            mdict = self.get_models(mlnam, return_dict=True)
-            onam = mdict["oseries"]["name"]
-            if onam not in self.oseries_names:
-                if onam in d:
-                    d[onam] = d[onam].append(mlnam)
-                else:
-                    d[onam] = [mlnam]
-        return d
+class ParallelUtil:
+    """Mix-in class for storing parallelizable methods."""
 
     @staticmethod
     def _solve_model(
@@ -675,11 +65,11 @@ class ConnectorUtil:
         arguments are passed to the solve method.
         """
         if connector is not None:
-
+            _conn = connector
         else:
-
+            _conn = globals()["conn"]
 
-        ml =
+        ml = _conn.get_models(ml_name)
         m_kwargs = {}
         for key, value in kwargs.items():
            if isinstance(value, pd.Series):
@@ -693,14 +83,14 @@ class ConnectorUtil:
 
         try:
             ml.solve(report=report, **m_kwargs)
-        except Exception as e:
+        except Exception as e:  # pylint: disable=broad-except
             if ignore_solve_errors:
-                warning = "Solve error ignored for '
+                warning = f"Solve error ignored for '{ml.name}': {e}"
                 logger.warning(warning)
             else:
                 raise e
-
-
+        # store the updated model back in the database
+        _conn.add_model(ml, overwrite=True)
 
     @staticmethod
     def _get_statistics(
@@ -717,13 +107,14 @@ class ConnectorUtil:
         of the apply method.
         """
         if connector is not None:
-
+            _conn = connector
         else:
-
+            _conn = globals()["conn"]
 
-        ml =
+        ml = _conn.get_model(name)
         series = pd.Series(index=statistics, dtype=float)
         for stat in statistics:
+            # Note: ml.stats is part of pastas.Model public API
             series.loc[stat] = getattr(ml.stats, stat)(**kwargs)
         return series
 
@@ -739,17 +130,22 @@ class ConnectorUtil:
             min(32, os.cpu_count() + 4) if max_workers is None else max_workers
         )
         if chunksize is None:
-
+            # 14 chunks per worker balances overhead vs granularity
+            # from stackoverflow link posted in docstring.
+            CHUNKS_PER_WORKER = 14
+            num_chunks = max_workers * CHUNKS_PER_WORKER
             chunksize = max(njobs // num_chunks, 1)
         return max_workers, chunksize
 
 
-class ArcticDBConnector(BaseConnector,
+class ArcticDBConnector(BaseConnector, ParallelUtil):
     """ArcticDBConnector object using ArcticDB to store data."""
 
-
+    _conn_type = "arcticdb"
 
-    def __init__(
+    def __init__(
+        self, name: str, uri: str, verbose: bool = True, worker_process: bool = False
+    ):
         """Create an ArcticDBConnector object using ArcticDB to store data.
 
         Parameters
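Editor's note: the new chunking heuristic above is easiest to check with concrete numbers. Below is a minimal standalone sketch of the same arithmetic (an illustration, not the package's actual `_get_max_workers_and_chunksize` helper, whose argument order differs):

```python
import os


def workers_and_chunksize(njobs, max_workers=None, chunksize=None):
    # mirrors the diff: default worker count follows the CPython
    # ProcessPoolExecutor rule min(32, cpu_count + 4), then aim for
    # roughly 14 chunks per worker so the pool can load-balance
    if max_workers is None:
        max_workers = min(32, (os.cpu_count() or 1) + 4)
    if chunksize is None:
        num_chunks = max_workers * 14  # CHUNKS_PER_WORKER
        chunksize = max(njobs // num_chunks, 1)
    return max_workers, chunksize


# e.g. 1000 jobs on an 8-core machine: 12 workers, 1000 // 168 -> chunksize 5
print(workers_and_chunksize(1000))
```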
@@ -759,39 +155,75 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
         uri : str
             URI connection string (e.g. 'lmdb://<your path here>')
         verbose : bool, optional
-            whether to
+            whether to log messages when database is initialized, by default True
+        worker_process : bool, optional
+            whether the connector is created in a worker process for parallel
+            processing, by default False
         """
         try:
             import arcticdb
 
         except ModuleNotFoundError as e:
-
+            logger.error("Please install arcticdb with `pip install arcticdb`!")
             raise e
+
+        # avoid warn on all metadata writes
+        from arcticdb_ext import set_config_string
+
+        set_config_string("PickledMetadata.LogLevel", "DEBUG")
+
         self.uri = uri
         self.name = name
 
+        # initialize validator class to check inputs
+        self._validator = Validator(self)
+
+        # create libraries
         self.libs: dict = {}
         self.arc = arcticdb.Arctic(uri)
         self._initialize(verbose=verbose)
         self.models = ModelAccessor(self)
-
-        #
-
-        #
-
-
+
+        # set shared memory manager flags for parallel operations
+        # NOTE: there is no stored reference to manager object, meaning
+        # that it cannot be properly shutdown. We let the Python garbage collector
+        # do this, but the downside is there is a risk some background
+        # processes potentially continue to run.
+        mgr = Manager()
+        self._oseries_links_need_update = mgr.Value(
+            "_oseries_links_need_update",
+            False,
+        )
+        self._stresses_links_need_update = mgr.Value(
+            "_stresses_links_need_update",
+            False,
+        )
+        if not worker_process:
+            # for older versions of PastaStore, if oseries_models library is empty
+            # populate oseries - models database
+            if (self.n_models > 0) and (
+                len(self.oseries_models) == 0 or len(self.stresses_models) == 0
+            ):
+                self._update_time_series_model_links(recompute=False, progressbar=True)
+            # write pstore file to store database info that can be used to load pstore
+            if "lmdb" in self.uri:
+                self.write_pstore_config_file()
 
     def _initialize(self, verbose: bool = True) -> None:
         """Initialize the libraries (internal method)."""
+        if "lmdb" in self.uri.lower():  # only check for LMDB
+            self.validator.check_config_connector_type(
+                Path(self.uri.split("://")[1]) / self.name
+            )
         for libname in self._default_library_names:
             if self._library_name(libname) not in self.arc.list_libraries():
                 self.arc.create_library(self._library_name(libname))
             else:
                 if verbose:
-
-
-
-
+                    logger.info(
+                        "ArcticDBConnector: library '%s' already exists. "
+                        "Linking to existing library.",
+                        self._library_name(libname),
                     )
                 self.libs[libname] = self._get_library(libname)
 
@@ -809,20 +241,21 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
             "uri": self.uri,
         }
         if path is None and "lmdb" in self.uri:
-            path = self.uri.split("://")[1]
+            path = Path(self.uri.split("://")[1])
         elif path is None and "lmdb" not in self.uri:
             raise ValueError("Please provide a path to write the pastastore file!")
 
-        with open(
-
+        with (path / self.name / f"{self.name}.pastastore").open(
+            "w",
+            encoding="utf-8",
         ) as f:
             json.dump(config, f)
 
-    def _library_name(self, libname:
+    def _library_name(self, libname: AllLibs) -> str:
         """Get full library name according to ArcticDB (internal method)."""
         return ".".join([self.name, libname])
 
-    def _get_library(self, libname:
+    def _get_library(self, libname: AllLibs):
         """Get ArcticDB library handle.
 
         Parameters
@@ -836,13 +269,15 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
             handle to the library
         """
         # get library handle
-
-
+        if libname in self.libs:
+            return self.libs[libname]
+        else:
+            return self.arc.get_library(self._library_name(libname))
 
     def _add_item(
         self,
-        libname:
-        item: Union[
+        libname: AllLibs,
+        item: Union[FrameOrSeriesUnion, Dict],
         name: str,
         metadata: Optional[Dict] = None,
         **_,
@@ -861,14 +296,22 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
             dictionary containing metadata, by default None
         """
         lib = self._get_library(libname)
+
+        # check file name for illegal characters
+        name = self.validator.check_filename_illegal_chars(libname, name)
+
         # only normalizable datatypes can be written with write, else use write_pickle
         # normalizable: Series, DataFrames, Numpy Arrays
         if isinstance(item, (dict, list)):
+            logger.debug(
+                "Writing pickled item '%s' to ArcticDB library '%s'.", name, libname
+            )
             lib.write_pickle(name, item, metadata=metadata)
         else:
+            logger.debug("Writing item '%s' to ArcticDB library '%s'.", name, libname)
             lib.write(name, item, metadata=metadata)
 
-    def _get_item(self, libname:
+    def _get_item(self, libname: AllLibs, name: str) -> Union[FrameOrSeriesUnion, Dict]:
         """Retrieve item from library (internal method).
 
         Parameters
@@ -886,7 +329,7 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
         lib = self._get_library(libname)
         return lib.read(name).data
 
-    def _del_item(self, libname:
+    def _del_item(self, libname: AllLibs, name: str, force: bool = False) -> None:
         """Delete items (series or models) (internal method).
 
         Parameters
@@ -895,11 +338,15 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
             name of library to delete item from
         name : str
             name of item to delete
+        force : bool, optional
+            force deletion even if series is used in models, by default False
         """
         lib = self._get_library(libname)
+        if self.validator.PROTECT_SERIES_IN_MODELS and not force:
+            self.validator.check_series_in_models(libname, name)
         lib.delete(name)
 
-    def _get_metadata(self, libname:
+    def _get_metadata(self, libname: TimeSeriesLibs, name: str) -> dict:
         """Retrieve metadata for an item (internal method).
 
         Parameters
@@ -926,11 +373,27 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
         max_workers: Optional[int] = None,
         chunksize: Optional[int] = None,
         desc: str = "",
+        initializer: Callable = None,
+        initargs: Optional[tuple] = None,
     ):
         """Parallel processing of function.
 
         Does not return results, so function must store results in database.
 
+        Note
+        ----
+        ArcticDB connection objects cannot be pickled, which is required for
+        multiprocessing. This implementation uses an initializer function that
+        creates a new ArcticDBConnector instance in each worker process and stores
+        it in the global `conn` variable. User-provided functions can access this
+        connector via the global `conn` variable.
+
+        This is the standard Python multiprocessing pattern for unpicklable objects.
+        See: https://docs.python.org/3/library/concurrent.futures.html#processpoolexecutor
+
+        For a connector that supports direct method passing (no global variable
+        required), use PasConnector instead.
+
         Parameters
         ----------
         func : function
@@ -947,16 +410,24 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
             chunksize for parallel processing, by default None
         desc : str, optional
             description for progressbar, by default ""
+        initializer : Callable, optional
+            function to initialize each worker process, by default None
+        initargs : tuple, optional
+            arguments to pass to initializer function, by default None
         """
-        max_workers, chunksize =
+        max_workers, chunksize = self._get_max_workers_and_chunksize(
             max_workers, len(names), chunksize
         )
+        if initializer is None:
 
-
-
-
+            def initializer(*args):
+                # assign to module-level variable without using 'global' statement
+                globals()["conn"] = ArcticDBConnector(*args, worker_process=True)
 
-
+            initargs = (self.name, self.uri, False)
+
+        if initargs is None:
+            initargs = ()
 
         if kwargs is None:
             kwargs = {}
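Editor's note: the default initializer above is the heart of the unpicklable-connector workaround described in the Note docstring. The following self-contained sketch shows the same pattern with a dummy stand-in for the real connector; `DummyStore`, `_init_worker`, and `_task` are illustrative names, not pastastore API:

```python
from concurrent.futures import ProcessPoolExecutor

conn = None  # module-level slot, filled in per worker by the initializer


class DummyStore:
    """Stand-in for an unpicklable resource such as an ArcticDB connection."""

    def __init__(self, uri):
        self.uri = uri

    def fetch(self, name):
        return f"{self.uri}:{name}"


def _init_worker(uri):
    # runs once in each worker process; build the unpicklable object here
    globals()["conn"] = DummyStore(uri)


def _task(name):
    # the mapped function reads the per-process global instead of
    # receiving the store as a (pickled) argument
    return conn.fetch(name)


if __name__ == "__main__":
    with ProcessPoolExecutor(
        max_workers=2, initializer=_init_worker, initargs=("lmdb://./db",)
    ) as ex:
        print(list(ex.map(_task, ["a", "b", "c"])))
```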
@@ -979,51 +450,37 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
             result = executor.map(
                 partial(func, **kwargs), names, chunksize=chunksize
             )
-        return result
 
-
-
-        """List of oseries names.
+        # update links if models were stored
+        self._trigger_links_update_if_needed(modelnames=names)
 
-
-        -------
-        list
-            list of oseries in library
-        """
-        return self._get_library("oseries").list_symbols()
-
-    @property
-    def stresses_names(self):
-        """List of stresses names.
+        return result
 
-
-
-        list
-            list of stresses in library
-        """
-        return self._get_library("stresses").list_symbols()
+    def _list_symbols(self, libname: AllLibs) -> List[str]:
+        """List symbols in a library (internal method).
 
-
-
-
+        Parameters
+        ----------
+        libname : str
+            name of the library
 
         Returns
         -------
         list
-            list of
+            list of symbols in the library
         """
-        return self._get_library(
+        return self._get_library(libname).list_symbols()
 
-
-
-
-        return
+    def _item_exists(self, libname: str, name: str) -> bool:
+        """Check if item exists without scanning directory."""
+        lib = self._get_library(libname)
+        return lib.has_symbol(name)
 
 
-class DictConnector(BaseConnector,
+class DictConnector(BaseConnector, ParallelUtil):
     """DictConnector object that stores timeseries and models in dictionaries."""
 
-
+    _conn_type = "dict"
 
     def __init__(self, name: str = "pastas_db"):
         """Create DictConnector object that stores data in dictionaries.
@@ -1033,17 +490,26 @@ class DictConnector(BaseConnector, ConnectorUtil):
         name : str, optional
             user-specified name of the connector
         """
+        super().__init__()
         self.name = name
 
         # create empty dictionaries for series and models
         for val in self._default_library_names:
             setattr(self, "lib_" + val, {})
+        self._validator = Validator(self)
         self.models = ModelAccessor(self)
         # for older versions of PastaStore, if oseries_models library is empty
         # populate oseries - models database
-        self.
+        if (self.n_models > 0) and (
+            len(self.oseries_models) == 0 or len(self.stresses_models) == 0
+        ):
+            self._update_time_series_model_links(recompute=False, progressbar=True)
 
-
+        # delayed update flags
+        self._oseries_links_need_update = False
+        self._stresses_links_need_update = False
+
+    def _get_library(self, libname: AllLibs):
         """Get reference to dictionary holding data.
 
         Parameters
@@ -1061,7 +527,7 @@ class DictConnector(BaseConnector, ConnectorUtil):
     def _add_item(
         self,
         libname: str,
-        item: Union[
+        item: Union[FrameOrSeriesUnion, Dict],
         name: str,
         metadata: Optional[Dict] = None,
         **_,
@@ -1080,12 +546,16 @@ class DictConnector(BaseConnector, ConnectorUtil):
             dictionary containing metadata, by default None
         """
         lib = self._get_library(libname)
-
+
+        # check file name for illegal characters
+        name = self.validator.check_filename_illegal_chars(libname, name)
+
+        if libname in ["models", "oseries_models", "stresses_models"]:
             lib[name] = item
         else:
             lib[name] = (metadata, item)
 
-    def _get_item(self, libname:
+    def _get_item(self, libname: AllLibs, name: str) -> Union[FrameOrSeriesUnion, Dict]:
         """Retrieve item from database (internal method).
 
         Parameters
@@ -1098,16 +568,18 @@ class DictConnector(BaseConnector, ConnectorUtil):
         Returns
         -------
         item : Union[FrameorSeriesUnion, Dict]
-            time series or model dictionary
+            time series or model dictionary, modifying the returned object will not
+            affect the stored data, like in a real database
         """
         lib = self._get_library(libname)
-
+        # deepcopy calls are needed to ensure users cannot change "stored" items
+        if libname in ["models", "oseries_models", "stresses_models"]:
             item = deepcopy(lib[name])
         else:
             item = deepcopy(lib[name][1])
         return item
 
-    def _del_item(self, libname:
+    def _del_item(self, libname: AllLibs, name: str, force: bool = False) -> None:
         """Delete items (series or models) (internal method).
 
         Parameters
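Editor's note: the deepcopy-on-read change above gives DictConnector the copy semantics of a real database, as the updated docstring says: mutating a retrieved object leaves the stored one intact. A minimal illustration of that guarantee, using a plain dict shaped like the connector's (metadata, series) tuples:

```python
from copy import deepcopy

import pandas as pd

# in-memory "library" holding (metadata, series) tuples, like DictConnector
lib = {"gw_well": ({"x": 0.0}, pd.Series([1.0, 2.0]))}

retrieved = deepcopy(lib["gw_well"][1])  # what _get_item now returns
retrieved.iloc[0] = 999.0

# the "stored" series is untouched, as it would be with an on-disk database
assert lib["gw_well"][1].iloc[0] == 1.0
```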
@@ -1116,11 +588,16 @@ class DictConnector(BaseConnector, ConnectorUtil):
             name of library to delete item from
         name : str
             name of item to delete
+        force : bool, optional
+            if True, force delete item and do not perform check if series
+            is used in a model, by default False
         """
+        if self.validator.PROTECT_SERIES_IN_MODELS and not force:
+            self.validator.check_series_in_models(libname, name)
         lib = self._get_library(libname)
         _ = lib.pop(name)
 
-    def _get_metadata(self, libname:
+    def _get_metadata(self, libname: TimeSeriesLibs, name: str) -> dict:
         """Read metadata (internal method).
 
         Parameters
@@ -1140,40 +617,45 @@ class DictConnector(BaseConnector, ConnectorUtil):
         return imeta
 
     def _parallel(self, *args, **kwargs) -> None:
+        """Parallel implementation method.
+
+        Raises
+        ------
+        NotImplementedError
+            DictConnector uses in-memory storage that cannot be shared across
+            processes. Use PasConnector or ArcticDBConnector for parallel operations.
+        """
         raise NotImplementedError(
             "DictConnector does not support parallel processing,"
             " use PasConnector or ArcticDBConnector."
         )
 
-
-
-        """List of oseries names."""
-        lib = self._get_library("oseries")
-        return list(lib.keys())
+    def _list_symbols(self, libname: AllLibs) -> List[str]:
+        """List symbols in a library (internal method).
 
-
-
-
-
-        return list(lib.keys())
+        Parameters
+        ----------
+        libname : str
+            name of the library
 
-
-
-
-
+        Returns
+        -------
+        list
+            list of symbols in the library
+        """
+        lib = self._get_library(libname)
         return list(lib.keys())
 
-
-
-
-
-        return list(lib.keys())
+    def _item_exists(self, libname: str, name: str) -> bool:
+        """Check if item exists without scanning directory."""
+        lib = self._get_library(libname)
+        return name in lib
 
 
-class PasConnector(BaseConnector,
+class PasConnector(BaseConnector, ParallelUtil):
     """PasConnector object that stores time series and models as JSON files on disk."""
 
-
+    _conn_type = "pas"
 
     def __init__(self, name: str, path: str, verbose: bool = True):
         """Create PasConnector object that stores data as JSON files on disk.
@@ -1190,47 +672,72 @@ class PasConnector(BaseConnector, ConnectorUtil):
         verbose : bool, optional
             whether to print message when database is initialized, by default True
         """
+        # set shared memory flags for parallel processing
+        super().__init__()
         self.name = name
-        self.parentdir = path
-        self.path =
-        self.relpath = os.path.relpath(self.
+        self.parentdir = Path(path)
+        self.path = (self.parentdir / self.name).absolute()
+        self.relpath = os.path.relpath(self.parentdir)
+        self._validator = Validator(self)
         self._initialize(verbose=verbose)
         self.models = ModelAccessor(self)
+
+        # set shared memory manager flags for parallel operations
+        # NOTE: there is no stored reference to manager object, meaning
+        # that it cannot be properly shutdown. We let the Python garbage collector
+        # do this, but the downside is there is a risk some background
+        # processes potentially continue to run.
+        mgr = Manager()
+        self._oseries_links_need_update = mgr.Value(
+            "_oseries_links_need_update",
+            False,
+        )
+        self._stresses_links_need_update = mgr.Value(
+            "_stresses_links_need_update",
+            False,
+        )
+
         # for older versions of PastaStore, if oseries_models library is empty
         # populate oseries_models library
-        self.
+        if (self.n_models > 0) and (
+            len(self.oseries_models) == 0 or len(self.stresses_models) == 0
+        ):
+            self._update_time_series_model_links(recompute=False, progressbar=True)
         # write pstore file to store database info that can be used to load pstore
         self._write_pstore_config_file()
 
     def _initialize(self, verbose: bool = True) -> None:
         """Initialize the libraries (internal method)."""
+        self.validator.check_config_connector_type(self.path)
         for val in self._default_library_names:
-            libdir =
-            if not
+            libdir = self.path / val
+            if not libdir.exists():
                 if verbose:
-
-
+                    logger.info(
+                        "PasConnector: library '%s' created in '%s'", val, libdir
+                    )
+                libdir.mkdir(parents=True, exist_ok=False)
             else:
                 if verbose:
-
-
-
+                    logger.info(
+                        "PasConnector: library '%s' already exists. "
+                        "Linking to existing directory: '%s'",
+                        val,
+                        libdir,
                     )
-            setattr(self, f"lib_{val}",
+            setattr(self, f"lib_{val}", self.path / val)
 
     def _write_pstore_config_file(self):
         """Write pstore configuration file to store database info."""
         config = {
             "connector_type": self.conn_type,
             "name": self.name,
-            "path":
+            "path": str(self.parentdir.absolute()),
         }
-        with open(
-            os.path.join(self.path, f"{self.name}.pastastore"), "w", encoding="utf-8"
-        ) as f:
+        with (self.path / f"{self.name}.pastastore").open("w", encoding="utf-8") as f:
             json.dump(config, f)
 
-    def _get_library(self, libname:
+    def _get_library(self, libname: AllLibs) -> Path:
         """Get path to directory holding data.
 
         Parameters
@@ -1243,12 +750,12 @@ class PasConnector(BaseConnector, ConnectorUtil):
         lib : str
             path to library
         """
-        return getattr(self, "lib_" + libname)
+        return Path(getattr(self, "lib_" + libname))
 
     def _add_item(
         self,
         libname: str,
-        item: Union[
+        item: Union[FrameOrSeriesUnion, Dict],
         name: str,
         metadata: Optional[Dict] = None,
         **_,
@@ -1268,33 +775,52 @@ class PasConnector(BaseConnector, ConnectorUtil):
         """
         lib = self._get_library(libname)
 
+        # check file name for illegal characters
+        name = self.validator.check_filename_illegal_chars(libname, name)
+
         # time series
         if isinstance(item, pd.Series):
             item = item.to_frame()
         if isinstance(item, pd.DataFrame):
-
-
-
+            if type(item) is pd.DataFrame:
+                sjson = item.to_json(orient="columns")
+            else:
+                # workaround for subclasses of DataFrame that override to_json,
+                # looking at you hydropandas...
+                sjson = pd.DataFrame(item).to_json(orient="columns")
+            if name.endswith("_meta"):
+                raise ValueError(
+                    "Time series name cannot end with '_meta'. "
+                    "Please use a different name for your time series."
+                )
+            fname = lib / f"{name}.pas"
+            with fname.open("w", encoding="utf-8") as f:
+                logger.debug("Writing time series '%s' to disk at '%s'.", name, fname)
                 f.write(sjson)
             if metadata is not None:
                 mjson = json.dumps(metadata, cls=PastasEncoder, indent=4)
-                fname_meta =
-                with open(
+                fname_meta = lib / f"{name}_meta.pas"
+                with fname_meta.open("w", encoding="utf-8") as m:
+                    logger.debug(
+                        "Writing metadata '%s' to disk at '%s'.", name, fname_meta
+                    )
                     m.write(mjson)
         # pastas model dict
         elif isinstance(item, dict):
             jsondict = json.dumps(item, cls=PastasEncoder, indent=4)
-            fmodel =
-            with open(
+            fmodel = lib / f"{name}.pas"
+            with fmodel.open("w", encoding="utf-8") as fm:
+                logger.debug("Writing model '%s' to disk at '%s'.", name, fmodel)
                 fm.write(jsondict)
-        # oseries_models list
+        # oseries_models or stresses_models list
         elif isinstance(item, list):
             jsondict = json.dumps(item)
-            fname =
-            with open(
+            fname = lib / f"{name}.pas"
+            with fname.open("w", encoding="utf-8") as fm:
+                logger.debug("Writing link list '%s' to disk at '%s'.", name, fname)
                 fm.write(jsondict)
 
-    def _get_item(self, libname:
+    def _get_item(self, libname: AllLibs, name: str) -> Union[FrameOrSeriesUnion, Dict]:
         """Retrieve item (internal method).
 
         Parameters
@@ -1310,24 +836,24 @@ class PasConnector(BaseConnector, ConnectorUtil):
            time series or model dictionary
         """
         lib = self._get_library(libname)
-        fjson =
-        if not
+        fjson = lib / f"{name}.pas"
+        if not fjson.exists():
             msg = f"Item '{name}' not in '{libname}' library."
             raise FileNotFoundError(msg)
         # model
         if libname == "models":
-            with open(
+            with fjson.open("r", encoding="utf-8") as ml_json:
                 item = json.load(ml_json, object_hook=pastas_hook)
         # list of models per oseries
-        elif libname
-            with open(
+        elif libname in ["oseries_models", "stresses_models"]:
+            with fjson.open("r", encoding="utf-8") as f:
                 item = json.load(f)
         # time series
         else:
-            item =
+            item = series_from_json(fjson)
         return item
 
-    def _del_item(self, libname:
+    def _del_item(self, libname: AllLibs, name: str, force: bool = False) -> None:
         """Delete items (series or models) (internal method).
 
         Parameters
@@ -1336,18 +862,23 @@ class PasConnector(BaseConnector, ConnectorUtil):
             name of library to delete item from
         name : str
             name of item to delete
+        force : bool, optional
+            if True, force delete item and do not perform check if series
+            is used in a model, by default False
         """
         lib = self._get_library(libname)
-
+        if self.validator.PROTECT_SERIES_IN_MODELS and not force:
+            self.validator.check_series_in_models(libname, name)
+        (lib / f"{name}.pas").unlink()
         # remove metadata for time series
-        if libname
+        if libname in ["oseries", "stresses"]:
             try:
-
+                (lib / f"{name}_meta.pas").unlink()
             except FileNotFoundError:
                 # Nothing to delete
                 pass
 
-    def _get_metadata(self, libname:
+    def _get_metadata(self, libname: TimeSeriesLibs, name: str) -> dict:
         """Read metadata (internal method).
 
         Parameters
@@ -1363,9 +894,9 @@ class PasConnector(BaseConnector, ConnectorUtil):
             dictionary containing metadata
         """
         lib = self._get_library(libname)
-        mjson =
-        if
-            imeta =
+        mjson = lib / f"{name}_meta.pas"
+        if mjson.is_file():
+            imeta = metadata_from_json(mjson)
         else:
             imeta = {}
         return imeta
@@ -1379,6 +910,8 @@ class PasConnector(BaseConnector, ConnectorUtil):
         max_workers: Optional[int] = None,
         chunksize: Optional[int] = None,
         desc: str = "",
+        initializer: Callable = None,
+        initargs: Optional[tuple] = None,
     ):
         """Parallel processing of function.
 
@@ -1398,8 +931,12 @@ class PasConnector(BaseConnector, ConnectorUtil):
             chunksize for parallel processing, by default None
         desc : str, optional
             description for progressbar, by default ""
+        initializer : Callable, optional
+            function to initialize each worker process, by default None
+        initargs : tuple, optional
+            arguments to pass to initializer function, by default None
         """
-        max_workers, chunksize =
+        max_workers, chunksize = self._get_max_workers_and_chunksize(
             max_workers, len(names), chunksize
         )
 
@@ -1407,51 +944,57 @@ class PasConnector(BaseConnector, ConnectorUtil):
             kwargs = {}
 
         if progressbar:
-
-
-            names,
-
-
-
-
-
+            if initializer is not None:
+                result = []
+                with tqdm(total=len(names), desc=desc) as pbar:
+                    with ProcessPoolExecutor(
+                        max_workers=max_workers,
+                        initializer=initializer,
+                        initargs=initargs,
+                    ) as executor:
+                        for item in executor.map(
+                            partial(func, **kwargs), names, chunksize=chunksize
+                        ):
+                            result.append(item)
+                            pbar.update()
+            else:
+                result = process_map(
+                    partial(func, **kwargs),
+                    names,
+                    max_workers=max_workers,
+                    chunksize=chunksize,
+                    desc=desc,
+                    total=len(names),
+                )
         else:
             with ProcessPoolExecutor(max_workers=max_workers) as executor:
                 result = executor.map(
                     partial(func, **kwargs), names, chunksize=chunksize
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        lib = self._get_library("models")
-        return [i[:-4] for i in os.listdir(lib) if i.endswith(".pas")]
-
-    @property
-    def oseries_with_models(self):
-        """List of oseries with models."""
-        lib = self._get_library("oseries_models")
-        return [i[:-4] for i in os.listdir(lib) if i.endswith(".pas")]
+
+        # update links if models were stored
+        self._trigger_links_update_if_needed(modelnames=names)
+
+        return result
+
+    def _list_symbols(self, libname: AllLibs) -> List[str]:
+        """List symbols in a library (internal method).
+
+        Parameters
+        ----------
+        libname : str
+            name of the library
+
+        Returns
+        -------
+        list
+            list of symbols in the library
+        """
+        lib = self._get_library(libname)
+        return [i.stem for i in lib.glob("*.pas") if not i.stem.endswith("_meta")]
+
+    def _item_exists(self, libname: str, name: str) -> bool:
+        """Check if item exists without scanning directory."""
+        lib = self._get_library(libname)
+        path = lib / f"{name}.pas"
+        return path.exists()