pastastore 1.7.2__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pastastore/base.py +96 -638
- pastastore/connectors.py +917 -18
- pastastore/extensions/hpd.py +4 -4
- pastastore/plotting.py +8 -9
- pastastore/store.py +296 -116
- pastastore/styling.py +4 -4
- pastastore/util.py +15 -11
- pastastore/version.py +1 -1
- pastastore/yaml_interface.py +35 -8
- {pastastore-1.7.2.dist-info → pastastore-1.9.0.dist-info}/LICENSE +1 -1
- {pastastore-1.7.2.dist-info → pastastore-1.9.0.dist-info}/METADATA +37 -45
- pastastore-1.9.0.dist-info/RECORD +28 -0
- {pastastore-1.7.2.dist-info → pastastore-1.9.0.dist-info}/WHEEL +1 -1
- tests/conftest.py +1 -5
- tests/test_003_pastastore.py +28 -10
- tests/test_006_benchmark.py +0 -3
- tests/test_007_hpdextension.py +10 -8
- pastastore-1.7.2.dist-info/RECORD +0 -28
- {pastastore-1.7.2.dist-info → pastastore-1.9.0.dist-info}/top_level.txt +0 -0
pastastore/extensions/hpd.py
CHANGED
|
@@ -409,7 +409,7 @@ class HydroPandasExtension:
|
|
|
409
409
|
meteo_var: str = "RD",
|
|
410
410
|
tmin: Optional[TimeType] = None,
|
|
411
411
|
tmax: Optional[TimeType] = None,
|
|
412
|
-
unit_multiplier: float =
|
|
412
|
+
unit_multiplier: float = 1e3,
|
|
413
413
|
normalize_datetime_index: bool = True,
|
|
414
414
|
fill_missing_obs: bool = True,
|
|
415
415
|
**kwargs,
|
|
@@ -428,7 +428,7 @@ class HydroPandasExtension:
|
|
|
428
428
|
end time
|
|
429
429
|
unit_multiplier : float, optional
|
|
430
430
|
multiply unit by this value before saving it in the store,
|
|
431
|
-
by default
|
|
431
|
+
by default 1e3 (converting m to mm)
|
|
432
432
|
fill_missing_obs : bool, optional
|
|
433
433
|
if True, fill missing observations by getting observations from nearest
|
|
434
434
|
station with data.
|
|
@@ -454,7 +454,7 @@ class HydroPandasExtension:
|
|
|
454
454
|
meteo_var: str = "EV24",
|
|
455
455
|
tmin: Optional[TimeType] = None,
|
|
456
456
|
tmax: Optional[TimeType] = None,
|
|
457
|
-
unit_multiplier: float =
|
|
457
|
+
unit_multiplier: float = 1e3,
|
|
458
458
|
normalize_datetime_index: bool = True,
|
|
459
459
|
fill_missing_obs: bool = True,
|
|
460
460
|
**kwargs,
|
|
@@ -474,7 +474,7 @@ class HydroPandasExtension:
|
|
|
474
474
|
end time
|
|
475
475
|
unit_multiplier : float, optional
|
|
476
476
|
multiply unit by this value before saving it in the store,
|
|
477
|
-
by default
|
|
477
|
+
by default 1e3 (converting m to mm)
|
|
478
478
|
fill_missing_obs : bool, optional
|
|
479
479
|
if True, fill missing observations by getting observations from nearest
|
|
480
480
|
station with data.
|
pastastore/plotting.py
CHANGED
|
@@ -98,8 +98,7 @@ class Plots:
|
|
|
98
98
|
|
|
99
99
|
if len(names) > 20 and split:
|
|
100
100
|
raise ValueError(
|
|
101
|
-
"More than 20 time series leads to too many "
|
|
102
|
-
"subplots, set split=False."
|
|
101
|
+
"More than 20 time series leads to too many subplots, set split=False."
|
|
103
102
|
)
|
|
104
103
|
|
|
105
104
|
if ax is None:
|
|
@@ -542,7 +541,7 @@ class Plots:
|
|
|
542
541
|
|
|
543
542
|
if label is None:
|
|
544
543
|
if extend:
|
|
545
|
-
label = f"No. Models = {len(statsdf)-1}"
|
|
544
|
+
label = f"No. Models = {len(statsdf) - 1}"
|
|
546
545
|
else:
|
|
547
546
|
label = f"No. Models = {len(statsdf)}"
|
|
548
547
|
|
|
@@ -1053,7 +1052,7 @@ class Maps:
|
|
|
1053
1052
|
yi = imeta.pop("y", np.nan)
|
|
1054
1053
|
else:
|
|
1055
1054
|
raise ValueError(
|
|
1056
|
-
"metadata_source must be either
|
|
1055
|
+
"metadata_source must be either 'model' or 'store'!"
|
|
1057
1056
|
)
|
|
1058
1057
|
if np.isnan(xi) or np.isnan(yi):
|
|
1059
1058
|
print(f"No x,y-data for {istress.name}!")
|
|
@@ -1062,7 +1061,7 @@ class Maps:
|
|
|
1062
1061
|
print(f"x,y-data is 0.0 for {istress.name}, not plotting!")
|
|
1063
1062
|
continue
|
|
1064
1063
|
|
|
1065
|
-
stresses.loc[istress.name, :] = (xi, yi, name, f"C{count%10}")
|
|
1064
|
+
stresses.loc[istress.name, :] = (xi, yi, name, f"C{count % 10}")
|
|
1066
1065
|
count += 1
|
|
1067
1066
|
|
|
1068
1067
|
# create figure
|
|
@@ -1083,7 +1082,7 @@ class Maps:
|
|
|
1083
1082
|
xm = float(ometa.pop("x", np.nan))
|
|
1084
1083
|
ym = float(ometa.pop("y", np.nan))
|
|
1085
1084
|
else:
|
|
1086
|
-
raise ValueError("metadata_source must be either
|
|
1085
|
+
raise ValueError("metadata_source must be either 'model' or 'store'!")
|
|
1087
1086
|
|
|
1088
1087
|
po = ax.scatter(xm, ym, s=osize, marker="o", label=oserieslabel, color="k")
|
|
1089
1088
|
legend_list = [po]
|
|
@@ -1250,14 +1249,14 @@ class Maps:
|
|
|
1250
1249
|
if np.isin(st.loc[s, "kind"], kinds):
|
|
1251
1250
|
(c,) = np.where(skind == st.loc[s, "kind"])
|
|
1252
1251
|
if color_lines:
|
|
1253
|
-
color = f"C{c[0]+1}"
|
|
1252
|
+
color = f"C{c[0] + 1}"
|
|
1254
1253
|
else:
|
|
1255
1254
|
color = "k"
|
|
1256
1255
|
segments.append(
|
|
1257
1256
|
[[os["x"], os["y"]], [st.loc[s, "x"], st.loc[s, "y"]]]
|
|
1258
1257
|
)
|
|
1259
1258
|
segment_colors.append(color)
|
|
1260
|
-
scatter_colors.append(f"C{c[0]+1}")
|
|
1259
|
+
scatter_colors.append(f"C{c[0] + 1}")
|
|
1261
1260
|
|
|
1262
1261
|
stused = np.append(stused, s)
|
|
1263
1262
|
|
|
@@ -1294,7 +1293,7 @@ class Maps:
|
|
|
1294
1293
|
[],
|
|
1295
1294
|
marker="o",
|
|
1296
1295
|
color="w",
|
|
1297
|
-
markerfacecolor=f"C{c[0]+1}",
|
|
1296
|
+
markerfacecolor=f"C{c[0] + 1}",
|
|
1298
1297
|
label=kind,
|
|
1299
1298
|
markersize=10,
|
|
1300
1299
|
)
|
pastastore/store.py
CHANGED
|
@@ -4,9 +4,8 @@ import json
|
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
6
|
import warnings
|
|
7
|
-
from concurrent.futures import ProcessPoolExecutor
|
|
8
7
|
from functools import partial
|
|
9
|
-
from typing import Dict, List, Literal, Optional, Tuple, Union
|
|
8
|
+
from typing import Dict, Iterable, List, Literal, Optional, Tuple, Union
|
|
10
9
|
|
|
11
10
|
import numpy as np
|
|
12
11
|
import pandas as pd
|
|
@@ -14,10 +13,9 @@ import pastas as ps
|
|
|
14
13
|
from packaging.version import parse as parse_version
|
|
15
14
|
from pastas.io.pas import pastas_hook
|
|
16
15
|
from tqdm.auto import tqdm
|
|
17
|
-
from tqdm.contrib.concurrent import process_map
|
|
18
16
|
|
|
19
17
|
from pastastore.base import BaseConnector
|
|
20
|
-
from pastastore.connectors import DictConnector
|
|
18
|
+
from pastastore.connectors import ArcticDBConnector, DictConnector, PasConnector
|
|
21
19
|
from pastastore.plotting import Maps, Plots
|
|
22
20
|
from pastastore.util import _custom_warning
|
|
23
21
|
from pastastore.version import PASTAS_GEQ_150, PASTAS_LEQ_022
|
|
@@ -81,6 +79,24 @@ class PastaStore:
|
|
|
81
79
|
self.plots = Plots(self)
|
|
82
80
|
self.yaml = PastastoreYAML(self)
|
|
83
81
|
|
|
82
|
+
@classmethod
|
|
83
|
+
def from_pastastore_config_file(cls, fname):
|
|
84
|
+
"""Create a PastaStore from a pastastore config file."""
|
|
85
|
+
with open(fname, "r") as f:
|
|
86
|
+
cfg = json.load(f)
|
|
87
|
+
|
|
88
|
+
conn_type = cfg.pop("connector_type")
|
|
89
|
+
if conn_type == "pas":
|
|
90
|
+
conn = PasConnector(**cfg)
|
|
91
|
+
elif conn_type == "arcticdb":
|
|
92
|
+
conn = ArcticDBConnector(**cfg)
|
|
93
|
+
else:
|
|
94
|
+
raise ValueError(
|
|
95
|
+
f"Cannot load connector type: '{conn_type}'. "
|
|
96
|
+
"This is only supported for PasConnector and ArcticDBConnector."
|
|
97
|
+
)
|
|
98
|
+
return cls(conn)
|
|
99
|
+
|
|
84
100
|
@property
|
|
85
101
|
def empty(self) -> bool:
|
|
86
102
|
"""Check if the PastaStore is empty."""
|
|
@@ -122,12 +138,43 @@ class PastaStore:
|
|
|
122
138
|
|
|
123
139
|
@property
|
|
124
140
|
def models(self):
|
|
125
|
-
"""Return
|
|
141
|
+
"""Return the ModelAccessor object.
|
|
142
|
+
|
|
143
|
+
The ModelAccessor object allows dictionary-like assignment and access to models.
|
|
144
|
+
In addition it provides some useful utilities for working with stored models
|
|
145
|
+
in the database.
|
|
146
|
+
|
|
147
|
+
Examples
|
|
148
|
+
--------
|
|
149
|
+
Get a model by name::
|
|
150
|
+
|
|
151
|
+
>>> model = pstore.models["my_model"]
|
|
152
|
+
|
|
153
|
+
Store a model in the database::
|
|
154
|
+
|
|
155
|
+
>>> pstore.models["my_model_v2"] = model
|
|
156
|
+
|
|
157
|
+
Get model metadata dataframe::
|
|
158
|
+
|
|
159
|
+
>>> pstore.models.metadata
|
|
160
|
+
|
|
161
|
+
Number of models::
|
|
162
|
+
|
|
163
|
+
>>> len(pstore.models)
|
|
164
|
+
|
|
165
|
+
Random model::
|
|
166
|
+
|
|
167
|
+
>>> model = pstore.models.random()
|
|
168
|
+
|
|
169
|
+
Iterate over stored models::
|
|
170
|
+
|
|
171
|
+
>>> for ml in pstore.models:
|
|
172
|
+
>>> ml.solve()
|
|
126
173
|
|
|
127
174
|
Returns
|
|
128
175
|
-------
|
|
129
|
-
|
|
130
|
-
|
|
176
|
+
ModelAccessor
|
|
177
|
+
ModelAccessor object
|
|
131
178
|
"""
|
|
132
179
|
return self.conn.models
|
|
133
180
|
|
|
@@ -624,8 +671,10 @@ class PastaStore:
|
|
|
624
671
|
self,
|
|
625
672
|
statistics: Union[str, List[str]],
|
|
626
673
|
modelnames: Optional[List[str]] = None,
|
|
674
|
+
parallel: bool = False,
|
|
627
675
|
progressbar: Optional[bool] = False,
|
|
628
676
|
ignore_errors: Optional[bool] = False,
|
|
677
|
+
fancy_output: bool = True,
|
|
629
678
|
**kwargs,
|
|
630
679
|
) -> FrameorSeriesUnion:
|
|
631
680
|
"""Get model statistics.
|
|
@@ -643,6 +692,11 @@ class PastaStore:
|
|
|
643
692
|
ignore_errors : bool, optional
|
|
644
693
|
ignore errors when True, i.e. when trying to calculate statistics
|
|
645
694
|
for non-existent model in modelnames, default is False
|
|
695
|
+
parallel : bool, optional
|
|
696
|
+
use parallel processing, by default False
|
|
697
|
+
fancy_output : bool, optional
|
|
698
|
+
only read if parallel=True, if True, return as DataFrame with statistics,
|
|
699
|
+
otherwise return list of results
|
|
646
700
|
**kwargs
|
|
647
701
|
any arguments that can be passed to the methods for calculating
|
|
648
702
|
statistics
|
|
@@ -657,25 +711,39 @@ class PastaStore:
|
|
|
657
711
|
if isinstance(statistics, str):
|
|
658
712
|
statistics = [statistics]
|
|
659
713
|
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
714
|
+
if parallel:
|
|
715
|
+
kwargs["statistics"] = statistics
|
|
716
|
+
if self.conn.conn_type == "pas":
|
|
717
|
+
kwargs["connector"] = self.conn
|
|
718
|
+
return self.apply(
|
|
719
|
+
"models",
|
|
720
|
+
self.conn._get_statistics,
|
|
721
|
+
modelnames,
|
|
722
|
+
kwargs=kwargs,
|
|
723
|
+
parallel=parallel,
|
|
724
|
+
progressbar=progressbar,
|
|
725
|
+
fancy_output=fancy_output,
|
|
726
|
+
).T # transpose to match serial output
|
|
727
|
+
else:
|
|
728
|
+
# create dataframe for results
|
|
729
|
+
s = pd.DataFrame(index=modelnames, columns=statistics, data=np.nan)
|
|
730
|
+
|
|
731
|
+
# loop through model names
|
|
732
|
+
desc = "Get model statistics"
|
|
733
|
+
for mlname in tqdm(modelnames, desc=desc) if progressbar else modelnames:
|
|
734
|
+
try:
|
|
735
|
+
ml = self.get_models(mlname, progressbar=False)
|
|
736
|
+
except Exception as e:
|
|
737
|
+
if ignore_errors:
|
|
738
|
+
continue
|
|
739
|
+
else:
|
|
740
|
+
raise e
|
|
741
|
+
for stat in statistics:
|
|
742
|
+
value = getattr(ml.stats, stat)(**kwargs)
|
|
743
|
+
s.loc[mlname, stat] = value
|
|
676
744
|
|
|
677
|
-
|
|
678
|
-
|
|
745
|
+
s = s.squeeze()
|
|
746
|
+
return s.astype(float)
|
|
679
747
|
|
|
680
748
|
def create_model(
|
|
681
749
|
self,
|
|
@@ -1235,74 +1303,58 @@ class PastaStore:
|
|
|
1235
1303
|
|
|
1236
1304
|
modelnames = self.conn._parse_names(modelnames, libname="models")
|
|
1237
1305
|
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
report=report,
|
|
1241
|
-
ignore_solve_errors=ignore_solve_errors,
|
|
1242
|
-
**kwargs,
|
|
1243
|
-
)
|
|
1244
|
-
if self.conn.conn_type != "pas":
|
|
1306
|
+
# prepare parallel
|
|
1307
|
+
if parallel and self.conn.conn_type == "dict":
|
|
1245
1308
|
parallel = False
|
|
1246
1309
|
logger.error(
|
|
1247
|
-
"Parallel solving only supported for PasConnector
|
|
1248
|
-
"Setting parallel to `False`"
|
|
1310
|
+
"Parallel solving only supported for PasConnector and "
|
|
1311
|
+
"ArcticDBConnector databases. Setting parallel to `False`"
|
|
1249
1312
|
)
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1313
|
+
if parallel:
|
|
1314
|
+
if self.conn.conn_type == "arcticdb":
|
|
1315
|
+
solve_model = partial(
|
|
1316
|
+
self.conn._solve_model,
|
|
1317
|
+
report=report,
|
|
1318
|
+
ignore_solve_errors=ignore_solve_errors,
|
|
1319
|
+
**kwargs,
|
|
1320
|
+
)
|
|
1321
|
+
self.conn._parallel(
|
|
1322
|
+
solve_model,
|
|
1323
|
+
modelnames,
|
|
1324
|
+
max_workers=max_workers,
|
|
1325
|
+
chunksize=None,
|
|
1326
|
+
progressbar=progressbar,
|
|
1327
|
+
desc="Solving models (parallel)",
|
|
1328
|
+
)
|
|
1329
|
+
elif self.conn.conn_type == "pas":
|
|
1330
|
+
solve_model = partial(
|
|
1331
|
+
self.conn._solve_model,
|
|
1332
|
+
connector=self.conn,
|
|
1333
|
+
report=report,
|
|
1334
|
+
ignore_solve_errors=ignore_solve_errors,
|
|
1335
|
+
**kwargs,
|
|
1336
|
+
)
|
|
1337
|
+
self.conn._parallel(
|
|
1338
|
+
solve_model,
|
|
1339
|
+
modelnames,
|
|
1340
|
+
max_workers=max_workers,
|
|
1341
|
+
chunksize=None,
|
|
1342
|
+
progressbar=progressbar,
|
|
1343
|
+
desc="Solving models (parallel)",
|
|
1344
|
+
)
|
|
1256
1345
|
else:
|
|
1346
|
+
solve_model = partial(
|
|
1347
|
+
self.conn._solve_model,
|
|
1348
|
+
connector=self.conn,
|
|
1349
|
+
report=report,
|
|
1350
|
+
ignore_solve_errors=ignore_solve_errors,
|
|
1351
|
+
**kwargs,
|
|
1352
|
+
)
|
|
1257
1353
|
for ml_name in (
|
|
1258
1354
|
tqdm(modelnames, desc="Solving models") if progressbar else modelnames
|
|
1259
1355
|
):
|
|
1260
1356
|
solve_model(ml_name=ml_name)
|
|
1261
1357
|
|
|
1262
|
-
def _solve_model(
|
|
1263
|
-
self,
|
|
1264
|
-
ml_name: str,
|
|
1265
|
-
report: bool = False,
|
|
1266
|
-
ignore_solve_errors: bool = False,
|
|
1267
|
-
**kwargs,
|
|
1268
|
-
) -> None:
|
|
1269
|
-
"""Solve a model in the store (internal method).
|
|
1270
|
-
|
|
1271
|
-
ml_name : list of str, optional
|
|
1272
|
-
name of a model in the pastastore
|
|
1273
|
-
report : boolean, optional
|
|
1274
|
-
determines if a report is printed when the model is solved,
|
|
1275
|
-
default is False
|
|
1276
|
-
ignore_solve_errors : boolean, optional
|
|
1277
|
-
if True, errors emerging from the solve method are ignored,
|
|
1278
|
-
default is False which will raise an exception when a model
|
|
1279
|
-
cannot be optimized
|
|
1280
|
-
**kwargs : dictionary
|
|
1281
|
-
arguments are passed to the solve method.
|
|
1282
|
-
"""
|
|
1283
|
-
ml = self.conn.get_models(ml_name)
|
|
1284
|
-
m_kwargs = {}
|
|
1285
|
-
for key, value in kwargs.items():
|
|
1286
|
-
if isinstance(value, pd.Series):
|
|
1287
|
-
m_kwargs[key] = value.loc[ml.name]
|
|
1288
|
-
else:
|
|
1289
|
-
m_kwargs[key] = value
|
|
1290
|
-
# Convert timestamps
|
|
1291
|
-
for tstamp in ["tmin", "tmax"]:
|
|
1292
|
-
if tstamp in m_kwargs:
|
|
1293
|
-
m_kwargs[tstamp] = pd.Timestamp(m_kwargs[tstamp])
|
|
1294
|
-
|
|
1295
|
-
try:
|
|
1296
|
-
ml.solve(report=report, **m_kwargs)
|
|
1297
|
-
except Exception as e:
|
|
1298
|
-
if ignore_solve_errors:
|
|
1299
|
-
warning = "Solve error ignored for '%s': %s " % (ml.name, e)
|
|
1300
|
-
logger.warning(warning)
|
|
1301
|
-
else:
|
|
1302
|
-
raise e
|
|
1303
|
-
|
|
1304
|
-
self.conn.add_model(ml, overwrite=True)
|
|
1305
|
-
|
|
1306
1358
|
def model_results(
|
|
1307
1359
|
self,
|
|
1308
1360
|
mls: Optional[Union[ps.Model, list, str]] = None,
|
|
@@ -1370,7 +1422,7 @@ class PastaStore:
|
|
|
1370
1422
|
|
|
1371
1423
|
if os.path.exists(fname) and not overwrite:
|
|
1372
1424
|
raise FileExistsError(
|
|
1373
|
-
"File already exists!
|
|
1425
|
+
"File already exists! Use 'overwrite=True' to force writing file."
|
|
1374
1426
|
)
|
|
1375
1427
|
elif os.path.exists(fname):
|
|
1376
1428
|
warnings.warn(f"Overwriting file '{os.path.basename(fname)}'", stacklevel=1)
|
|
@@ -1443,6 +1495,7 @@ class PastaStore:
|
|
|
1443
1495
|
conn: Optional[BaseConnector] = None,
|
|
1444
1496
|
storename: Optional[str] = None,
|
|
1445
1497
|
progressbar: bool = True,
|
|
1498
|
+
series_ext_json: bool = False,
|
|
1446
1499
|
):
|
|
1447
1500
|
"""Load PastaStore from zipfile.
|
|
1448
1501
|
|
|
@@ -1458,6 +1511,10 @@ class PastaStore:
|
|
|
1458
1511
|
defaults to the name of the Connector.
|
|
1459
1512
|
progressbar : bool, optional
|
|
1460
1513
|
show progressbar, by default True
|
|
1514
|
+
series_ext_json : bool, optional
|
|
1515
|
+
if True, series are expected to have a .json extension, by default False,
|
|
1516
|
+
which assumes a .pas extension. Set this option to true for reading
|
|
1517
|
+
zipfiles created with older versions of pastastore <1.8.0.
|
|
1461
1518
|
|
|
1462
1519
|
Returns
|
|
1463
1520
|
-------
|
|
@@ -1469,9 +1526,22 @@ class PastaStore:
|
|
|
1469
1526
|
if conn is None:
|
|
1470
1527
|
conn = DictConnector("pastas_db")
|
|
1471
1528
|
|
|
1529
|
+
if series_ext_json:
|
|
1530
|
+
ext = "json"
|
|
1531
|
+
else:
|
|
1532
|
+
ext = "pas"
|
|
1533
|
+
|
|
1534
|
+
# short circuit for PasConnector when zipfile was written using pas files
|
|
1535
|
+
if conn.conn_type == "pas" and not series_ext_json:
|
|
1536
|
+
with ZipFile(fname, "r") as archive:
|
|
1537
|
+
archive.extractall(conn.path)
|
|
1538
|
+
if storename is None:
|
|
1539
|
+
storename = conn.name
|
|
1540
|
+
return cls(conn, storename)
|
|
1541
|
+
|
|
1472
1542
|
with ZipFile(fname, "r") as archive:
|
|
1473
1543
|
namelist = [
|
|
1474
|
-
fi for fi in archive.namelist() if not fi.endswith("_meta.
|
|
1544
|
+
fi for fi in archive.namelist() if not fi.endswith(f"_meta.{ext}")
|
|
1475
1545
|
]
|
|
1476
1546
|
for f in tqdm(namelist, desc="Reading zip") if progressbar else namelist:
|
|
1477
1547
|
libname, fjson = os.path.split(f)
|
|
@@ -1480,7 +1550,7 @@ class PastaStore:
|
|
|
1480
1550
|
if not isinstance(s.index, pd.DatetimeIndex):
|
|
1481
1551
|
s.index = pd.to_datetime(s.index, unit="ms")
|
|
1482
1552
|
s = s.sort_index()
|
|
1483
|
-
meta = json.load(archive.open(f.replace(".
|
|
1553
|
+
meta = json.load(archive.open(f.replace(f".{ext}", f"_meta.{ext}")))
|
|
1484
1554
|
conn._add_series(libname, s, fjson.split(".")[0], metadata=meta)
|
|
1485
1555
|
elif libname in ["models"]:
|
|
1486
1556
|
ml = json.load(archive.open(f), object_hook=pastas_hook)
|
|
@@ -1496,7 +1566,7 @@ class PastaStore:
|
|
|
1496
1566
|
case_sensitive: bool = True,
|
|
1497
1567
|
sort=True,
|
|
1498
1568
|
):
|
|
1499
|
-
"""Search for names of time series or models
|
|
1569
|
+
"""Search for names of time series or models containing string `s`.
|
|
1500
1570
|
|
|
1501
1571
|
Parameters
|
|
1502
1572
|
----------
|
|
@@ -1515,30 +1585,48 @@ class PastaStore:
|
|
|
1515
1585
|
list of names that match search result
|
|
1516
1586
|
"""
|
|
1517
1587
|
if libname == "models":
|
|
1518
|
-
lib_names = self.model_names
|
|
1588
|
+
lib_names = {"models": self.model_names}
|
|
1519
1589
|
elif libname == "stresses":
|
|
1520
|
-
lib_names = self.stresses_names
|
|
1590
|
+
lib_names = {"stresses": self.stresses_names}
|
|
1521
1591
|
elif libname == "oseries":
|
|
1522
|
-
lib_names = self.oseries_names
|
|
1592
|
+
lib_names = {"oseries": self.oseries_names}
|
|
1593
|
+
elif libname is None:
|
|
1594
|
+
lib_names = {
|
|
1595
|
+
"oseries": self.oseries_names,
|
|
1596
|
+
"stresses": self.stresses_names,
|
|
1597
|
+
"models": self.model_names,
|
|
1598
|
+
}
|
|
1523
1599
|
else:
|
|
1524
|
-
raise ValueError(
|
|
1600
|
+
raise ValueError(
|
|
1601
|
+
"Provide valid libname: 'models', 'stresses', 'oseries' or None"
|
|
1602
|
+
" to seach within all libraries."
|
|
1603
|
+
)
|
|
1525
1604
|
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
else:
|
|
1530
|
-
matches = [n for n in lib_names if s.lower() in n.lower()]
|
|
1531
|
-
if isinstance(s, list):
|
|
1532
|
-
m = np.array([])
|
|
1533
|
-
for sub in s:
|
|
1605
|
+
result = {}
|
|
1606
|
+
for lib, names in lib_names.items():
|
|
1607
|
+
if isinstance(s, str):
|
|
1534
1608
|
if case_sensitive:
|
|
1535
|
-
|
|
1609
|
+
matches = [n for n in names if s in n]
|
|
1536
1610
|
else:
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1611
|
+
matches = [n for n in names if s.lower() in n.lower()]
|
|
1612
|
+
elif isinstance(s, list):
|
|
1613
|
+
m = np.array([])
|
|
1614
|
+
for sub in s:
|
|
1615
|
+
if case_sensitive:
|
|
1616
|
+
m = np.append(m, [n for n in names if sub in n])
|
|
1617
|
+
else:
|
|
1618
|
+
m = np.append(m, [n for n in names if sub.lower() in n.lower()])
|
|
1619
|
+
matches = list(np.unique(m))
|
|
1620
|
+
else:
|
|
1621
|
+
raise TypeError("s must be str or list of str!")
|
|
1622
|
+
if sort:
|
|
1623
|
+
matches.sort()
|
|
1624
|
+
result[lib] = matches
|
|
1625
|
+
|
|
1626
|
+
if len(result) == 1:
|
|
1627
|
+
return result[lib]
|
|
1628
|
+
else:
|
|
1629
|
+
return result
|
|
1542
1630
|
|
|
1543
1631
|
def get_model_timeseries_names(
|
|
1544
1632
|
self,
|
|
@@ -1603,7 +1691,17 @@ class PastaStore:
|
|
|
1603
1691
|
else:
|
|
1604
1692
|
return structure
|
|
1605
1693
|
|
|
1606
|
-
def apply(
|
|
1694
|
+
def apply(
|
|
1695
|
+
self,
|
|
1696
|
+
libname: str,
|
|
1697
|
+
func: callable,
|
|
1698
|
+
names: Optional[Union[str, List[str]]] = None,
|
|
1699
|
+
kwargs: Optional[dict] = None,
|
|
1700
|
+
progressbar: bool = True,
|
|
1701
|
+
parallel: bool = False,
|
|
1702
|
+
max_workers: Optional[int] = None,
|
|
1703
|
+
fancy_output: bool = True,
|
|
1704
|
+
) -> Union[dict, pd.Series, pd.DataFrame]:
|
|
1607
1705
|
"""Apply function to items in library.
|
|
1608
1706
|
|
|
1609
1707
|
Supported libraries are oseries, stresses, and models.
|
|
@@ -1613,32 +1711,114 @@ class PastaStore:
|
|
|
1613
1711
|
libname : str
|
|
1614
1712
|
library name, supports "oseries", "stresses" and "models"
|
|
1615
1713
|
func : callable
|
|
1616
|
-
function that accepts
|
|
1714
|
+
function that accepts a string corresponding to the name of an item in
|
|
1715
|
+
the library as its first argument. Additional keyword arguments can be
|
|
1716
|
+
specified. The function can return any result, or update an item in the
|
|
1717
|
+
database without returning anything.
|
|
1617
1718
|
names : str, list of str, optional
|
|
1618
1719
|
apply function to these names, by default None which loops over all stored
|
|
1619
1720
|
items in library
|
|
1721
|
+
kwargs : dict, optional
|
|
1722
|
+
keyword arguments to pass to func, by default None
|
|
1620
1723
|
progressbar : bool, optional
|
|
1621
1724
|
show progressbar, by default True
|
|
1725
|
+
parallel : bool, optional
|
|
1726
|
+
run apply in parallel, default is False.
|
|
1727
|
+
max_workers : int, optional
|
|
1728
|
+
max no. of workers, only used if parallel is True
|
|
1729
|
+
fancy_output : bool, optional
|
|
1730
|
+
if True, try returning result as pandas Series or DataFrame, by default
|
|
1731
|
+
False
|
|
1622
1732
|
|
|
1623
1733
|
Returns
|
|
1624
1734
|
-------
|
|
1625
1735
|
dict
|
|
1626
1736
|
dict of results of func, with names as keys and results as values
|
|
1737
|
+
|
|
1738
|
+
Notes
|
|
1739
|
+
-----
|
|
1740
|
+
Users should be aware that parallel solving is platform dependent
|
|
1741
|
+
and may not always work. The current implementation works well for Linux users.
|
|
1742
|
+
For Windows users, parallel solving does not work when called directly from
|
|
1743
|
+
Jupyter Notebooks or IPython. To use parallel solving on Windows, the following
|
|
1744
|
+
code should be used in a Python file::
|
|
1745
|
+
|
|
1746
|
+
from multiprocessing import freeze_support
|
|
1747
|
+
|
|
1748
|
+
if __name__ == "__main__":
|
|
1749
|
+
freeze_support()
|
|
1750
|
+
pstore.apply("models", some_func, parallel=True)
|
|
1627
1751
|
"""
|
|
1628
1752
|
names = self.conn._parse_names(names, libname)
|
|
1629
|
-
|
|
1753
|
+
if kwargs is None:
|
|
1754
|
+
kwargs = {}
|
|
1630
1755
|
if libname not in ("oseries", "stresses", "models"):
|
|
1631
1756
|
raise ValueError(
|
|
1632
1757
|
"'libname' must be one of ['oseries', 'stresses', 'models']!"
|
|
1633
1758
|
)
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1759
|
+
if parallel:
|
|
1760
|
+
result = self.conn._parallel(
|
|
1761
|
+
func,
|
|
1762
|
+
kwargs=kwargs,
|
|
1763
|
+
names=names,
|
|
1764
|
+
progressbar=progressbar,
|
|
1765
|
+
max_workers=max_workers,
|
|
1766
|
+
chunksize=None,
|
|
1767
|
+
desc=f"Applying {func.__name__} (parallel)",
|
|
1768
|
+
)
|
|
1769
|
+
else:
|
|
1770
|
+
result = []
|
|
1771
|
+
for n in tqdm(
|
|
1772
|
+
names, desc=f"Applying {func.__name__}", disable=not progressbar
|
|
1773
|
+
):
|
|
1774
|
+
result.append(func(n, **kwargs))
|
|
1775
|
+
if fancy_output:
|
|
1776
|
+
return PastaStore._fancy_output(result, names, func.__name__)
|
|
1777
|
+
else:
|
|
1778
|
+
return result
|
|
1640
1779
|
|
|
1641
|
-
|
|
1780
|
+
@staticmethod
|
|
1781
|
+
def _fancy_output(
|
|
1782
|
+
result: Iterable,
|
|
1783
|
+
names: List[str],
|
|
1784
|
+
label: Optional[str] = None,
|
|
1785
|
+
) -> Union[pd.Series, pd.DataFrame, dict]:
|
|
1786
|
+
"""Convert apply result to pandas Series, DataFrame or dict.
|
|
1787
|
+
|
|
1788
|
+
Parameters
|
|
1789
|
+
----------
|
|
1790
|
+
result : Iterable
|
|
1791
|
+
result of apply function
|
|
1792
|
+
names : list
|
|
1793
|
+
list of names
|
|
1794
|
+
label : str, optional
|
|
1795
|
+
label for columns, by default None
|
|
1796
|
+
|
|
1797
|
+
Returns
|
|
1798
|
+
-------
|
|
1799
|
+
pd.Series, pd.DataFrame, dict
|
|
1800
|
+
Series, DataFrame or dict with results
|
|
1801
|
+
"""
|
|
1802
|
+
if not isinstance(result, list):
|
|
1803
|
+
result = list(result)
|
|
1804
|
+
if isinstance(result[0], (float, int, np.integer)):
|
|
1805
|
+
return pd.Series(result, index=names)
|
|
1806
|
+
elif isinstance(result[0], (pd.Series, pd.DataFrame)):
|
|
1807
|
+
df = pd.concat(dict(zip(names, result, strict=True)), axis=1)
|
|
1808
|
+
if label is not None:
|
|
1809
|
+
df.columns.name = label
|
|
1810
|
+
return df
|
|
1811
|
+
elif result[0] is None:
|
|
1812
|
+
return None # return None if first result is None?
|
|
1813
|
+
else:
|
|
1814
|
+
return dict(zip(names, result, strict=True))
|
|
1815
|
+
|
|
1816
|
+
def within(
|
|
1817
|
+
self,
|
|
1818
|
+
extent: list,
|
|
1819
|
+
names: Optional[list[str]] = None,
|
|
1820
|
+
libname: str = "oseries",
|
|
1821
|
+
):
|
|
1642
1822
|
"""Get names of items within extent.
|
|
1643
1823
|
|
|
1644
1824
|
Parameters
|