pastastore 1.10.2__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,474 @@
1
+ """Module containing Validator class for checking input data for connectors."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import shutil
7
+ import warnings
8
+
9
+ # import weakref
10
+ from typing import TYPE_CHECKING, Union
11
+
12
+ import pandas as pd
13
+ import pastas as ps
14
+ from numpy import isin
15
+ from pandas.testing import assert_series_equal
16
+
17
+ from pastastore.typing import PastasLibs
18
+ from pastastore.util import SeriesUsedByModel, _custom_warning, validate_names
19
+
20
+ if TYPE_CHECKING:
21
+ from pastastore.base import BaseConnector
22
+
23
# Type alias covering the two pandas containers named in it.
FrameorSeriesUnion = Union[pd.DataFrame, pd.Series]
# Module-level side effect: importing this module replaces the global
# ``warnings.showwarning`` hook with ``_custom_warning`` (imported from
# ``pastastore.util``).
warnings.showwarning = _custom_warning

# Logger named after this module; used by Validator for info/warning messages.
logger = logging.getLogger(__name__)
27
+
28
+
29
class Validator:
    """Validator class for checking input data and model consistency.

    This class provides validation methods for time series data, models,
    and metadata to ensure data integrity when storing in PastaStore
    connectors.

    Parameters
    ----------
    connector : BaseConnector
        The connector instance used for validation.

    Attributes
    ----------
    CHECK_MODEL_SERIES_VALUES : bool
        Whether to check model time series contents against stored copies.
    USE_PASTAS_VALIDATE_SERIES : bool
        Whether to validate time series according to pastas rules.
    PROTECT_SERIES_IN_MODELS : bool
        Whether series that are used in models are protected from being
        deleted or modified.
    SERIES_EQUALITY_ABSOLUTE_TOLERANCE : float
        Absolute tolerance for series equality comparison.
    SERIES_EQUALITY_RELATIVE_TOLERANCE : float
        Relative tolerance for series equality comparison.

    Notes
    -----
    The options above are class-level defaults. The ``set_*`` methods assign
    to ``self``, so they override the default for that instance only.
    """

    # whether to check model time series contents against stored copies
    CHECK_MODEL_SERIES_VALUES = True

    # whether to validate time series according to pastas rules
    USE_PASTAS_VALIDATE_SERIES = True

    # protect series in models from being deleted or modified
    PROTECT_SERIES_IN_MODELS = True

    # set series equality comparison settings (using assert_series_equal);
    # these are passed as ``atol`` / ``rtol`` in the check_*_in_store methods
    SERIES_EQUALITY_ABSOLUTE_TOLERANCE = 1e-10
    SERIES_EQUALITY_RELATIVE_TOLERANCE = 0.0

    def __init__(self, connector: "BaseConnector"):
        """Initialize Validator with connector reference.

        Only the reference is stored; nothing is read from the connector here.

        Parameters
        ----------
        connector : BaseConnector
            The connector instance to validate against.
        """
        self.connector = connector
75
+
76
@property
def settings(self):
    """Return the current validation options as a dictionary.

    Returns
    -------
    dict
        option name -> current value, in the fixed order listed below.
    """
    option_names = (
        "CHECK_MODEL_SERIES_VALUES",
        "USE_PASTAS_VALIDATE_SERIES",
        "PROTECT_SERIES_IN_MODELS",
        "SERIES_EQUALITY_ABSOLUTE_TOLERANCE",
        "SERIES_EQUALITY_RELATIVE_TOLERANCE",
    )
    # getattr resolves instance overrides first, then the class defaults
    return {option: getattr(self, option) for option in option_names}
90
+
91
def set_check_model_series_values(self, b: bool):
    """Switch the CHECK_MODEL_SERIES_VALUES option on (True) or off (False).

    While the option is on, ``check_oseries_in_store`` and
    ``check_stresses_in_store`` compare the original time series held by a
    pastas model (``ml.oseries._series_original`` and the
    ``_series_original`` of each stress) with the copies stored in the
    database, and raise an error when they differ, so that the model is not
    added. This guarantees that a stored model is identical after loading it
    from the database. The comparison is somewhat computationally expensive,
    which is why it can be switched off; keeping it on (the default) is
    highly recommended.

    Parameters
    ----------
    b : bool
        True to enable the check, False to disable it. Enabled by default.
    """
    # stored on the instance; the class-level default is left untouched
    self.CHECK_MODEL_SERIES_VALUES = b
    logger.info("Model time series checking set to: %s.", b)
111
+
112
def set_use_pastas_validate_series(self, b: bool):
    """Turn USE_PASTAS_VALIDATE_SERIES option on (True) or off (False).

    This will use pastas.validate_oseries() or pastas.validate_stresses()
    to test the time series. If they do not meet the criteria, an error is
    raised. Turning this option off will allow the user to store any time
    series but this will mean that time series models cannot be made from
    stored time series directly and will have to be modified before
    building the models. This in turn will mean that storing the models
    will not work as the stored time series copy is checked against the
    time series in the model to check if they are equal.

    Note: the value is stored exactly as given. This method performs no
    pastas version check and never switches the option off by itself.

    Parameters
    ----------
    b : bool
        boolean indicating whether option should be turned on (True) or
        off (False). Option is on by default.
    """
    # stored on the instance; the class-level default is left untouched
    self.USE_PASTAS_VALIDATE_SERIES = b
    logger.info("Pastas time series validation set to: %s.", b)
134
+
135
def set_protect_series_in_models(self, b: bool):
    """Switch the PROTECT_SERIES_IN_MODELS option on (True) or off (False).

    The option is on by default. With the option on, deleting a time series
    that is used in a model raises an error, which prevents models from
    breaking because a time series they require was removed. To delete such
    a time series anyway, pass force=True to del_oseries() or del_stress().

    This method only stores the flag; the code that consults it is not part
    of this module.

    Parameters
    ----------
    b : bool
        True to enable the protection, False to disable it. Enabled by
        default.
    """
    # stored on the instance; the class-level default is left untouched
    self.PROTECT_SERIES_IN_MODELS = b
    logger.info("Protect series in models set to: %s.", b)
152
+
153
def pastas_validation_status(self, validate):
    """Resolve whether time series should be validated.

    Parameters
    ----------
    validate : bool, NoneType
        value of the ``validate`` keyword argument passed by the caller

    Returns
    -------
    b : bool
        ``validate`` itself when it is not None (local setting), otherwise
        the value of USE_PASTAS_VALIDATE_SERIES (global setting)
    """
    return self.USE_PASTAS_VALIDATE_SERIES if validate is None else validate
170
+
171
@staticmethod
def check_filename_illegal_chars(libname: PastasLibs, name: str) -> str:
    """Return a version of ``name`` that passed ``validate_names`` (internal method).

    Parameters
    ----------
    libname : str
        library name, only used in the warning message
    name : str
        name of the item

    Returns
    -------
    str
        ``name`` unchanged when ``validate_names`` returns it as is, otherwise
        the value returned by ``validate_names`` (a warning is logged)
    """
    # backslash, forward slash and the platform path separator;
    # presumably these are stripped by validate_names -- see pastastore.util
    forbidden = r"\/" + os.sep
    cleaned = validate_names(name, deletechars=forbidden, replace_space=None)
    if cleaned == name:
        return name
    logger.warning(
        f"{libname} name '{name}' contained invalid characters "
        f"and was changed to '{cleaned}'"
    )
    return cleaned
197
+
198
+ @staticmethod
199
+ def validate_input_series(series):
200
+ """Check if series is pandas.DataFrame or pandas.Series.
201
+
202
+ Parameters
203
+ ----------
204
+ series : object
205
+ object to validate
206
+
207
+ Raises
208
+ ------
209
+ TypeError
210
+ if object is not of type pandas.DataFrame or pandas.Series
211
+ """
212
+ if not (isinstance(series, pd.DataFrame) or isinstance(series, pd.Series)):
213
+ raise TypeError("Please provide pandas.DataFrame or pandas.Series!")
214
+ if isinstance(series, pd.DataFrame):
215
+ if series.columns.size > 1:
216
+ raise ValueError("Only DataFrames with one column are supported!")
217
+
218
+ @staticmethod
219
+ def set_series_name(series, name):
220
+ """Set series name to match user defined name in store.
221
+
222
+ Parameters
223
+ ----------
224
+ series : pandas.Series or pandas.DataFrame
225
+ set name for this time series
226
+ name : str
227
+ name of the time series (used in the pastastore)
228
+ """
229
+ if isinstance(series, pd.Series):
230
+ series.name = name
231
+ # empty string on index name causes trouble when reading
232
+ # data from ArcticDB: TODO: check if still an issue?
233
+ if series.index.name == "":
234
+ series.index.name = None
235
+
236
+ if isinstance(series, pd.DataFrame):
237
+ series.columns = [name]
238
+ # check for hydropandas objects which are instances of DataFrame but
239
+ # do have a name attribute
240
+ if hasattr(series, "name"):
241
+ series.name = name
242
+ return series
243
+
244
@staticmethod
def check_stressmodels_supported(ml):
    """Raise if the model contains a stressmodel class that cannot be stored.

    Parameters
    ----------
    ml : pastas.Model or dict
        model, or its dictionary representation, in which every entry of
        ``ml["stressmodels"]`` has a "class" key

    Raises
    ------
    TypeError
        if ``ml`` is neither a pastas.Model nor a dict
    NotImplementedError
        if any stressmodel class name is not in the supported list
    """
    supported_stressmodels = [
        "StressModel",
        "StressModel2",
        "RechargeModel",
        "WellModel",
        "TarsoModel",
        "Constant",
        "LinearTrend",
        "StepModel",
    ]
    if isinstance(ml, ps.Model):
        # class name of the object, not the protected _name attribute
        used = {type(sm).__name__ for sm in ml.stressmodels.values()}
    elif isinstance(ml, dict):
        used = {sm["class"] for sm in ml["stressmodels"].values()}
    else:
        raise TypeError("Expected pastas.Model or dict!")

    unsupported = used.difference(supported_stressmodels)
    if unsupported:
        raise NotImplementedError(
            "PastaStore does not support storing models with the "
            f"following stressmodels: {unsupported}"
        )
272
+
273
@staticmethod
def check_model_series_names_duplicates(ml):
    """Check for duplicate stress series names in the model.

    Parameters
    ----------
    ml : pastas.Model or dict
        model, or its dictionary representation

    Raises
    ------
    TypeError
        if ``ml`` is neither a pastas.Model nor a dict
    ValueError
        if two or more stresses in the model share the same name
    """
    prec_evap_model = ("RechargeModel", "TarsoModel")

    if isinstance(ml, ps.Model):
        series_names = [
            istress.series.name
            for sm in ml.stressmodels.values()
            for istress in sm.stress
        ]
    elif isinstance(ml, dict):
        series_names = []
        for sm in ml["stressmodels"].values():
            smclass = sm["class"]
            if smclass in prec_evap_model:
                # recharge-type models keep their stresses under prec/evap
                stresses = [sm["prec"], sm["evap"]]
            elif smclass == "WellModel":
                # WellModel stores a list of stresses
                stresses = sm["stress"]
            elif "stress" in sm:
                # all other stressmodels store a single stress
                stresses = [sm["stress"]]
            else:
                # stressmodels without any stress (presumably Constant,
                # LinearTrend and StepModel, which this class lists as
                # supported) contribute no series names -- TODO confirm
                # against the pastas to_dict() output
                stresses = []
            series_names.extend(istress["name"] for istress in stresses)
    else:
        raise TypeError("Expected pastas.Model or dict!")

    if len(series_names) != len(set(series_names)):
        msg = (
            "There are multiple stresses series with the same name! "
            "Each series name must be unique for the PastaStore!"
        )
        raise ValueError(msg)
326
+
327
def check_oseries_in_store(self, ml: Union[ps.Model, dict]):
    """Verify that the model's oseries is present in the store (internal method).

    For pastas.Model input, and only while CHECK_MODEL_SERIES_VALUES is on,
    the model's original oseries is additionally compared with the stored
    copy (NaNs dropped on both sides) using the class tolerance settings.

    Parameters
    ----------
    ml : Union[ps.Model, dict]
        pastas Model, or its dictionary representation

    Raises
    ------
    TypeError
        if ``ml`` is neither a pastas.Model nor a dict
    LookupError
        if the oseries name is not in the connector's oseries index
    ValueError
        if the model oseries differs from the stored oseries
    """
    is_model = isinstance(ml, ps.Model)
    if is_model:
        name = ml.oseries.name
    elif isinstance(ml, dict):
        name = str(ml["oseries"]["name"])
    else:
        raise TypeError("Expected pastas.Model or dict!")

    if name not in self.connector.oseries.index:
        raise LookupError(
            f"Cannot add model because oseries '{name}' is not contained in store."
        )

    # the value comparison is expensive and only possible for Model objects
    if not (self.CHECK_MODEL_SERIES_VALUES and is_model):
        return

    stored = self.connector.get_oseries(name).squeeze().dropna()
    # Access to _series_original is necessary for validation with Pastas models
    in_model = ml.oseries._series_original  # noqa: SLF001
    try:
        assert_series_equal(
            in_model.dropna(),
            stored,
            atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
            rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
        )
    except AssertionError as e:
        raise ValueError(
            f"Cannot add model because model oseries '{name}'"
            " is different from stored oseries! See stacktrace for differences."
        ) from e
363
+
364
def check_stresses_in_store(self, ml: Union[ps.Model, dict]):
    """Check if stresses time series are contained in PastaStore (internal method).

    For pastas.Model input, and only while CHECK_MODEL_SERIES_VALUES is on,
    each stress' original series is additionally compared with the stored
    copy using the class tolerance settings.

    Parameters
    ----------
    ml : Union[ps.Model, dict]
        pastas Model, or its dictionary representation

    Raises
    ------
    TypeError
        if ``ml`` is neither a pastas.Model nor a dict
    LookupError
        if a stress name is not in the connector's stresses index
    ValueError
        if a model stress differs from the stored stress
    """
    prec_evap_model = ["RechargeModel", "TarsoModel"]
    if isinstance(ml, ps.Model):
        # read the index once instead of once per stress
        stored_names = self.connector.stresses.index
        for sm in ml.stressmodels.values():
            # Check class name using type instead of protected _name attribute
            if type(sm).__name__ in prec_evap_model:
                stresses = [sm.prec, sm.evap]
            else:
                stresses = sm.stress
            for s in stresses:
                if str(s.name) not in stored_names:
                    msg = (
                        f"Cannot add model because stress '{s.name}' "
                        "is not contained in store."
                    )
                    raise LookupError(msg)
                if self.CHECK_MODEL_SERIES_VALUES:
                    s_org = self.connector.get_stresses(s.name).squeeze()
                    # Access to _series_original needed for Pastas validation
                    so = s._series_original  # noqa: SLF001
                    try:
                        assert_series_equal(
                            so,
                            s_org,
                            atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
                            rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
                        )
                    except AssertionError as e:
                        raise ValueError(
                            f"Cannot add model because model stress "
                            f"'{s.name}' is different from stored stress! "
                            "See stacktrace for differences."
                        ) from e
    elif isinstance(ml, dict):
        stored_names = self.connector.stresses.index
        for sm in ml["stressmodels"].values():
            smclass = sm["class"]
            if smclass in prec_evap_model:
                stresses = [sm["prec"], sm["evap"]]
            elif smclass == "WellModel":
                # WellModel stores a list of stresses
                stresses = sm["stress"]
            elif "stress" in sm:
                stresses = [sm["stress"]]
            else:
                # stressmodels without any stress (presumably Constant,
                # LinearTrend and StepModel, which this class lists as
                # supported) have nothing to look up -- TODO confirm
                # against the pastas to_dict() output
                stresses = []
            for s in stresses:
                if str(s["name"]) not in stored_names:
                    msg = (
                        f"Cannot add model because stress '{s['name']}' "
                        "is not contained in store."
                    )
                    raise LookupError(msg)
    else:
        raise TypeError("Expected pastas.Model or dict!")
422
+
423
def check_config_connector_type(self, path: Union[str, os.PathLike]) -> None:
    """Check if config file connector type matches connector instance.

    Nothing happens when ``path`` is not an existing directory or when it
    contains no ``*.pastastore`` file.

    Parameters
    ----------
    path : str or path-like
        path to directory containing the pastastore config file

    Raises
    ------
    ValueError
        if the "connector_type" stored in the config file differs from the
        ``conn_type`` of the connector
    KeyError
        if the config file has no "connector_type" entry
    """
    # local import: pathlib is not among this module's top-level imports
    from pathlib import Path

    # accept plain strings as well as Path objects
    path = Path(path)
    if not path.is_dir():  # also False when the path does not exist
        return

    config_file = next(iter(path.glob("*.pastastore")), None)
    if config_file is None:
        return

    with config_file.open("r", encoding="utf-8") as f:
        cfg = json.load(f)
    stored_connector_type = cfg.pop("connector_type")
    if stored_connector_type == self.connector.conn_type:
        return

    # NOTE: delete _arctic_cfg that is created on ArcticDB init
    if self.connector.conn_type == "arcticdb":
        # best-effort clean-up: a missing or undeletable directory must not
        # mask the ValueError raised below
        shutil.rmtree(path.parent / "_arctic_cfg", ignore_errors=True)
    raise ValueError(
        f"Directory '{self.connector.name}/' in use by another "
        f"connector type! Either create a '{stored_connector_type}' "
        "connector to load the current pastastore or change the "
        f"directory name to create a new '{self.connector.conn_type}' "
        "connector."
    )
448
+
449
def check_series_in_models(self, libname, name):
    """Raise if the time series is used by any model (internal method).

    Library names other than 'oseries' and 'stresses' are ignored.

    Parameters
    ----------
    libname : str
        library name ('oseries' or 'stresses')
    name : str
        name of the time series

    Raises
    ------
    SeriesUsedByModel
        if ``name`` is a key of the connector's ``oseries_models`` or
        ``stresses_models`` mapping (depending on ``libname``)
    """
    # only touch the connector attribute that belongs to the library
    if libname == "oseries":
        used_by = self.connector.oseries_models
    elif libname == "stresses":
        used_by = self.connector.stresses_models
    else:
        return

    if name not in used_by:
        return

    template = (
        "{libname} '{name}' is used in {n_models} model(s)! Either "
        "delete model(s) first, or use force=True."
    )
    raise SeriesUsedByModel(
        template.format(libname=libname, name=name, n_models=len(used_by[name]))
    )
pastastore/version.py CHANGED
@@ -6,10 +6,9 @@ import pastas as ps
6
6
  from packaging.version import parse as parse_version
7
7
 
8
8
  PASTAS_VERSION = parse_version(ps.__version__)
9
- PASTAS_LEQ_022 = PASTAS_VERSION <= parse_version("0.22.0")
10
9
  PASTAS_GEQ_150 = PASTAS_VERSION >= parse_version("1.5.0")
11
10
 
12
- __version__ = "1.10.2"
11
+ __version__ = "1.11.0"
13
12
 
14
13
 
15
14
  def show_versions(optional=False) -> None: