pastastore 1.10.2__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,524 @@
1
+ """Module containing Validator class for checking input data for connectors."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import shutil
7
+ import warnings
8
+
9
+ # import weakref
10
+ from typing import TYPE_CHECKING, Union
11
+
12
+ import pandas as pd
13
+ import pastas as ps
14
+ from numpy import isin
15
+ from pandas.testing import assert_series_equal
16
+
17
+ from pastastore.typing import PastasLibs
18
+ from pastastore.util import SeriesUsedByModel, _custom_warning, validate_names
19
+
20
+ if TYPE_CHECKING:
21
+ from pastastore.base import BaseConnector
22
+
23
+ FrameorSeriesUnion = Union[pd.DataFrame, pd.Series]
24
+ warnings.showwarning = _custom_warning
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class Validator:
    """Validator class for checking input data and model consistency.

    This class provides validation methods for time series data, models,
    and metadata to ensure data integrity when storing in PastaStore
    connectors.

    Parameters
    ----------
    connector : BaseConnector
        The connector instance used for validation.

    Attributes
    ----------
    CHECK_MODEL_SERIES_VALUES : bool
        Whether to check model time series contents against stored copies.
    USE_PASTAS_VALIDATE_SERIES : bool
        Whether to validate time series according to pastas rules.
    VALIDATE_METADATA : bool
        Whether to validate the timestamp-like metadata keys.
    PROTECT_SERIES_IN_MODELS : bool
        Whether series used in models are protected from being deleted or
        modified.
    SERIES_EQUALITY_ABSOLUTE_TOLERANCE : float
        Absolute tolerance for series equality comparison.
    SERIES_EQUALITY_RELATIVE_TOLERANCE : float
        Relative tolerance for series equality comparison.
    """

    # whether to check model time series contents against stored copies
    CHECK_MODEL_SERIES_VALUES = True

    # whether to validate time series according to pastas rules
    USE_PASTAS_VALIDATE_SERIES = True

    # whether to validate metadata keys
    VALIDATE_METADATA = True

    # protect series in models from being deleted or modified
    PROTECT_SERIES_IN_MODELS = True

    # set series equality comparison settings (using assert_series_equal)
    SERIES_EQUALITY_ABSOLUTE_TOLERANCE = 1e-10
    SERIES_EQUALITY_RELATIVE_TOLERANCE = 0.0

    def __init__(self, connector: "BaseConnector"):
        """Initialize Validator with connector reference.

        Parameters
        ----------
        connector : BaseConnector
            The connector instance to validate against.
        """
        self.connector = connector

    @property
    def settings(self):
        """Return the current validator settings as a dictionary."""
        return {
            "CHECK_MODEL_SERIES_VALUES": self.CHECK_MODEL_SERIES_VALUES,
            "USE_PASTAS_VALIDATE_SERIES": self.USE_PASTAS_VALIDATE_SERIES,
            "VALIDATE_METADATA": self.VALIDATE_METADATA,
            "PROTECT_SERIES_IN_MODELS": self.PROTECT_SERIES_IN_MODELS,
            "SERIES_EQUALITY_ABSOLUTE_TOLERANCE": (
                self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE
            ),
            "SERIES_EQUALITY_RELATIVE_TOLERANCE": (
                self.SERIES_EQUALITY_RELATIVE_TOLERANCE
            ),
        }

    def set_check_model_series_values(self, b: bool):
        """Turn CHECK_MODEL_SERIES_VALUES option on (True) or off (False).

        The default option is on (it is highly recommended to keep it that
        way). When turned on, the model time series
        (ml.oseries._series_original, and stressmodel.stress._series_original)
        values are checked against the stored copies in the database. If these
        do not match, an error is raised, and the model is not added to the
        database. This guarantees the stored model will be identical after
        loading from the database. This check is somewhat computationally
        expensive, which is why it can be turned on or off.

        Parameters
        ----------
        b : bool
            boolean indicating whether option should be turned on (True) or
            off (False). Option is on by default.
        """
        # stored on the instance, so the class-level default is left untouched
        self.CHECK_MODEL_SERIES_VALUES = b
        logger.info("Model time series checking set to: %s.", b)

    def set_use_pastas_validate_series(self, b: bool):
        """Turn USE_PASTAS_VALIDATE_SERIES option on (True) or off (False).

        This will use pastas.validate_oseries() or pastas.validate_stresses()
        to test the time series. If they do not meet the criteria, an error is
        raised. Turning this option off will allow the user to store any time
        series but this will mean that time series models cannot be made from
        stored time series directly and will have to be modified before
        building the models. This in turn will mean that storing the models
        will not work as the stored time series copy is checked against the
        time series in the model to check if they are equal.

        Note: the pastas validation functions are reported to require
        pastas>=0.23.0. This setter does not inspect the installed pastas
        version; it only stores the flag.

        Parameters
        ----------
        b : bool
            boolean indicating whether option should be turned on (True) or
            off (False). Option is on by default.
        """
        self.USE_PASTAS_VALIDATE_SERIES = b
        logger.info("Pastas time series validation set to: %s.", b)

    def set_validate_metadata(self, b: bool):
        """Turn VALIDATE_METADATA option on (True) or off (False).

        Parameters
        ----------
        b : bool
            boolean indicating whether option should be turned on (True) or
            off (False). Option is on by default.
        """
        self.VALIDATE_METADATA = b
        logger.info("Metadata validation set to: %s.", b)

    def validate_metadata(self, metadata: dict):
        """Validate metadata.

        Checks if metadata keys 'tmin', 'tmax', 'date_modified', and
        'date_created' are valid Pandas Timestamps (or convertible to them)
        or None. Nothing is checked when VALIDATE_METADATA is off or when
        metadata is None. The metadata dictionary itself is not modified.

        Parameters
        ----------
        metadata : dict
            metadata dictionary

        Raises
        ------
        ValueError
            if one of the checked keys holds a boolean, or a value that
            cannot be converted to a pandas.Timestamp.
        """
        if not self.VALIDATE_METADATA or metadata is None:
            return

        for key in ("tmin", "tmax", "date_modified", "date_created"):
            if key not in metadata:
                continue
            val = metadata[key]
            if val is None:
                continue
            # booleans are rejected explicitly, before attempting conversion
            if isinstance(val, bool):
                raise ValueError(
                    f"Metadata key '{key}' has boolean value {val}, "
                    "expected Timestamp, None, or convertible string."
                )
            try:
                pd.Timestamp(val)
            except (ValueError, TypeError) as e:
                raise ValueError(
                    f"Metadata key '{key}' has value {val} which is "
                    f"not convertible to Timestamp: {e}"
                ) from e

    def set_protect_series_in_models(self, b: bool):
        """Turn PROTECT_SERIES_IN_MODELS option on (True) or off (False).

        The default option is on. When turned on, deleting a time series that
        is used in a model will raise an error. This prevents models from
        breaking because a required time series has been deleted. If you really
        want to delete such a time series, use the force=True option in
        del_oseries() or del_stress().

        Parameters
        ----------
        b : bool
            boolean indicating whether option should be turned on (True) or
            off (False). Option is on by default.
        """
        self.PROTECT_SERIES_IN_MODELS = b
        logger.info("Protect series in models set to: %s.", b)

    def pastas_validation_status(self, validate):
        """Whether to validate time series.

        Parameters
        ----------
        validate : bool, NoneType
            value of validate keyword argument

        Returns
        -------
        b : bool
            the local setting if one was given, otherwise the global
            USE_PASTAS_VALIDATE_SERIES setting
        """
        if validate is None:
            return self.USE_PASTAS_VALIDATE_SERIES
        return validate

    @staticmethod
    def check_filename_illegal_chars(libname: PastasLibs, name: str) -> str:
        """Check filename for invalid characters (internal method).

        Backslashes, forward slashes and the OS path separator are passed to
        validate_names as characters to delete. A warning is logged when this
        changes the name.

        Parameters
        ----------
        libname : str
            library name
        name : str
            name of the item

        Returns
        -------
        str
            validated name
        """
        new_name = validate_names(name, deletechars=r"\/" + os.sep, replace_space=None)
        if new_name != name:
            warning = (
                f"{libname} name '{name}' contained invalid characters "
                f"and was changed to '{new_name}'"
            )
            logger.warning(warning)
        return new_name

    @staticmethod
    def validate_input_series(series):
        """Check if series is pandas.DataFrame or pandas.Series.

        Parameters
        ----------
        series : object
            object to validate

        Raises
        ------
        TypeError
            if object is not of type pandas.DataFrame or pandas.Series
        ValueError
            if object is a pandas.DataFrame with more than one column
        """
        if not isinstance(series, (pd.DataFrame, pd.Series)):
            raise TypeError("Please provide pandas.DataFrame or pandas.Series!")
        if isinstance(series, pd.DataFrame) and series.columns.size > 1:
            raise ValueError("Only DataFrames with one column are supported!")

    @staticmethod
    def set_series_name(series, name):
        """Set series name to match user defined name in store.

        The object is modified in place and also returned.

        Parameters
        ----------
        series : pandas.Series or pandas.DataFrame
            set name for this time series
        name : str
            name of the time series (used in the pastastore)

        Returns
        -------
        pandas.Series or pandas.DataFrame
            the same object that was passed in, with its name updated
        """
        if isinstance(series, pd.Series):
            series.name = name
            # empty string on index name causes trouble when reading
            # data from ArcticDB: TODO: check if still an issue?
            if series.index.name == "":
                series.index.name = None

        if isinstance(series, pd.DataFrame):
            series.columns = [name]
            # check for hydropandas objects which are instances of DataFrame but
            # do have a name attribute
            if hasattr(series, "name"):
                series.name = name
        return series

    @staticmethod
    def check_stressmodels_supported(ml):
        """Check if all stressmodels in the model are supported.

        Parameters
        ----------
        ml : pastas.Model or dict
            model, or model dictionary, whose stress model classes are checked

        Raises
        ------
        TypeError
            if ml is neither a pastas.Model nor a dict
        NotImplementedError
            if the model contains a stress model class that is not supported
        """
        supported_stressmodels = {
            "StressModel",
            "RechargeModel",
            "WellModel",
            "TarsoModel",
            "Constant",
            "LinearTrend",
            "StepModel",
        }
        if isinstance(ml, ps.Model):
            # Use type().__name__ instead of protected _name attribute
            smtyps = {type(sm).__name__ for sm in ml.stressmodels.values()}
        elif isinstance(ml, dict):
            smtyps = {sm["class"] for sm in ml["stressmodels"].values()}
        else:
            raise TypeError("Expected pastas.Model or dict!")

        unsupported = smtyps - supported_stressmodels
        if unsupported:
            raise NotImplementedError(
                "PastaStore does not support storing models with the "
                f"following stressmodels: {unsupported}"
            )

    @staticmethod
    def check_model_series_names_duplicates(ml):
        """Check for duplicate series names in the model.

        Parameters
        ----------
        ml : pastas.Model or dict
            model, or model dictionary, whose stress series names are checked

        Raises
        ------
        TypeError
            if ml is neither a pastas.Model nor a dict
        ValueError
            if two or more stress series in the model share the same name
        """
        prec_evap_model = ("RechargeModel", "TarsoModel")

        if isinstance(ml, ps.Model):
            series_names = [
                istress.series.name
                for sm in ml.stressmodels.values()
                for istress in sm.stress
            ]
        elif isinstance(ml, dict):
            series_names = []
            for sm in ml["stressmodels"].values():
                smclass = sm["class"]
                if smclass in prec_evap_model:
                    # RechargeModel, TarsoModel: separate prec and evap entries
                    series_names += [sm["prec"]["name"], sm["evap"]["name"]]
                elif smclass == "WellModel":
                    # WellModel: "stress" holds a sequence of stress entries
                    series_names += [istress["name"] for istress in sm["stress"]]
                elif "stress" in sm:
                    # single-stress models; stress models that carry no
                    # "stress" entry at all contribute no series names
                    series_names.append(sm["stress"]["name"])
        else:
            raise TypeError("Expected pastas.Model or dict!")

        if len(series_names) != len(set(series_names)):
            msg = (
                "There are multiple stresses series with the same name! "
                "Each series name must be unique for the PastaStore!"
            )
            raise ValueError(msg)

    def check_oseries_in_store(self, ml: Union[ps.Model, dict]):
        """Check if Model oseries are contained in PastaStore (internal method).

        When CHECK_MODEL_SERIES_VALUES is on and ml is a pastas.Model, the
        original model oseries is also compared (after dropping NaNs on both
        sides) with the stored copy, using the configured tolerances.

        Parameters
        ----------
        ml : Union[ps.Model, dict]
            pastas Model

        Raises
        ------
        TypeError
            if ml is neither a pastas.Model nor a dict
        LookupError
            if the oseries name is not present in the store
        ValueError
            if the model oseries differs from the stored oseries
        """
        if isinstance(ml, ps.Model):
            name = ml.oseries.name
        elif isinstance(ml, dict):
            name = str(ml["oseries"]["name"])
        else:
            raise TypeError("Expected pastas.Model or dict!")

        if name not in self.connector.oseries.index:
            msg = (
                f"Cannot add model because oseries '{name}' is not contained in store."
            )
            raise LookupError(msg)

        # expensive check
        if self.CHECK_MODEL_SERIES_VALUES and isinstance(ml, ps.Model):
            s_org = self.connector.get_oseries(name).squeeze().dropna()
            # Access to _series_original is necessary for validation with Pastas models
            so = ml.oseries._series_original  # noqa: SLF001
            try:
                assert_series_equal(
                    so.dropna(),
                    s_org,
                    atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
                    rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
                )
            except AssertionError as e:
                raise ValueError(
                    f"Cannot add model because model oseries '{name}'"
                    " is different from stored oseries! See stacktrace for differences."
                ) from e

    def check_stresses_in_store(self, ml: Union[ps.Model, dict]):
        """Check if stresses time series are contained in PastaStore (internal method).

        For a pastas.Model, and when CHECK_MODEL_SERIES_VALUES is on, the
        original stress series are also compared with the stored copies using
        the configured tolerances. For a dict only the names are checked.

        Parameters
        ----------
        ml : Union[ps.Model, dict]
            pastas Model

        Raises
        ------
        TypeError
            if ml is neither a pastas.Model nor a dict
        LookupError
            if a stress name is not present in the store
        ValueError
            if a model stress differs from the stored stress
        """
        prec_evap_model = ("RechargeModel", "TarsoModel")

        if isinstance(ml, ps.Model):
            for sm in ml.stressmodels.values():
                # Check class name using type instead of protected _name attribute
                if type(sm).__name__ in prec_evap_model:
                    stresses = [sm.prec, sm.evap]
                else:
                    stresses = sm.stress
                for s in stresses:
                    if str(s.name) not in self.connector.stresses.index:
                        msg = (
                            f"Cannot add model because stress '{s.name}' "
                            "is not contained in store."
                        )
                        raise LookupError(msg)
                    if not self.CHECK_MODEL_SERIES_VALUES:
                        continue
                    s_org = self.connector.get_stresses(s.name).squeeze()
                    # Access to _series_original needed for Pastas validation
                    so = s._series_original  # noqa: SLF001
                    try:
                        assert_series_equal(
                            so,
                            s_org,
                            atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
                            rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
                        )
                    except AssertionError as e:
                        raise ValueError(
                            f"Cannot add model because model stress "
                            f"'{s.name}' is different from stored stress! "
                            "See stacktrace for differences."
                        ) from e
        elif isinstance(ml, dict):
            for sm in ml["stressmodels"].values():
                smclass = sm["class"]
                if smclass in prec_evap_model:
                    stresses = [sm["prec"], sm["evap"]]
                elif smclass == "WellModel":
                    stresses = sm["stress"]
                elif "stress" in sm:
                    stresses = [sm["stress"]]
                else:
                    # stress models without a "stress" entry have no series
                    # that needs to be present in the store
                    stresses = []
                for s in stresses:
                    if str(s["name"]) not in self.connector.stresses.index:
                        msg = (
                            f"Cannot add model because stress '{s['name']}' "
                            "is not contained in store."
                        )
                        raise LookupError(msg)
        else:
            raise TypeError("Expected pastas.Model or dict!")

    def check_config_connector_type(self, path: Union[str, os.PathLike]) -> None:
        """Check if config file connector type matches connector instance.

        Nothing is checked when path is not an existing directory or when it
        contains no '*.pastastore' config file.

        Parameters
        ----------
        path : str or os.PathLike
            path to directory containing the pastastore config file

        Raises
        ------
        ValueError
            if the connector type stored in the config file differs from the
            type of the connector this validator belongs to
        """
        from pathlib import Path

        # accept plain strings as well as Path objects
        path = Path(path)
        if not path.is_dir():
            return

        config_file = next(path.glob("*.pastastore"), None)
        if config_file is None:
            return

        with config_file.open("r", encoding="utf-8") as f:
            cfg = json.load(f)

        stored_connector_type = cfg["connector_type"]
        if stored_connector_type == self.connector.conn_type:
            return

        # NOTE: delete _arctic_cfg that is created on ArcticDB init
        if self.connector.conn_type == "arcticdb":
            # ignore_errors: a missing directory must not mask the ValueError
            shutil.rmtree(path.parent / "_arctic_cfg", ignore_errors=True)
        raise ValueError(
            f"Directory '{self.connector.name}/' in use by another "
            f"connector type! Either create a '{stored_connector_type}' "
            "connector to load the current pastastore or change the "
            f"directory name to create a new '{self.connector.conn_type}' "
            "connector."
        )

    def check_series_in_models(self, libname, name):
        """Check if time series is used in any model (internal method).

        Library names other than 'oseries' and 'stresses' are ignored.

        Parameters
        ----------
        libname : str
            library name ('oseries' or 'stresses')
        name : str
            name of the time series

        Raises
        ------
        SeriesUsedByModel
            if the time series is linked to one or more models
        """
        if libname == "oseries":
            links_lib = "oseries_models"
        elif libname == "stresses":
            links_lib = "stresses_models"
        else:
            return

        self.connector._trigger_links_update_if_needed()  # trigger update if needed
        if self.connector._item_exists(links_lib, name):
            n_models = len(getattr(self.connector, links_lib)[name])
            msg = (
                "{libname} '{name}' is used in {n_models} model(s)! Either "
                "delete model(s) first, or use force=True."
            )
            raise SeriesUsedByModel(
                msg.format(libname=libname, name=name, n_models=n_models)
            )
pastastore/version.py CHANGED
@@ -6,10 +6,9 @@ import pastas as ps
6
6
  from packaging.version import parse as parse_version
7
7
 
8
8
  PASTAS_VERSION = parse_version(ps.__version__)
9
- PASTAS_LEQ_022 = PASTAS_VERSION <= parse_version("0.22.0")
10
9
  PASTAS_GEQ_150 = PASTAS_VERSION >= parse_version("1.5.0")
11
10
 
12
- __version__ = "1.10.2"
11
+ __version__ = "1.12.0"
13
12
 
14
13
 
15
14
  def show_versions(optional=False) -> None:
@@ -32,7 +31,7 @@ def show_versions(optional=False) -> None:
32
31
  msg += "\nArcticDB version : "
33
32
  try:
34
33
  import_module("arcticdb")
35
- msg += f"{metadata.version('arctidb')}"
34
+ msg += f"{metadata.version('arcticdb')}"
36
35
  except ImportError:
37
36
  msg += "Not Installed"
38
37