pastastore 1.10.2__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pastastore/connectors.py CHANGED
@@ -4,650 +4,40 @@ import json
  import logging
  import os
  import warnings
- from collections.abc import Iterable
  from concurrent.futures import ProcessPoolExecutor
  from copy import deepcopy
  from functools import partial
+ from multiprocessing import Manager
+ from pathlib import Path
 
  # import weakref
  from typing import Callable, Dict, List, Optional, Tuple, Union
 
  import pandas as pd
- import pastas as ps
- from numpy import isin
- from packaging.version import parse as parse_version
- from pandas.testing import assert_series_equal
  from pastas.io.pas import PastasEncoder, pastas_hook
  from tqdm.auto import tqdm
  from tqdm.contrib.concurrent import process_map
 
  from pastastore.base import BaseConnector, ModelAccessor
- from pastastore.util import _custom_warning
- from pastastore.version import PASTAS_LEQ_022
+ from pastastore.typing import AllLibs, FrameOrSeriesUnion, TimeSeriesLibs
+ from pastastore.util import _custom_warning, metadata_from_json, series_from_json
+ from pastastore.validator import Validator
 
- FrameorSeriesUnion = Union[pd.DataFrame, pd.Series]
  warnings.showwarning = _custom_warning
 
  logger = logging.getLogger(__name__)
 
+ # Global connector for multiprocessing workaround
+ # This is required for connectors (like ArcticDBConnector) that cannot be pickled.
+ # The initializer function in _parallel() sets this global variable in each worker
+ # process, allowing unpicklable connectors to be used with multiprocessing.
+ # See: https://docs.python.org/3/library/concurrent.futures.html#processpoolexecutor
+ # Note: Using simple None type to avoid circular import issues
+ conn = None
 
- class ConnectorUtil:
-     """Mix-in class for general Connector helper functions.
 
-     Only for internal methods, and not methods that are related to CRUD operations on
-     database.
-     """
- 
-     def _parse_names(
-         self,
-         names: Optional[Union[list, str]] = None,
-         libname: Optional[str] = "oseries",
-     ) -> list:
-         """Parse names kwarg, returns iterable with name(s) (internal method).
- 
-         Parameters
-         ----------
-         names : Union[list, str], optional
-             str or list of str or None or 'all' (last two options
-             retrieves all names)
-         libname : str, optional
-             name of library, default is 'oseries'
- 
-         Returns
-         -------
-         list
-             list of names
-         """
-         if not isinstance(names, str) and isinstance(names, Iterable):
-             return names
-         elif isinstance(names, str) and names != "all":
-             return [names]
-         elif names is None or names == "all":
-             if libname == "oseries":
-                 return self.oseries_names
-             elif libname == "stresses":
-                 return self.stresses_names
-             elif libname == "models":
-                 return self.model_names
-             elif libname == "oseries_models":
-                 return self.oseries_with_models
-             else:
-                 raise ValueError(f"No library '{libname}'!")
-         else:
-             raise NotImplementedError(f"Cannot parse 'names': {names}")
- 
-     @staticmethod
-     def _meta_list_to_frame(metalist: list, names: list):
-         """Convert list of metadata dictionaries to DataFrame.
- 
-         Parameters
-         ----------
-         metalist : list
-             list of metadata dictionaries
-         names : list
-             list of names corresponding to data in metalist
- 
-         Returns
-         -------
-         pandas.DataFrame
-             DataFrame containing overview of metadata
-         """
-         # convert to dataframe
-         if len(metalist) > 1:
-             meta = pd.DataFrame(metalist)
-             if len({"x", "y"}.difference(meta.columns)) == 0:
-                 meta["x"] = meta["x"].astype(float)
-                 meta["y"] = meta["y"].astype(float)
-         elif len(metalist) == 1:
-             meta = pd.DataFrame(metalist)
-         elif len(metalist) == 0:
-             meta = pd.DataFrame()
- 
-         meta.index = names
-         meta.index.name = "name"
-         return meta
- 
-     def _parse_model_dict(self, mdict: dict, update_ts_settings: bool = False):
-         """Parse dictionary describing pastas models (internal method).
- 
-         Parameters
-         ----------
-         mdict : dict
-             dictionary describing pastas.Model
-         update_ts_settings : bool, optional
-             update stored tmin and tmax in time series settings
-             based on time series loaded from store.
- 
-         Returns
-         -------
-         ml : pastas.Model
-             time series analysis model
-         """
-         PASFILE_LEQ_022 = parse_version(
-             mdict["file_info"]["pastas_version"]
-         ) <= parse_version("0.22.0")
- 
-         # oseries
-         if "series" not in mdict["oseries"]:
-             name = str(mdict["oseries"]["name"])
-             if name not in self.oseries.index:
-                 msg = "oseries '{}' not present in library".format(name)
-                 raise LookupError(msg)
-             mdict["oseries"]["series"] = self.get_oseries(name).squeeze()
-             # update tmin/tmax from time series
-             if update_ts_settings:
-                 mdict["oseries"]["settings"]["tmin"] = mdict["oseries"]["series"].index[
-                     0
-                 ]
-                 mdict["oseries"]["settings"]["tmax"] = mdict["oseries"]["series"].index[
-                     -1
-                 ]
- 
-         # StressModel, WellModel
-         for ts in mdict["stressmodels"].values():
-             if "stress" in ts.keys():
-                 # WellModel
-                 classkey = "stressmodel" if PASFILE_LEQ_022 else "class"
-                 if ts[classkey] == "WellModel":
-                     for stress in ts["stress"]:
-                         if "series" not in stress:
-                             name = str(stress["name"])
-                             if name in self.stresses.index:
-                                 stress["series"] = self.get_stresses(name).squeeze()
-                                 # update tmin/tmax from time series
-                                 if update_ts_settings:
-                                     stress["settings"]["tmin"] = stress["series"].index[
-                                         0
-                                     ]
-                                     stress["settings"]["tmax"] = stress["series"].index[
-                                         -1
-                                     ]
-                 # StressModel
-                 else:
-                     for stress in ts["stress"] if PASFILE_LEQ_022 else [ts["stress"]]:
-                         if "series" not in stress:
-                             name = str(stress["name"])
-                             if name in self.stresses.index:
-                                 stress["series"] = self.get_stresses(name).squeeze()
-                                 # update tmin/tmax from time series
-                                 if update_ts_settings:
-                                     stress["settings"]["tmin"] = stress["series"].index[
-                                         0
-                                     ]
-                                     stress["settings"]["tmax"] = stress["series"].index[
-                                         -1
-                                     ]
- 
-             # RechargeModel, TarsoModel
-             if ("prec" in ts.keys()) and ("evap" in ts.keys()):
-                 for stress in [ts["prec"], ts["evap"]]:
-                     if "series" not in stress:
-                         name = str(stress["name"])
-                         if name in self.stresses.index:
-                             stress["series"] = self.get_stresses(name).squeeze()
-                             # update tmin/tmax from time series
-                             if update_ts_settings:
-                                 stress["settings"]["tmin"] = stress["series"].index[0]
-                                 stress["settings"]["tmax"] = stress["series"].index[-1]
-                         else:
-                             msg = "stress '{}' not present in library".format(name)
-                             raise KeyError(msg)
- 
-         # hack for pcov w dtype object (when filled with NaNs on store?)
-         if "fit" in mdict:
-             if "pcov" in mdict["fit"]:
-                 pcov = mdict["fit"]["pcov"]
-                 if pcov.dtypes.apply(lambda dtyp: isinstance(dtyp, object)).any():
-                     mdict["fit"]["pcov"] = pcov.astype(float)
- 
-         # check pastas version vs pas-file version
-         file_version = mdict["file_info"]["pastas_version"]
- 
-         # check file version and pastas version
-         # if file<0.23 and pastas>=1.0 --> error
-         PASTAS_GT_023 = parse_version(ps.__version__) > parse_version("0.23.1")
-         if PASFILE_LEQ_022 and PASTAS_GT_023:
-             raise UserWarning(
-                 f"This file was created with Pastas v{file_version} "
-                 f"and cannot be loaded with Pastas v{ps.__version__} Please load and "
-                 "save the file with Pastas 0.23 first to update the file "
-                 "format."
-             )
- 
-         try:
-             # pastas>=0.15.0
-             ml = ps.io.base._load_model(mdict)
-         except AttributeError:
-             # pastas<0.15.0
-             ml = ps.io.base.load_model(mdict)
-         return ml
- 
-     @staticmethod
-     def _validate_input_series(series):
-         """Check if series is pandas.DataFrame or pandas.Series.
- 
-         Parameters
-         ----------
-         series : object
-             object to validate
- 
-         Raises
-         ------
-         TypeError
-             if object is not of type pandas.DataFrame or pandas.Series
-         """
-         if not (isinstance(series, pd.DataFrame) or isinstance(series, pd.Series)):
-             raise TypeError("Please provide pandas.DataFrame or pandas.Series!")
-         if isinstance(series, pd.DataFrame):
-             if series.columns.size > 1:
-                 raise ValueError("Only DataFrames with one column are supported!")
- 
-     @staticmethod
-     def _set_series_name(series, name):
-         """Set series name to match user defined name in store.
- 
-         Parameters
-         ----------
-         series : pandas.Series or pandas.DataFrame
-             set name for this time series
-         name : str
-             name of the time series (used in the pastastore)
-         """
-         if isinstance(series, pd.Series):
-             series.name = name
-             # empty string on index name causes trouble when reading
-             # data from ArcticDB: TODO: check if still an issue?
-             if series.index.name == "":
-                 series.index.name = None
- 
-         if isinstance(series, pd.DataFrame):
-             series.columns = [name]
-             # check for hydropandas objects which are instances of DataFrame but
-             # do have a name attribute
-             if hasattr(series, "name"):
-                 series.name = name
-         return series
- 
-     @staticmethod
-     def _check_stressmodels_supported(ml):
-         supported_stressmodels = [
-             "StressModel",
-             "StressModel2",
-             "RechargeModel",
-             "WellModel",
-             "TarsoModel",
-             "Constant",
-             "LinearTrend",
-             "StepModel",
-         ]
-         if isinstance(ml, ps.Model):
-             smtyps = [sm._name for sm in ml.stressmodels.values()]
-         elif isinstance(ml, dict):
-             classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
-             smtyps = [sm[classkey] for sm in ml["stressmodels"].values()]
-         check = isin(smtyps, supported_stressmodels)
-         if not all(check):
-             unsupported = set(smtyps) - set(supported_stressmodels)
-             raise NotImplementedError(
-                 "PastaStore does not support storing models with the "
-                 f"following stressmodels: {unsupported}"
-             )
- 
-     @staticmethod
-     def _check_model_series_names_for_store(ml):
-         prec_evap_model = ["RechargeModel", "TarsoModel"]
- 
-         if isinstance(ml, ps.Model):
-             series_names = [
-                 istress.series.name
-                 for sm in ml.stressmodels.values()
-                 for istress in sm.stress
-             ]
- 
-         elif isinstance(ml, dict):
-             # non RechargeModel, Tarsomodel, WellModel stressmodels
-             classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
-             if PASTAS_LEQ_022:
-                 series_names = [
-                     istress["name"]
-                     for sm in ml["stressmodels"].values()
-                     if sm[classkey] not in (prec_evap_model + ["WellModel"])
-                     for istress in sm["stress"]
-                 ]
-             else:
-                 series_names = [
-                     sm["stress"]["name"]
-                     for sm in ml["stressmodels"].values()
-                     if sm[classkey] not in (prec_evap_model + ["WellModel"])
-                 ]
- 
-             # WellModel
-             if isin(
-                 ["WellModel"],
-                 [i[classkey] for i in ml["stressmodels"].values()],
-             ).any():
-                 series_names += [
-                     istress["name"]
-                     for sm in ml["stressmodels"].values()
-                     if sm[classkey] in ["WellModel"]
-                     for istress in sm["stress"]
-                 ]
- 
-             # RechargeModel, TarsoModel
-             if isin(
-                 prec_evap_model,
-                 [i[classkey] for i in ml["stressmodels"].values()],
-             ).any():
-                 series_names += [
-                     istress["name"]
-                     for sm in ml["stressmodels"].values()
-                     if sm[classkey] in prec_evap_model
-                     for istress in [sm["prec"], sm["evap"]]
-                 ]
- 
-         else:
-             raise TypeError("Expected pastas.Model or dict!")
-         if len(series_names) - len(set(series_names)) > 0:
-             msg = (
-                 "There are multiple stresses series with the same name! "
-                 "Each series name must be unique for the PastaStore!"
-             )
-             raise ValueError(msg)
- 
-     def _check_oseries_in_store(self, ml: Union[ps.Model, dict]):
-         """Check if Model oseries are contained in PastaStore (internal method).
- 
-         Parameters
-         ----------
-         ml : Union[ps.Model, dict]
-             pastas Model
-         """
-         if isinstance(ml, ps.Model):
-             name = ml.oseries.name
-         elif isinstance(ml, dict):
-             name = str(ml["oseries"]["name"])
-         else:
-             raise TypeError("Expected pastas.Model or dict!")
-         if name not in self.oseries.index:
-             msg = (
-                 f"Cannot add model because oseries '{name}' is not contained in store."
-             )
-             raise LookupError(msg)
-         # expensive check
-         if self.CHECK_MODEL_SERIES_VALUES and isinstance(ml, ps.Model):
-             s_org = self.get_oseries(name).squeeze().dropna()
-             if PASTAS_LEQ_022:
-                 so = ml.oseries.series_original
-             else:
-                 so = ml.oseries._series_original
-             try:
-                 assert_series_equal(
-                     so.dropna(),
-                     s_org,
-                     atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
-                     rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
-                 )
-             except AssertionError as e:
-                 raise ValueError(
-                     f"Cannot add model because model oseries '{name}'"
-                     " is different from stored oseries! See stacktrace for differences."
-                 ) from e
- 
-     def _check_stresses_in_store(self, ml: Union[ps.Model, dict]):
-         """Check if stresses time series are contained in PastaStore (internal method).
- 
-         Parameters
-         ----------
-         ml : Union[ps.Model, dict]
-             pastas Model
-         """
-         prec_evap_model = ["RechargeModel", "TarsoModel"]
-         if isinstance(ml, ps.Model):
-             for sm in ml.stressmodels.values():
-                 if sm._name in prec_evap_model:
-                     stresses = [sm.prec, sm.evap]
-                 else:
-                     stresses = sm.stress
-                 for s in stresses:
-                     if str(s.name) not in self.stresses.index:
-                         msg = (
-                             f"Cannot add model because stress '{s.name}' "
-                             "is not contained in store."
-                         )
-                         raise LookupError(msg)
-                     if self.CHECK_MODEL_SERIES_VALUES:
-                         s_org = self.get_stresses(s.name).squeeze()
-                         if PASTAS_LEQ_022:
-                             so = s.series_original
-                         else:
-                             so = s._series_original
-                         try:
-                             assert_series_equal(
-                                 so,
-                                 s_org,
-                                 atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
-                                 rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
-                             )
-                         except AssertionError as e:
-                             raise ValueError(
-                                 f"Cannot add model because model stress "
-                                 f"'{s.name}' is different from stored stress! "
-                                 "See stacktrace for differences."
-                             ) from e
-         elif isinstance(ml, dict):
-             for sm in ml["stressmodels"].values():
-                 classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
-                 if sm[classkey] in prec_evap_model:
-                     stresses = [sm["prec"], sm["evap"]]
-                 elif sm[classkey] in ["WellModel"]:
-                     stresses = sm["stress"]
-                 else:
-                     stresses = sm["stress"] if PASTAS_LEQ_022 else [sm["stress"]]
-                 for s in stresses:
-                     if str(s["name"]) not in self.stresses.index:
-                         msg = (
-                             f"Cannot add model because stress '{s['name']}' "
-                             "is not contained in store."
-                         )
-                         raise LookupError(msg)
-         else:
-             raise TypeError("Expected pastas.Model or dict!")
- 
-     def _stored_series_to_json(
-         self,
-         libname: str,
-         names: Optional[Union[list, str]] = None,
-         squeeze: bool = True,
-         progressbar: bool = False,
-     ):
-         """Write stored series to JSON.
- 
-         Parameters
-         ----------
-         libname : str
-             library name
-         names : Optional[Union[list, str]], optional
-             names of series, by default None
-         squeeze : bool, optional
-             return single entry as json string instead
-             of list, by default True
-         progressbar : bool, optional
-             show progressbar, by default False
- 
-         Returns
-         -------
-         files : list or str
-             list of series converted to JSON string or single string
-             if single entry is returned and squeeze is True
-         """
-         names = self._parse_names(names, libname=libname)
-         files = []
-         for n in tqdm(names, desc=libname) if progressbar else names:
-             s = self._get_series(libname, n, progressbar=False)
-             if isinstance(s, pd.Series):
-                 s = s.to_frame()
-             try:
-                 sjson = s.to_json(orient="columns")
-             except ValueError as e:
-                 msg = (
-                     f"DatetimeIndex of '{n}' probably contains NaT "
-                     "or duplicate timestamps!"
-                 )
-                 raise ValueError(msg) from e
-             files.append(sjson)
-         if len(files) == 1 and squeeze:
-             return files[0]
-         else:
-             return files
- 
-     def _stored_metadata_to_json(
-         self,
-         libname: str,
-         names: Optional[Union[list, str]] = None,
-         squeeze: bool = True,
-         progressbar: bool = False,
-     ):
-         """Write metadata from stored series to JSON.
- 
-         Parameters
-         ----------
-         libname : str
-             library containing series
-         names : Optional[Union[list, str]], optional
-             names to parse, by default None
-         squeeze : bool, optional
-             return single entry as json string instead of list, by default True
-         progressbar : bool, optional
-             show progressbar, by default False
- 
-         Returns
-         -------
-         files : list or str
-             list of json string
-         """
-         names = self._parse_names(names, libname=libname)
-         files = []
-         for n in tqdm(names, desc=libname) if progressbar else names:
-             meta = self.get_metadata(libname, n, as_frame=False)
-             meta_json = json.dumps(meta, cls=PastasEncoder, indent=4)
-             files.append(meta_json)
-         if len(files) == 1 and squeeze:
-             return files[0]
-         else:
-             return files
- 
-     def _series_to_archive(
-         self,
-         archive,
-         libname: str,
-         names: Optional[Union[list, str]] = None,
-         progressbar: bool = True,
-     ):
-         """Write DataFrame or Series to zipfile (internal method).
- 
-         Parameters
-         ----------
-         archive : zipfile.ZipFile
-             reference to an archive to write data to
-         libname : str
-             name of the library to write to zipfile
-         names : str or list of str, optional
-             names of the time series to write to archive, by default None,
-             which writes all time series to archive
-         progressbar : bool, optional
-             show progressbar, by default True
-         """
-         names = self._parse_names(names, libname=libname)
-         for n in tqdm(names, desc=libname) if progressbar else names:
-             sjson = self._stored_series_to_json(
-                 libname, names=n, progressbar=False, squeeze=True
-             )
-             meta_json = self._stored_metadata_to_json(
-                 libname, names=n, progressbar=False, squeeze=True
-             )
-             archive.writestr(f"{libname}/{n}.pas", sjson)
-             archive.writestr(f"{libname}/{n}_meta.pas", meta_json)
- 
-     def _models_to_archive(self, archive, names=None, progressbar=True):
-         """Write pastas.Model to zipfile (internal method).
- 
-         Parameters
-         ----------
-         archive : zipfile.ZipFile
-             reference to an archive to write data to
-         names : str or list of str, optional
-             names of the models to write to archive, by default None,
-             which writes all models to archive
-         progressbar : bool, optional
-             show progressbar, by default True
-         """
-         names = self._parse_names(names, libname="models")
-         for n in tqdm(names, desc="models") if progressbar else names:
-             m = self.get_models(n, return_dict=True)
-             jsondict = json.dumps(m, cls=PastasEncoder, indent=4)
-             archive.writestr(f"models/{n}.pas", jsondict)
- 
-     @staticmethod
-     def _series_from_json(fjson: str, squeeze: bool = True):
-         """Load time series from JSON.
- 
-         Parameters
-         ----------
-         fjson : str
-             path to file
-         squeeze : bool, optional
-             squeeze time series object to obtain pandas Series
- 
-         Returns
-         -------
-         s : pd.DataFrame
-             DataFrame containing time series
-         """
-         s = pd.read_json(fjson, orient="columns", precise_float=True, dtype=False)
-         if not isinstance(s.index, pd.DatetimeIndex):
-             s.index = pd.to_datetime(s.index, unit="ms")
-         s = s.sort_index()  # needed for some reason ...
-         if squeeze:
-             return s.squeeze(axis="columns")
-         return s
- 
-     @staticmethod
-     def _metadata_from_json(fjson: str):
-         """Load metadata dictionary from JSON.
- 
-         Parameters
-         ----------
-         fjson : str
-             path to file
- 
-         Returns
-         -------
-         meta : dict
-             dictionary containing metadata
-         """
-         with open(fjson, "r") as f:
-             meta = json.load(f)
-         return meta
- 
-     def _get_model_orphans(self):
-         """Get models whose oseries no longer exist in database.
- 
-         Returns
-         -------
-         dict
-             dictionary with oseries names as keys and lists of model names
-             as values
-         """
-         d = {}
-         for mlnam in tqdm(self.model_names, desc="Identifying model orphans"):
-             mdict = self.get_models(mlnam, return_dict=True)
-             onam = mdict["oseries"]["name"]
-             if onam not in self.oseries_names:
-                 if onam in d:
-                     d[onam] = d[onam].append(mlnam)
-                 else:
-                     d[onam] = [mlnam]
-         return d
+ class ParallelUtil:
+     """Mix-in class for storing parallelizable methods."""
 
      @staticmethod
      def _solve_model(
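The module-level `conn` added above follows the standard initializer pattern for sharing unpicklable objects with ProcessPoolExecutor: each worker process rebuilds the object once in an initializer and stashes it in a module global. A minimal, self-contained sketch of the pattern, assuming nothing about pastastore internals beyond what the diff shows (the `Client` class is a hypothetical stand-in for e.g. an ArcticDB handle):

from concurrent.futures import ProcessPoolExecutor


class Client:
    """Stand-in for an unpicklable connection object (e.g. a database handle)."""

    def __init__(self, uri):
        self.uri = uri

    def fetch(self, name):
        return f"{self.uri}/{name}"


conn = None  # module-level slot, filled per worker by the initializer


def _init_worker(uri):
    # runs once in every worker process; rebuild the connection there instead
    # of pickling it across the process boundary
    global conn
    conn = Client(uri)


def work(name):
    # worker functions read the per-process global rather than taking the
    # connection as a (picklable) argument
    return conn.fetch(name)


if __name__ == "__main__":
    with ProcessPoolExecutor(
        max_workers=2, initializer=_init_worker, initargs=("lmdb://./db",)
    ) as ex:
        print(list(ex.map(work, ["a", "b", "c"])))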
@@ -675,11 +65,11 @@ class ConnectorUtil:
          arguments are passed to the solve method.
          """
          if connector is not None:
-             conn = connector
+             _conn = connector
          else:
-             conn = globals()["conn"]
+             _conn = globals()["conn"]
 
-         ml = conn.get_models(ml_name)
+         ml = _conn.get_models(ml_name)
          m_kwargs = {}
          for key, value in kwargs.items():
              if isinstance(value, pd.Series):
@@ -693,14 +83,14 @@
 
          try:
              ml.solve(report=report, **m_kwargs)
-         except Exception as e:
+         except Exception as e:  # pylint: disable=broad-except
              if ignore_solve_errors:
-                 warning = "Solve error ignored for '%s': %s " % (ml.name, e)
+                 warning = f"Solve error ignored for '{ml.name}': {e}"
                  logger.warning(warning)
              else:
                  raise e
- 
-         conn.add_model(ml, overwrite=True)
+         # store the updated model back in the database
+         _conn.add_model(ml, overwrite=True)
 
      @staticmethod
      def _get_statistics(
@@ -717,13 +107,14 @@
          of the apply method.
          """
          if connector is not None:
-             conn = connector
+             _conn = connector
          else:
-             conn = globals()["conn"]
+             _conn = globals()["conn"]
 
-         ml = conn.get_model(name)
+         ml = _conn.get_model(name)
          series = pd.Series(index=statistics, dtype=float)
          for stat in statistics:
+             # Note: ml.stats is part of pastas.Model public API
              series.loc[stat] = getattr(ml.stats, stat)(**kwargs)
          return series
 
@@ -739,17 +130,22 @@
              min(32, os.cpu_count() + 4) if max_workers is None else max_workers
          )
          if chunksize is None:
-             num_chunks = max_workers * 14
+             # 14 chunks per worker balances overhead vs granularity
+             # from stackoverflow link posted in docstring.
+             CHUNKS_PER_WORKER = 14
+             num_chunks = max_workers * CHUNKS_PER_WORKER
              chunksize = max(njobs // num_chunks, 1)
          return max_workers, chunksize
 
 
- class ArcticDBConnector(BaseConnector, ConnectorUtil):
+ class ArcticDBConnector(BaseConnector, ParallelUtil):
      """ArcticDBConnector object using ArcticDB to store data."""
 
-     conn_type = "arcticdb"
+     _conn_type = "arcticdb"
 
-     def __init__(self, name: str, uri: str, verbose: bool = True):
+     def __init__(
+         self, name: str, uri: str, verbose: bool = True, worker_process: bool = False
+     ):
          """Create an ArcticDBConnector object using ArcticDB to store data.
 
@@ -759,39 +155,75 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
          uri : str
              URI connection string (e.g. 'lmdb://<your path here>')
          verbose : bool, optional
-             whether to print message when database is initialized, by default True
+             whether to log messages when database is initialized, by default True
+         worker_process : bool, optional
+             whether the connector is created in a worker process for parallel
+             processing, by default False
          """
          try:
              import arcticdb
 
          except ModuleNotFoundError as e:
-             print("Please install arcticdb with `pip install arcticdb`!")
+             logger.error("Please install arcticdb with `pip install arcticdb`!")
              raise e
+ 
+         # avoid warn on all metadata writes
+         from arcticdb_ext import set_config_string
+ 
+         set_config_string("PickledMetadata.LogLevel", "DEBUG")
+ 
          self.uri = uri
          self.name = name
 
+         # initialize validator class to check inputs
+         self._validator = Validator(self)
+ 
+         # create libraries
          self.libs: dict = {}
          self.arc = arcticdb.Arctic(uri)
          self._initialize(verbose=verbose)
          self.models = ModelAccessor(self)
-         # for older versions of PastaStore, if oseries_models library is empty
-         # populate oseries - models database
-         self._update_all_oseries_model_links()
-         # write pstore file to store database info that can be used to load pstore
-         if "lmdb" in self.uri:
-             self.write_pstore_config_file()
+ 
+         # set shared memory manager flags for parallel operations
+         # NOTE: there is no stored reference to manager object, meaning
+         # that it cannot be properly shutdown. We let the Python garbage collector
+         # do this, but the downside is there is a risk some background
+         # processes potentially continue to run.
+         mgr = Manager()
+         self._oseries_links_need_update = mgr.Value(
+             "_oseries_links_need_update",
+             False,
+         )
+         self._stresses_links_need_update = mgr.Value(
+             "_stresses_links_need_update",
+             False,
+         )
+         if not worker_process:
+             # for older versions of PastaStore, if oseries_models library is empty
+             # populate oseries - models database
+             if (self.n_models > 0) and (
+                 len(self.oseries_models) == 0 or len(self.stresses_models) == 0
+             ):
+                 self._update_time_series_model_links(recompute=False, progressbar=True)
+             # write pstore file to store database info that can be used to load pstore
+             if "lmdb" in self.uri:
+                 self.write_pstore_config_file()
 
      def _initialize(self, verbose: bool = True) -> None:
          """Initialize the libraries (internal method)."""
+         if "lmdb" in self.uri.lower():  # only check for LMDB
+             self.validator.check_config_connector_type(
+                 Path(self.uri.split("://")[1]) / self.name
+             )
          for libname in self._default_library_names:
              if self._library_name(libname) not in self.arc.list_libraries():
                  self.arc.create_library(self._library_name(libname))
              else:
                  if verbose:
-                     print(
-                         f"ArcticDBConnector: library "
-                         f"'{self._library_name(libname)}'"
-                         " already exists. Linking to existing library."
+                     logger.info(
+                         "ArcticDBConnector: library '%s' already exists. "
+                         "Linking to existing library.",
+                         self._library_name(libname),
                      )
              self.libs[libname] = self._get_library(libname)
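The `Manager().Value` flags set up in `__init__` above give the parent process and its workers a shared, process-safe value proxy, which is how the new delayed link-update flags survive across process boundaries. A small isolated sketch of the mechanism (the flag name and typecode here are illustrative, not pastastore's):

from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager


def mark_done(flag):
    # child processes flip the proxy; the parent sees the change
    flag.value = True


if __name__ == "__main__":
    mgr = Manager()
    needs_update = mgr.Value("b", False)  # "b" = signed char, used as a boolean
    with ProcessPoolExecutor(max_workers=1) as ex:
        ex.submit(mark_done, needs_update).result()
    print(needs_update.value)  # True: the write in the worker is visible here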
 
@@ -809,20 +241,21 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
              "uri": self.uri,
          }
          if path is None and "lmdb" in self.uri:
-             path = self.uri.split("://")[1]
+             path = Path(self.uri.split("://")[1])
          elif path is None and "lmdb" not in self.uri:
              raise ValueError("Please provide a path to write the pastastore file!")
 
-         with open(
-             os.path.join(path, f"{self.name}.pastastore"), "w", encoding="utf-8"
+         with (path / self.name / f"{self.name}.pastastore").open(
+             "w",
+             encoding="utf-8",
          ) as f:
              json.dump(config, f)
 
-     def _library_name(self, libname: str) -> str:
+     def _library_name(self, libname: AllLibs) -> str:
          """Get full library name according to ArcticDB (internal method)."""
          return ".".join([self.name, libname])
 
-     def _get_library(self, libname: str):
+     def _get_library(self, libname: AllLibs):
          """Get ArcticDB library handle.
 
          Parameters
@@ -836,13 +269,15 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
              handle to the library
          """
          # get library handle
-         lib = self.arc.get_library(self._library_name(libname))
-         return lib
+         if libname in self.libs:
+             return self.libs[libname]
+         else:
+             return self.arc.get_library(self._library_name(libname))
 
      def _add_item(
          self,
-         libname: str,
-         item: Union[FrameorSeriesUnion, Dict],
+         libname: AllLibs,
+         item: Union[FrameOrSeriesUnion, Dict],
          name: str,
          metadata: Optional[Dict] = None,
          **_,
@@ -861,14 +296,22 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
              dictionary containing metadata, by default None
          """
          lib = self._get_library(libname)
+ 
+         # check file name for illegal characters
+         name = self.validator.check_filename_illegal_chars(libname, name)
+ 
          # only normalizable datatypes can be written with write, else use write_pickle
          # normalizable: Series, DataFrames, Numpy Arrays
          if isinstance(item, (dict, list)):
+             logger.debug(
+                 "Writing pickled item '%s' to ArcticDB library '%s'.", name, libname
+             )
              lib.write_pickle(name, item, metadata=metadata)
          else:
+             logger.debug("Writing item '%s' to ArcticDB library '%s'.", name, libname)
              lib.write(name, item, metadata=metadata)
 
-     def _get_item(self, libname: str, name: str) -> Union[FrameorSeriesUnion, Dict]:
+     def _get_item(self, libname: AllLibs, name: str) -> Union[FrameOrSeriesUnion, Dict]:
          """Retrieve item from library (internal method).
 
          Parameters
@@ -886,7 +329,7 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
          lib = self._get_library(libname)
          return lib.read(name).data
 
-     def _del_item(self, libname: str, name: str) -> None:
+     def _del_item(self, libname: AllLibs, name: str, force: bool = False) -> None:
          """Delete items (series or models) (internal method).
 
          Parameters
@@ -895,11 +338,15 @@
              name of library to delete item from
          name : str
              name of item to delete
+         force : bool, optional
+             force deletion even if series is used in models, by default False
          """
          lib = self._get_library(libname)
+         if self.validator.PROTECT_SERIES_IN_MODELS and not force:
+             self.validator.check_series_in_models(libname, name)
          lib.delete(name)
 
-     def _get_metadata(self, libname: str, name: str) -> dict:
+     def _get_metadata(self, libname: TimeSeriesLibs, name: str) -> dict:
          """Retrieve metadata for an item (internal method).
 
          Parameters
@@ -926,11 +373,27 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
          max_workers: Optional[int] = None,
          chunksize: Optional[int] = None,
          desc: str = "",
+         initializer: Callable = None,
+         initargs: Optional[tuple] = None,
      ):
          """Parallel processing of function.
 
          Does not return results, so function must store results in database.
 
+         Note
+         ----
+         ArcticDB connection objects cannot be pickled, which is required for
+         multiprocessing. This implementation uses an initializer function that
+         creates a new ArcticDBConnector instance in each worker process and stores
+         it in the global `conn` variable. User-provided functions can access this
+         connector via the global `conn` variable.
+ 
+         This is the standard Python multiprocessing pattern for unpicklable objects.
+         See: https://docs.python.org/3/library/concurrent.futures.html#processpoolexecutor
+ 
+         For a connector that supports direct method passing (no global variable
+         required), use PasConnector instead.
+ 
          Parameters
          ----------
          func : function
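Per the Note added to the docstring above, user functions executed through `ArcticDBConnector._parallel` reach the per-worker connector via the module global `conn`. A hedged usage sketch, assuming pastastore is installed and a stored model exists under `ml_name`; `solve_one` is illustrative and not part of the package:

import pastastore as pst


def solve_one(ml_name):
    # runs inside a worker process; the initializer in _parallel() has already
    # built an ArcticDBConnector and stored it in the module global `conn`
    conn = pst.connectors.conn
    ml = conn.get_models(ml_name)       # load the model from the store
    ml.solve(report=False)              # optimize in this worker
    conn.add_model(ml, overwrite=True)  # persist results back to the store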
@@ -947,16 +410,24 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
              chunksize for parallel processing, by default None
          desc : str, optional
              description for progressbar, by default ""
+         initializer : Callable, optional
+             function to initialize each worker process, by default None
+         initargs : tuple, optional
+             arguments to pass to initializer function, by default None
          """
-         max_workers, chunksize = ConnectorUtil._get_max_workers_and_chunksize(
+         max_workers, chunksize = self._get_max_workers_and_chunksize(
              max_workers, len(names), chunksize
          )
+         if initializer is None:
 
-         def initializer(*args):
-             global conn
-             conn = ArcticDBConnector(*args)
+             def initializer(*args):
+                 # assign to module-level variable without using 'global' statement
+                 globals()["conn"] = ArcticDBConnector(*args, worker_process=True)
 
-         initargs = (self.name, self.uri, False)
+             initargs = (self.name, self.uri, False)
+ 
+         if initargs is None:
+             initargs = ()
 
          if kwargs is None:
              kwargs = {}
@@ -979,51 +450,37 @@ class ArcticDBConnector(BaseConnector, ConnectorUtil):
              result = executor.map(
                  partial(func, **kwargs), names, chunksize=chunksize
              )
-         return result
 
-     @property
-     def oseries_names(self):
-         """List of oseries names.
+         # update links if models were stored
+         self._trigger_links_update_if_needed(modelnames=names)
 
-         Returns
-         -------
-         list
-             list of oseries in library
-         """
-         return self._get_library("oseries").list_symbols()
- 
-     @property
-     def stresses_names(self):
-         """List of stresses names.
+         return result
 
-         Returns
-         -------
-         list
-             list of stresses in library
-         """
-         return self._get_library("stresses").list_symbols()
+     def _list_symbols(self, libname: AllLibs) -> List[str]:
+         """List symbols in a library (internal method).
 
-     @property
-     def model_names(self):
-         """List of model names.
+         Parameters
+         ----------
+         libname : str
+             name of the library
 
          Returns
          -------
          list
-             list of models in library
+             list of symbols in the library
          """
-         return self._get_library("models").list_symbols()
+         return self._get_library(libname).list_symbols()
 
-     @property
-     def oseries_with_models(self):
-         """List of oseries with models."""
-         return self._get_library("oseries_models").list_symbols()
+     def _item_exists(self, libname: str, name: str) -> bool:
+         """Check if item exists without scanning directory."""
+         lib = self._get_library(libname)
+         return lib.has_symbol(name)
 
 
- class DictConnector(BaseConnector, ConnectorUtil):
+ class DictConnector(BaseConnector, ParallelUtil):
      """DictConnector object that stores timeseries and models in dictionaries."""
 
-     conn_type = "dict"
+     _conn_type = "dict"
 
      def __init__(self, name: str = "pastas_db"):
          """Create DictConnector object that stores data in dictionaries.
@@ -1033,17 +490,26 @@ class DictConnector(BaseConnector, ConnectorUtil):
          name : str, optional
              user-specified name of the connector
          """
+         super().__init__()
          self.name = name
 
          # create empty dictionaries for series and models
          for val in self._default_library_names:
              setattr(self, "lib_" + val, {})
+         self._validator = Validator(self)
          self.models = ModelAccessor(self)
          # for older versions of PastaStore, if oseries_models library is empty
          # populate oseries - models database
-         self._update_all_oseries_model_links()
+         if (self.n_models > 0) and (
+             len(self.oseries_models) == 0 or len(self.stresses_models) == 0
+         ):
+             self._update_time_series_model_links(recompute=False, progressbar=True)
 
-     def _get_library(self, libname: str):
+         # delayed update flags
+         self._oseries_links_need_update = False
+         self._stresses_links_need_update = False
+ 
+     def _get_library(self, libname: AllLibs):
          """Get reference to dictionary holding data.
 
          Parameters
@@ -1061,7 +527,7 @@ class DictConnector(BaseConnector, ConnectorUtil):
      def _add_item(
          self,
          libname: str,
-         item: Union[FrameorSeriesUnion, Dict],
+         item: Union[FrameOrSeriesUnion, Dict],
          name: str,
          metadata: Optional[Dict] = None,
          **_,
@@ -1080,12 +546,16 @@ class DictConnector(BaseConnector, ConnectorUtil):
              dictionary containing metadata, by default None
          """
          lib = self._get_library(libname)
-         if libname in ["models", "oseries_models"]:
+ 
+         # check file name for illegal characters
+         name = self.validator.check_filename_illegal_chars(libname, name)
+ 
+         if libname in ["models", "oseries_models", "stresses_models"]:
              lib[name] = item
          else:
              lib[name] = (metadata, item)
 
-     def _get_item(self, libname: str, name: str) -> Union[FrameorSeriesUnion, Dict]:
+     def _get_item(self, libname: AllLibs, name: str) -> Union[FrameOrSeriesUnion, Dict]:
          """Retrieve item from database (internal method).
 
          Parameters
@@ -1098,16 +568,18 @@ class DictConnector(BaseConnector, ConnectorUtil):
          Returns
          -------
          item : Union[FrameorSeriesUnion, Dict]
-             time series or model dictionary
+             time series or model dictionary, modifying the returned object will not
+             affect the stored data, like in a real database
          """
          lib = self._get_library(libname)
-         if libname in ["models", "oseries_models"]:
+         # deepcopy calls are needed to ensure users cannot change "stored" items
+         if libname in ["models", "oseries_models", "stresses_models"]:
              item = deepcopy(lib[name])
          else:
              item = deepcopy(lib[name][1])
          return item
 
-     def _del_item(self, libname: str, name: str) -> None:
+     def _del_item(self, libname: AllLibs, name: str, force: bool = False) -> None:
          """Delete items (series or models) (internal method).
 
          Parameters
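The deepcopy-on-read behavior documented in the hunk above is what makes DictConnector act like a real database: mutating a retrieved item must not silently mutate the stored one. A minimal illustration of the difference:

from copy import deepcopy

store = {"oseries1": {"meta": {"x": 0.0}}}

# without deepcopy, the caller holds a live reference into the "database"
leaky = store["oseries1"]
leaky["meta"]["x"] = 99.0
print(store["oseries1"]["meta"]["x"])  # 99.0 -- the stored data was changed

# with deepcopy, edits stay local to the caller
store = {"oseries1": {"meta": {"x": 0.0}}}
safe = deepcopy(store["oseries1"])
safe["meta"]["x"] = 99.0
print(store["oseries1"]["meta"]["x"])  # 0.0 -- stored data untouched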
@@ -1116,11 +588,16 @@ class DictConnector(BaseConnector, ConnectorUtil):
              name of library to delete item from
          name : str
              name of item to delete
+         force : bool, optional
+             if True, force delete item and do not perform check if series
+             is used in a model, by default False
          """
+         if self.validator.PROTECT_SERIES_IN_MODELS and not force:
+             self.validator.check_series_in_models(libname, name)
          lib = self._get_library(libname)
          _ = lib.pop(name)
 
-     def _get_metadata(self, libname: str, name: str) -> dict:
+     def _get_metadata(self, libname: TimeSeriesLibs, name: str) -> dict:
          """Read metadata (internal method).
 
          Parameters
@@ -1140,40 +617,45 @@ class DictConnector(BaseConnector, ConnectorUtil):
          return imeta
 
      def _parallel(self, *args, **kwargs) -> None:
+         """Parallel implementation method.
+ 
+         Raises
+         ------
+         NotImplementedError
+             DictConnector uses in-memory storage that cannot be shared across
+             processes. Use PasConnector or ArcticDBConnector for parallel operations.
+         """
          raise NotImplementedError(
              "DictConnector does not support parallel processing,"
              " use PasConnector or ArcticDBConnector."
          )
 
-     @property
-     def oseries_names(self):
-         """List of oseries names."""
-         lib = self._get_library("oseries")
-         return list(lib.keys())
+     def _list_symbols(self, libname: AllLibs) -> List[str]:
+         """List symbols in a library (internal method).
 
-     @property
-     def stresses_names(self):
-         """List of stresses names."""
-         lib = self._get_library("stresses")
-         return list(lib.keys())
+         Parameters
+         ----------
+         libname : str
+             name of the library
 
-     @property
-     def model_names(self):
-         """List of model names."""
-         lib = self._get_library("models")
+         Returns
+         -------
+         list
+             list of symbols in the library
+         """
+         lib = self._get_library(libname)
          return list(lib.keys())
 
-     @property
-     def oseries_with_models(self):
-         """List of oseries with models."""
-         lib = self._get_library("oseries_models")
-         return list(lib.keys())
+     def _item_exists(self, libname: str, name: str) -> bool:
+         """Check if item exists without scanning directory."""
+         lib = self._get_library(libname)
+         return name in lib
 
 
- class PasConnector(BaseConnector, ConnectorUtil):
+ class PasConnector(BaseConnector, ParallelUtil):
      """PasConnector object that stores time series and models as JSON files on disk."""
 
-     conn_type = "pas"
+     _conn_type = "pas"
 
      def __init__(self, name: str, path: str, verbose: bool = True):
          """Create PasConnector object that stores data as JSON files on disk.
@@ -1190,47 +672,72 @@ class PasConnector(BaseConnector, ConnectorUtil):
          verbose : bool, optional
              whether to print message when database is initialized, by default True
          """
+         # set shared memory flags for parallel processing
+         super().__init__()
          self.name = name
-         self.parentdir = path
-         self.path = os.path.abspath(os.path.join(path, self.name))
-         self.relpath = os.path.relpath(self.path)
+         self.parentdir = Path(path)
+         self.path = (self.parentdir / self.name).absolute()
+         self.relpath = os.path.relpath(self.parentdir)
+         self._validator = Validator(self)
          self._initialize(verbose=verbose)
          self.models = ModelAccessor(self)
+ 
+         # set shared memory manager flags for parallel operations
+         # NOTE: there is no stored reference to manager object, meaning
+         # that it cannot be properly shutdown. We let the Python garbage collector
+         # do this, but the downside is there is a risk some background
+         # processes potentially continue to run.
+         mgr = Manager()
+         self._oseries_links_need_update = mgr.Value(
+             "_oseries_links_need_update",
+             False,
+         )
+         self._stresses_links_need_update = mgr.Value(
+             "_stresses_links_need_update",
+             False,
+         )
+ 
          # for older versions of PastaStore, if oseries_models library is empty
          # populate oseries_models library
-         self._update_all_oseries_model_links()
+         if (self.n_models > 0) and (
+             len(self.oseries_models) == 0 or len(self.stresses_models) == 0
+         ):
+             self._update_time_series_model_links(recompute=False, progressbar=True)
          # write pstore file to store database info that can be used to load pstore
          self._write_pstore_config_file()
 
      def _initialize(self, verbose: bool = True) -> None:
          """Initialize the libraries (internal method)."""
+         self.validator.check_config_connector_type(self.path)
          for val in self._default_library_names:
-             libdir = os.path.join(self.path, val)
-             if not os.path.exists(libdir):
+             libdir = self.path / val
+             if not libdir.exists():
                  if verbose:
-                     print(f"PasConnector: library '{val}' created in '{libdir}'")
-                 os.makedirs(libdir)
+                     logger.info(
+                         "PasConnector: library '%s' created in '%s'", val, libdir
+                     )
+                 libdir.mkdir(parents=True, exist_ok=False)
              else:
                  if verbose:
-                     print(
-                         f"PasConnector: library '{val}' already exists. "
-                         f"Linking to existing directory: '{libdir}'"
+                     logger.info(
+                         "PasConnector: library '%s' already exists. "
+                         "Linking to existing directory: '%s'",
+                         val,
+                         libdir,
                      )
-             setattr(self, f"lib_{val}", os.path.join(self.path, val))
+             setattr(self, f"lib_{val}", self.path / val)
 
      def _write_pstore_config_file(self):
          """Write pstore configuration file to store database info."""
          config = {
              "connector_type": self.conn_type,
              "name": self.name,
-             "path": os.path.abspath(self.parentdir),
+             "path": str(self.parentdir.absolute()),
          }
-         with open(
-             os.path.join(self.path, f"{self.name}.pastastore"), "w", encoding="utf-8"
-         ) as f:
+         with (self.path / f"{self.name}.pastastore").open("w", encoding="utf-8") as f:
              json.dump(config, f)
 
-     def _get_library(self, libname: str):
+     def _get_library(self, libname: AllLibs) -> Path:
          """Get path to directory holding data.
 
          Parameters
@@ -1243,12 +750,12 @@ class PasConnector(BaseConnector, ConnectorUtil):
          lib : str
              path to library
          """
-         return getattr(self, "lib_" + libname)
+         return Path(getattr(self, "lib_" + libname))
 
      def _add_item(
          self,
          libname: str,
-         item: Union[FrameorSeriesUnion, Dict],
+         item: Union[FrameOrSeriesUnion, Dict],
          name: str,
          metadata: Optional[Dict] = None,
          **_,
@@ -1268,33 +775,52 @@ class PasConnector(BaseConnector, ConnectorUtil):
          """
          lib = self._get_library(libname)
 
+         # check file name for illegal characters
+         name = self.validator.check_filename_illegal_chars(libname, name)
+ 
          # time series
          if isinstance(item, pd.Series):
              item = item.to_frame()
          if isinstance(item, pd.DataFrame):
-             sjson = item.to_json(orient="columns")
-             fname = os.path.join(lib, f"{name}.pas")
-             with open(fname, "w") as f:
+             if type(item) is pd.DataFrame:
+                 sjson = item.to_json(orient="columns")
+             else:
+                 # workaround for subclasses of DataFrame that override to_json,
+                 # looking at you hydropandas...
+                 sjson = pd.DataFrame(item).to_json(orient="columns")
+             if name.endswith("_meta"):
+                 raise ValueError(
+                     "Time series name cannot end with '_meta'. "
+                     "Please use a different name for your time series."
+                 )
+             fname = lib / f"{name}.pas"
+             with fname.open("w", encoding="utf-8") as f:
+                 logger.debug("Writing time series '%s' to disk at '%s'.", name, fname)
                  f.write(sjson)
              if metadata is not None:
                  mjson = json.dumps(metadata, cls=PastasEncoder, indent=4)
-                 fname_meta = os.path.join(lib, f"{name}_meta.pas")
-                 with open(fname_meta, "w") as m:
+                 fname_meta = lib / f"{name}_meta.pas"
+                 with fname_meta.open("w", encoding="utf-8") as m:
+                     logger.debug(
+                         "Writing metadata '%s' to disk at '%s'.", name, fname_meta
+                     )
                      m.write(mjson)
          # pastas model dict
          elif isinstance(item, dict):
              jsondict = json.dumps(item, cls=PastasEncoder, indent=4)
-             fmodel = os.path.join(lib, f"{name}.pas")
-             with open(fmodel, "w") as fm:
+             fmodel = lib / f"{name}.pas"
+             with fmodel.open("w", encoding="utf-8") as fm:
+                 logger.debug("Writing model '%s' to disk at '%s'.", name, fmodel)
                  fm.write(jsondict)
-         # oseries_models list
+         # oseries_models or stresses_models list
          elif isinstance(item, list):
              jsondict = json.dumps(item)
-             fname = os.path.join(lib, f"{name}.pas")
-             with open(fname, "w") as fm:
+             fname = lib / f"{name}.pas"
+             with fname.open("w", encoding="utf-8") as fm:
+                 logger.debug("Writing link list '%s' to disk at '%s'.", name, fname)
                  fm.write(jsondict)
 
-     def _get_item(self, libname: str, name: str) -> Union[FrameorSeriesUnion, Dict]:
+     def _get_item(self, libname: AllLibs, name: str) -> Union[FrameOrSeriesUnion, Dict]:
          """Retrieve item (internal method).
 
          Parameters
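The exact-type check added above (`type(item) is pd.DataFrame`) matters because `isinstance` also accepts DataFrame subclasses, such as hydropandas observation objects, which may override `to_json`; re-wrapping in a plain DataFrame forces stock pandas serialization. A small sketch of the distinction (`FancyFrame` is a toy stand-in):

import pandas as pd


class FancyFrame(pd.DataFrame):
    """Toy subclass standing in for e.g. a hydropandas Obs object."""

    def to_json(self, *args, **kwargs):
        return "not actually json"  # overridden serializer


item = FancyFrame({"well1": [1.0, 2.0]})
print(isinstance(item, pd.DataFrame))  # True  -- subclass passes isinstance
print(type(item) is pd.DataFrame)      # False -- exact-type check catches it

# re-wrapping gives back plain pandas serialization
sjson = pd.DataFrame(item).to_json(orient="columns")
print(sjson)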
@@ -1310,24 +836,24 @@ class PasConnector(BaseConnector, ConnectorUtil):
              time series or model dictionary
          """
          lib = self._get_library(libname)
-         fjson = os.path.join(lib, f"{name}.pas")
-         if not os.path.exists(fjson):
+         fjson = lib / f"{name}.pas"
+         if not fjson.exists():
              msg = f"Item '{name}' not in '{libname}' library."
              raise FileNotFoundError(msg)
          # model
          if libname == "models":
-             with open(fjson, "r") as ml_json:
+             with fjson.open("r", encoding="utf-8") as ml_json:
                  item = json.load(ml_json, object_hook=pastas_hook)
          # list of models per oseries
-         elif libname == "oseries_models":
-             with open(fjson, "r") as f:
+         elif libname in ["oseries_models", "stresses_models"]:
+             with fjson.open("r", encoding="utf-8") as f:
                  item = json.load(f)
          # time series
          else:
-             item = self._series_from_json(fjson)
+             item = series_from_json(fjson)
          return item
 
-     def _del_item(self, libname: str, name: str) -> None:
+     def _del_item(self, libname: AllLibs, name: str, force: bool = False) -> None:
          """Delete items (series or models) (internal method).
 
          Parameters
@@ -1336,18 +862,23 @@ class PasConnector(BaseConnector, ConnectorUtil):
              name of library to delete item from
          name : str
              name of item to delete
+         force : bool, optional
+             if True, force delete item and do not perform check if series
+             is used in a model, by default False
          """
          lib = self._get_library(libname)
-         os.remove(os.path.join(lib, f"{name}.pas"))
+         if self.validator.PROTECT_SERIES_IN_MODELS and not force:
+             self.validator.check_series_in_models(libname, name)
+         (lib / f"{name}.pas").unlink()
          # remove metadata for time series
-         if libname != "models":
+         if libname in ["oseries", "stresses"]:
              try:
-                 os.remove(os.path.join(lib, f"{name}_meta.pas"))
+                 (lib / f"{name}_meta.pas").unlink()
              except FileNotFoundError:
                  # Nothing to delete
                  pass
 
-     def _get_metadata(self, libname: str, name: str) -> dict:
+     def _get_metadata(self, libname: TimeSeriesLibs, name: str) -> dict:
          """Read metadata (internal method).
 
          Parameters
@@ -1363,9 +894,9 @@ class PasConnector(BaseConnector, ConnectorUtil):
              dictionary containing metadata
          """
          lib = self._get_library(libname)
-         mjson = os.path.join(lib, f"{name}_meta.pas")
-         if os.path.isfile(mjson):
-             imeta = self._metadata_from_json(mjson)
+         mjson = lib / f"{name}_meta.pas"
+         if mjson.is_file():
+             imeta = metadata_from_json(mjson)
          else:
              imeta = {}
          return imeta
@@ -1379,6 +910,8 @@ class PasConnector(BaseConnector, ConnectorUtil):
          max_workers: Optional[int] = None,
          chunksize: Optional[int] = None,
          desc: str = "",
+         initializer: Callable = None,
+         initargs: Optional[tuple] = None,
      ):
          """Parallel processing of function.
 
@@ -1398,8 +931,12 @@ class PasConnector(BaseConnector, ConnectorUtil):
              chunksize for parallel processing, by default None
          desc : str, optional
              description for progressbar, by default ""
+         initializer : Callable, optional
+             function to initialize each worker process, by default None
+         initargs : tuple, optional
+             arguments to pass to initializer function, by default None
          """
-         max_workers, chunksize = ConnectorUtil._get_max_workers_and_chunksize(
+         max_workers, chunksize = self._get_max_workers_and_chunksize(
              max_workers, len(names), chunksize
          )
 
@@ -1407,51 +944,57 @@ class PasConnector(BaseConnector, ConnectorUtil):
              kwargs = {}
 
          if progressbar:
-             return process_map(
-                 partial(func, **kwargs),
-                 names,
-                 max_workers=max_workers,
-                 chunksize=chunksize,
-                 desc=desc,
-                 total=len(names),
-             )
+             if initializer is not None:
+                 result = []
+                 with tqdm(total=len(names), desc=desc) as pbar:
+                     with ProcessPoolExecutor(
+                         max_workers=max_workers,
+                         initializer=initializer,
+                         initargs=initargs,
+                     ) as executor:
+                         for item in executor.map(
+                             partial(func, **kwargs), names, chunksize=chunksize
+                         ):
+                             result.append(item)
+                             pbar.update()
+             else:
+                 result = process_map(
+                     partial(func, **kwargs),
+                     names,
+                     max_workers=max_workers,
+                     chunksize=chunksize,
+                     desc=desc,
+                     total=len(names),
+                 )
          else:
              with ProcessPoolExecutor(max_workers=max_workers) as executor:
                  result = executor.map(
                      partial(func, **kwargs), names, chunksize=chunksize
                  )
-         return result
- 
-     @property
-     def oseries_names(self):
-         """List of oseries names."""
-         lib = self._get_library("oseries")
-         return [
-             i[:-4]
-             for i in os.listdir(lib)
-             if i.endswith(".pas")
-             if not i.endswith("_meta.pas")
-         ]
- 
-     @property
-     def stresses_names(self):
-         """List of stresses names."""
-         lib = self._get_library("stresses")
-         return [
-             i[:-4]
-             for i in os.listdir(lib)
-             if i.endswith(".pas")
-             if not i.endswith("_meta.pas")
-         ]
- 
-     @property
-     def model_names(self):
-         """List of model names."""
-         lib = self._get_library("models")
-         return [i[:-4] for i in os.listdir(lib) if i.endswith(".pas")]
- 
-     @property
-     def oseries_with_models(self):
-         """List of oseries with models."""
-         lib = self._get_library("oseries_models")
-         return [i[:-4] for i in os.listdir(lib) if i.endswith(".pas")]
+ 
+         # update links if models were stored
+         self._trigger_links_update_if_needed(modelnames=names)
+ 
+         return result
+ 
+     def _list_symbols(self, libname: AllLibs) -> List[str]:
+         """List symbols in a library (internal method).
+ 
+         Parameters
+         ----------
+         libname : str
+             name of the library
+ 
+         Returns
+         -------
+         list
+             list of symbols in the library
+         """
+         lib = self._get_library(libname)
+         return [i.stem for i in lib.glob("*.pas") if not i.stem.endswith("_meta")]
+ 
+     def _item_exists(self, libname: str, name: str) -> bool:
+         """Check if item exists without scanning directory."""
+         lib = self._get_library(libname)
+         path = lib / f"{name}.pas"
+         return path.exists()
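The new `_list_symbols` collapses the four near-identical `os.listdir` properties into one pathlib-based helper. A quick standalone check of the glob/stem filtering it relies on:

from pathlib import Path
from tempfile import TemporaryDirectory

with TemporaryDirectory() as tmp:
    lib = Path(tmp)
    for fname in ["well1.pas", "well1_meta.pas", "prec.pas", "notes.txt"]:
        (lib / fname).touch()

    # same filtering as PasConnector._list_symbols: keep *.pas files,
    # drop the *_meta.pas companions, return bare names
    symbols = [i.stem for i in lib.glob("*.pas") if not i.stem.endswith("_meta")]
    print(sorted(symbols))  # ['prec', 'well1']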