pastastore 1.7.2__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
pastastore/base.py CHANGED
@@ -2,21 +2,15 @@
2
2
  """Base classes for PastaStore Connectors."""
3
3
 
4
4
  import functools
5
- import json
6
5
  import warnings
7
6
 
8
7
  # import weakref
9
8
  from abc import ABC, abstractmethod
10
- from collections.abc import Iterable
11
9
  from itertools import chain
12
- from typing import Dict, List, Optional, Tuple, Union
10
+ from typing import Callable, Dict, List, Optional, Tuple, Union
13
11
 
14
12
  import pandas as pd
15
13
  import pastas as ps
16
- from numpy import isin
17
- from packaging.version import parse as parse_version
18
- from pandas.testing import assert_series_equal
19
- from pastas.io.pas import PastasEncoder
20
14
  from tqdm.auto import tqdm
21
15
 
22
16
  from pastastore.util import ItemInLibraryException, _custom_warning, validate_names
@@ -70,7 +64,7 @@ class BaseConnector(ABC):
70
64
  def _get_library(self, libname: str):
71
65
  """Get library handle.
72
66
 
73
- Must be overriden by subclass.
67
+ Must be overridden by subclass.
74
68
 
75
69
  Parameters
76
70
  ----------
@@ -94,7 +88,7 @@ class BaseConnector(ABC):
94
88
  ) -> None:
95
89
  """Add item for both time series and pastas.Models (internal method).
96
90
 
97
- Must be overriden by subclass.
91
+ Must be overridden by subclass.
98
92
 
99
93
  Parameters
100
94
  ----------
@@ -112,7 +106,7 @@ class BaseConnector(ABC):
112
106
  def _get_item(self, libname: str, name: str) -> Union[FrameorSeriesUnion, Dict]:
113
107
  """Get item (series or pastas.Models) (internal method).
114
108
 
115
- Must be overriden by subclass.
109
+ Must be overridden by subclass.
116
110
 
117
111
  Parameters
118
112
  ----------
@@ -131,7 +125,7 @@ class BaseConnector(ABC):
131
125
  def _del_item(self, libname: str, name: str) -> None:
132
126
  """Delete items (series or models) (internal method).
133
127
 
134
- Must be overriden by subclass.
128
+ Must be overridden by subclass.
135
129
 
136
130
  Parameters
137
131
  ----------
@@ -145,7 +139,7 @@ class BaseConnector(ABC):
145
139
  def _get_metadata(self, libname: str, name: str) -> Dict:
146
140
  """Get metadata (internal method).
147
141
 
148
- Must be overriden by subclass.
142
+ Must be overridden by subclass.
149
143
 
150
144
  Parameters
151
145
  ----------
@@ -165,7 +159,7 @@ class BaseConnector(ABC):
165
159
  def oseries_names(self):
166
160
  """List of oseries names.
167
161
 
168
- Property must be overriden by subclass.
162
+ Property must be overridden by subclass.
169
163
  """
170
164
 
171
165
  @property
@@ -173,7 +167,7 @@ class BaseConnector(ABC):
173
167
  def stresses_names(self):
174
168
  """List of stresses names.
175
169
 
176
- Property must be overriden by subclass.
170
+ Property must be overridden by subclass.
177
171
  """
178
172
 
179
173
  @property
@@ -181,7 +175,37 @@ class BaseConnector(ABC):
181
175
  def model_names(self):
182
176
  """List of model names.
183
177
 
184
- Property must be overriden by subclass.
178
+ Property must be overridden by subclass.
179
+ """
180
+
181
+ @abstractmethod
182
+ def _parallel(
183
+ self,
184
+ func: Callable,
185
+ names: List[str],
186
+ progressbar: Optional[bool] = True,
187
+ max_workers: Optional[int] = None,
188
+ chunksize: Optional[int] = None,
189
+ desc: str = "",
190
+ ) -> None:
191
+ """Parallel processing of function.
192
+
193
+ Must be overridden by subclass.
194
+
195
+ Parameters
196
+ ----------
197
+ func : function
198
+ function to apply in parallel
199
+ names : list
200
+ list of names to apply function to
201
+ progressbar : bool, optional
202
+ show progressbar, by default True
203
+ max_workers : int, optional
204
+ maximum number of workers, by default None
205
+ chunksize : int, optional
206
+ chunksize for parallel processing, by default None
207
+ desc : str, optional
208
+ description for progressbar, by default ""
185
209
  """
186
210
 
187
211
  def set_check_model_series_values(self, b: bool):
@@ -308,7 +332,8 @@ class BaseConnector(ABC):
308
332
  self._clear_cache(libname)
309
333
  else:
310
334
  raise ItemInLibraryException(
311
- f"Item with name '{name}' already" f" in '{libname}' library!"
335
+ f"Time series with name '{name}' already in '{libname}' library! "
336
+ "Use overwrite=True to replace existing time series."
312
337
  )
313
338
 
314
339
  def _update_series(
@@ -551,7 +576,8 @@ class BaseConnector(ABC):
551
576
  )
552
577
  else:
553
578
  raise ItemInLibraryException(
554
- f"Model with name '{name}' " "already in 'models' library!"
579
+ f"Model with name '{name}' already in 'models' library! "
580
+ "Use overwrite=True to replace existing model."
555
581
  )
556
582
  self._clear_cache("_modelnames_cache")
557
583
  self._add_oseries_model_links(str(mldict["oseries"]["name"]), name)
@@ -1069,8 +1095,7 @@ class BaseConnector(ABC):
1069
1095
  """
1070
1096
  if prompt:
1071
1097
  ui = input(
1072
- f"Do you want to empty '{libname}'"
1073
- " library of all its contents? [y/N] "
1098
+ f"Do you want to empty '{libname}' library of all its contents? [y/N] "
1074
1099
  )
1075
1100
  if ui.lower() != "y":
1076
1101
  return
@@ -1091,7 +1116,7 @@ class BaseConnector(ABC):
1091
1116
  ):
1092
1117
  self._del_item(libname, name)
1093
1118
  self._clear_cache(libname)
1094
- print(f"Emptied library {libname} in {self.name}: " f"{self.__class__}")
1119
+ print(f"Emptied library {libname} in {self.name}: {self.__class__}")
1095
1120
 
1096
1121
  def _iter_series(self, libname: str, names: Optional[List[str]] = None):
1097
1122
  """Iterate over time series in library (internal method).
@@ -1111,8 +1136,8 @@ class BaseConnector(ABC):
1111
1136
  time series contained in library
1112
1137
  """
1113
1138
  names = self._parse_names(names, libname)
1114
- for nam in names:
1115
- yield self._get_series(libname, nam, progressbar=False)
1139
+ for name in names:
1140
+ yield self._get_series(libname, name, progressbar=False)
1116
1141
 
1117
1142
  def iter_oseries(self, names: Optional[List[str]] = None):
1118
1143
  """Iterate over oseries in library.
@@ -1338,633 +1363,39 @@ class BaseConnector(ABC):
1338
1363
  return d
1339
1364
 
1340
1365
 
1341
- class ConnectorUtil:
1342
- """Mix-in class for general Connector helper functions.
1343
-
1344
- Only for internal methods, and not methods that are related to CRUD operations on
1345
- database.
1346
- """
1347
-
1348
- def _parse_names(
1349
- self,
1350
- names: Optional[Union[list, str]] = None,
1351
- libname: Optional[str] = "oseries",
1352
- ) -> list:
1353
- """Parse names kwarg, returns iterable with name(s) (internal method).
1354
-
1355
- Parameters
1356
- ----------
1357
- names : Union[list, str], optional
1358
- str or list of str or None or 'all' (last two options
1359
- retrieves all names)
1360
- libname : str, optional
1361
- name of library, default is 'oseries'
1362
-
1363
- Returns
1364
- -------
1365
- list
1366
- list of names
1367
- """
1368
- if not isinstance(names, str) and isinstance(names, Iterable):
1369
- return names
1370
- elif isinstance(names, str) and names != "all":
1371
- return [names]
1372
- elif names is None or names == "all":
1373
- if libname == "oseries":
1374
- return self.oseries_names
1375
- elif libname == "stresses":
1376
- return self.stresses_names
1377
- elif libname == "models":
1378
- return self.model_names
1379
- elif libname == "oseries_models":
1380
- return self.oseries_with_models
1381
- else:
1382
- raise ValueError(f"No library '{libname}'!")
1383
- else:
1384
- raise NotImplementedError(f"Cannot parse 'names': {names}")
1385
-
1386
- @staticmethod
1387
- def _meta_list_to_frame(metalist: list, names: list):
1388
- """Convert list of metadata dictionaries to DataFrame.
1389
-
1390
- Parameters
1391
- ----------
1392
- metalist : list
1393
- list of metadata dictionaries
1394
- names : list
1395
- list of names corresponding to data in metalist
1396
-
1397
- Returns
1398
- -------
1399
- pandas.DataFrame
1400
- DataFrame containing overview of metadata
1401
- """
1402
- # convert to dataframe
1403
- if len(metalist) > 1:
1404
- meta = pd.DataFrame(metalist)
1405
- if len({"x", "y"}.difference(meta.columns)) == 0:
1406
- meta["x"] = meta["x"].astype(float)
1407
- meta["y"] = meta["y"].astype(float)
1408
- elif len(metalist) == 1:
1409
- meta = pd.DataFrame(metalist)
1410
- elif len(metalist) == 0:
1411
- meta = pd.DataFrame()
1412
-
1413
- meta.index = names
1414
- meta.index.name = "name"
1415
- return meta
1416
-
1417
- def _parse_model_dict(self, mdict: dict, update_ts_settings: bool = False):
1418
- """Parse dictionary describing pastas models (internal method).
1419
-
1420
- Parameters
1421
- ----------
1422
- mdict : dict
1423
- dictionary describing pastas.Model
1424
- update_ts_settings : bool, optional
1425
- update stored tmin and tmax in time series settings
1426
- based on time series loaded from store.
1427
-
1428
- Returns
1429
- -------
1430
- ml : pastas.Model
1431
- time series analysis model
1432
- """
1433
- PASFILE_LEQ_022 = parse_version(
1434
- mdict["file_info"]["pastas_version"]
1435
- ) <= parse_version("0.22.0")
1436
-
1437
- # oseries
1438
- if "series" not in mdict["oseries"]:
1439
- name = str(mdict["oseries"]["name"])
1440
- if name not in self.oseries.index:
1441
- msg = "oseries '{}' not present in library".format(name)
1442
- raise LookupError(msg)
1443
- mdict["oseries"]["series"] = self.get_oseries(name).squeeze()
1444
- # update tmin/tmax from time series
1445
- if update_ts_settings:
1446
- mdict["oseries"]["settings"]["tmin"] = mdict["oseries"]["series"].index[
1447
- 0
1448
- ]
1449
- mdict["oseries"]["settings"]["tmax"] = mdict["oseries"]["series"].index[
1450
- -1
1451
- ]
1452
-
1453
- # StressModel, WellModel
1454
- for ts in mdict["stressmodels"].values():
1455
- if "stress" in ts.keys():
1456
- # WellModel
1457
- classkey = "stressmodel" if PASFILE_LEQ_022 else "class"
1458
- if ts[classkey] == "WellModel":
1459
- for stress in ts["stress"]:
1460
- if "series" not in stress:
1461
- name = str(stress["name"])
1462
- if name in self.stresses.index:
1463
- stress["series"] = self.get_stresses(name).squeeze()
1464
- # update tmin/tmax from time series
1465
- if update_ts_settings:
1466
- stress["settings"]["tmin"] = stress["series"].index[
1467
- 0
1468
- ]
1469
- stress["settings"]["tmax"] = stress["series"].index[
1470
- -1
1471
- ]
1472
- # StressModel
1473
- else:
1474
- for stress in ts["stress"] if PASFILE_LEQ_022 else [ts["stress"]]:
1475
- if "series" not in stress:
1476
- name = str(stress["name"])
1477
- if name in self.stresses.index:
1478
- stress["series"] = self.get_stresses(name).squeeze()
1479
- # update tmin/tmax from time series
1480
- if update_ts_settings:
1481
- stress["settings"]["tmin"] = stress["series"].index[
1482
- 0
1483
- ]
1484
- stress["settings"]["tmax"] = stress["series"].index[
1485
- -1
1486
- ]
1487
-
1488
- # RechargeModel, TarsoModel
1489
- if ("prec" in ts.keys()) and ("evap" in ts.keys()):
1490
- for stress in [ts["prec"], ts["evap"]]:
1491
- if "series" not in stress:
1492
- name = str(stress["name"])
1493
- if name in self.stresses.index:
1494
- stress["series"] = self.get_stresses(name).squeeze()
1495
- # update tmin/tmax from time series
1496
- if update_ts_settings:
1497
- stress["settings"]["tmin"] = stress["series"].index[0]
1498
- stress["settings"]["tmax"] = stress["series"].index[-1]
1499
- else:
1500
- msg = "stress '{}' not present in library".format(name)
1501
- raise KeyError(msg)
1502
-
1503
- # hack for pcov w dtype object (when filled with NaNs on store?)
1504
- if "fit" in mdict:
1505
- if "pcov" in mdict["fit"]:
1506
- pcov = mdict["fit"]["pcov"]
1507
- if pcov.dtypes.apply(lambda dtyp: isinstance(dtyp, object)).any():
1508
- mdict["fit"]["pcov"] = pcov.astype(float)
1509
-
1510
- # check pastas version vs pas-file version
1511
- file_version = mdict["file_info"]["pastas_version"]
1512
-
1513
- # check file version and pastas version
1514
- # if file<0.23 and pastas>=1.0 --> error
1515
- PASTAS_GT_023 = parse_version(ps.__version__) > parse_version("0.23.1")
1516
- if PASFILE_LEQ_022 and PASTAS_GT_023:
1517
- raise UserWarning(
1518
- f"This file was created with Pastas v{file_version} "
1519
- f"and cannot be loaded with Pastas v{ps.__version__} Please load and "
1520
- "save the file with Pastas 0.23 first to update the file "
1521
- "format."
1522
- )
1523
-
1524
- try:
1525
- # pastas>=0.15.0
1526
- ml = ps.io.base._load_model(mdict)
1527
- except AttributeError:
1528
- # pastas<0.15.0
1529
- ml = ps.io.base.load_model(mdict)
1530
- return ml
1531
-
1532
- @staticmethod
1533
- def _validate_input_series(series):
1534
- """Check if series is pandas.DataFrame or pandas.Series.
1535
-
1536
- Parameters
1537
- ----------
1538
- series : object
1539
- object to validate
1540
-
1541
- Raises
1542
- ------
1543
- TypeError
1544
- if object is not of type pandas.DataFrame or pandas.Series
1545
- """
1546
- if not (isinstance(series, pd.DataFrame) or isinstance(series, pd.Series)):
1547
- raise TypeError("Please provide pandas.DataFrame or pandas.Series!")
1548
- if isinstance(series, pd.DataFrame):
1549
- if series.columns.size > 1:
1550
- raise ValueError("Only DataFrames with one column are supported!")
1551
-
1552
- @staticmethod
1553
- def _set_series_name(series, name):
1554
- """Set series name to match user defined name in store.
1555
-
1556
- Parameters
1557
- ----------
1558
- series : pandas.Series or pandas.DataFrame
1559
- set name for this time series
1560
- name : str
1561
- name of the time series (used in the pastastore)
1562
- """
1563
- if isinstance(series, pd.Series):
1564
- series.name = name
1565
- # empty string on index name causes trouble when reading
1566
- # data from ArcticDB: TODO: check if still an issue?
1567
- if series.index.name == "":
1568
- series.index.name = None
1569
-
1570
- if isinstance(series, pd.DataFrame):
1571
- series.columns = [name]
1572
- # check for hydropandas objects which are instances of DataFrame but
1573
- # do have a name attribute
1574
- if hasattr(series, "name"):
1575
- series.name = name
1576
- return series
1577
-
1578
- @staticmethod
1579
- def _check_stressmodels_supported(ml):
1580
- supported_stressmodels = [
1581
- "StressModel",
1582
- "StressModel2",
1583
- "RechargeModel",
1584
- "WellModel",
1585
- "TarsoModel",
1586
- "Constant",
1587
- "LinearTrend",
1588
- "StepModel",
1589
- ]
1590
- if isinstance(ml, ps.Model):
1591
- smtyps = [sm._name for sm in ml.stressmodels.values()]
1592
- elif isinstance(ml, dict):
1593
- classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
1594
- smtyps = [sm[classkey] for sm in ml["stressmodels"].values()]
1595
- check = isin(smtyps, supported_stressmodels)
1596
- if not all(check):
1597
- unsupported = set(smtyps) - set(supported_stressmodels)
1598
- raise NotImplementedError(
1599
- "PastaStore does not support storing models with the "
1600
- f"following stressmodels: {unsupported}"
1601
- )
1602
-
1603
- @staticmethod
1604
- def _check_model_series_names_for_store(ml):
1605
- prec_evap_model = ["RechargeModel", "TarsoModel"]
1606
-
1607
- if isinstance(ml, ps.Model):
1608
- series_names = [
1609
- istress.series.name
1610
- for sm in ml.stressmodels.values()
1611
- for istress in sm.stress
1612
- ]
1613
-
1614
- elif isinstance(ml, dict):
1615
- # non RechargeModel, Tarsomodel, WellModel stressmodels
1616
- classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
1617
- if PASTAS_LEQ_022:
1618
- series_names = [
1619
- istress["name"]
1620
- for sm in ml["stressmodels"].values()
1621
- if sm[classkey] not in (prec_evap_model + ["WellModel"])
1622
- for istress in sm["stress"]
1623
- ]
1624
- else:
1625
- series_names = [
1626
- sm["stress"]["name"]
1627
- for sm in ml["stressmodels"].values()
1628
- if sm[classkey] not in (prec_evap_model + ["WellModel"])
1629
- ]
1630
-
1631
- # WellModel
1632
- if isin(
1633
- ["WellModel"],
1634
- [i[classkey] for i in ml["stressmodels"].values()],
1635
- ).any():
1636
- series_names += [
1637
- istress["name"]
1638
- for sm in ml["stressmodels"].values()
1639
- if sm[classkey] in ["WellModel"]
1640
- for istress in sm["stress"]
1641
- ]
1642
-
1643
- # RechargeModel, TarsoModel
1644
- if isin(
1645
- prec_evap_model,
1646
- [i[classkey] for i in ml["stressmodels"].values()],
1647
- ).any():
1648
- series_names += [
1649
- istress["name"]
1650
- for sm in ml["stressmodels"].values()
1651
- if sm[classkey] in prec_evap_model
1652
- for istress in [sm["prec"], sm["evap"]]
1653
- ]
1654
-
1655
- else:
1656
- raise TypeError("Expected pastas.Model or dict!")
1657
- if len(series_names) - len(set(series_names)) > 0:
1658
- msg = (
1659
- "There are multiple stresses series with the same name! "
1660
- "Each series name must be unique for the PastaStore!"
1661
- )
1662
- raise ValueError(msg)
1663
-
1664
- def _check_oseries_in_store(self, ml: Union[ps.Model, dict]):
1665
- """Check if Model oseries are contained in PastaStore (internal method).
1666
-
1667
- Parameters
1668
- ----------
1669
- ml : Union[ps.Model, dict]
1670
- pastas Model
1671
- """
1672
- if isinstance(ml, ps.Model):
1673
- name = ml.oseries.name
1674
- elif isinstance(ml, dict):
1675
- name = str(ml["oseries"]["name"])
1676
- else:
1677
- raise TypeError("Expected pastas.Model or dict!")
1678
- if name not in self.oseries.index:
1679
- msg = (
1680
- f"Cannot add model because oseries '{name}' "
1681
- "is not contained in store."
1682
- )
1683
- raise LookupError(msg)
1684
- # expensive check
1685
- if self.CHECK_MODEL_SERIES_VALUES and isinstance(ml, ps.Model):
1686
- s_org = self.get_oseries(name).squeeze().dropna()
1687
- if PASTAS_LEQ_022:
1688
- so = ml.oseries.series_original
1689
- else:
1690
- so = ml.oseries._series_original
1691
- try:
1692
- assert_series_equal(
1693
- so.dropna(),
1694
- s_org,
1695
- atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
1696
- rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
1697
- )
1698
- except AssertionError as e:
1699
- raise ValueError(
1700
- f"Cannot add model because model oseries '{name}'"
1701
- " is different from stored oseries! See stacktrace for differences."
1702
- ) from e
1703
-
1704
- def _check_stresses_in_store(self, ml: Union[ps.Model, dict]):
1705
- """Check if stresses time series are contained in PastaStore (internal method).
1706
-
1707
- Parameters
1708
- ----------
1709
- ml : Union[ps.Model, dict]
1710
- pastas Model
1711
- """
1712
- prec_evap_model = ["RechargeModel", "TarsoModel"]
1713
- if isinstance(ml, ps.Model):
1714
- for sm in ml.stressmodels.values():
1715
- if sm._name in prec_evap_model:
1716
- stresses = [sm.prec, sm.evap]
1717
- else:
1718
- stresses = sm.stress
1719
- for s in stresses:
1720
- if str(s.name) not in self.stresses.index:
1721
- msg = (
1722
- f"Cannot add model because stress '{s.name}' "
1723
- "is not contained in store."
1724
- )
1725
- raise LookupError(msg)
1726
- if self.CHECK_MODEL_SERIES_VALUES:
1727
- s_org = self.get_stresses(s.name).squeeze()
1728
- if PASTAS_LEQ_022:
1729
- so = s.series_original
1730
- else:
1731
- so = s._series_original
1732
- try:
1733
- assert_series_equal(
1734
- so,
1735
- s_org,
1736
- atol=self.SERIES_EQUALITY_ABSOLUTE_TOLERANCE,
1737
- rtol=self.SERIES_EQUALITY_RELATIVE_TOLERANCE,
1738
- )
1739
- except AssertionError as e:
1740
- raise ValueError(
1741
- f"Cannot add model because model stress "
1742
- f"'{s.name}' is different from stored stress! "
1743
- "See stacktrace for differences."
1744
- ) from e
1745
- elif isinstance(ml, dict):
1746
- for sm in ml["stressmodels"].values():
1747
- classkey = "stressmodel" if PASTAS_LEQ_022 else "class"
1748
- if sm[classkey] in prec_evap_model:
1749
- stresses = [sm["prec"], sm["evap"]]
1750
- elif sm[classkey] in ["WellModel"]:
1751
- stresses = sm["stress"]
1752
- else:
1753
- stresses = sm["stress"] if PASTAS_LEQ_022 else [sm["stress"]]
1754
- for s in stresses:
1755
- if str(s["name"]) not in self.stresses.index:
1756
- msg = (
1757
- f"Cannot add model because stress '{s['name']}' "
1758
- "is not contained in store."
1759
- )
1760
- raise LookupError(msg)
1761
- else:
1762
- raise TypeError("Expected pastas.Model or dict!")
1763
-
1764
- def _stored_series_to_json(
1765
- self,
1766
- libname: str,
1767
- names: Optional[Union[list, str]] = None,
1768
- squeeze: bool = True,
1769
- progressbar: bool = False,
1770
- ):
1771
- """Write stored series to JSON.
1772
-
1773
- Parameters
1774
- ----------
1775
- libname : str
1776
- library name
1777
- names : Optional[Union[list, str]], optional
1778
- names of series, by default None
1779
- squeeze : bool, optional
1780
- return single entry as json string instead
1781
- of list, by default True
1782
- progressbar : bool, optional
1783
- show progressbar, by default False
1784
-
1785
- Returns
1786
- -------
1787
- files : list or str
1788
- list of series converted to JSON string or single string
1789
- if single entry is returned and squeeze is True
1790
- """
1791
- names = self._parse_names(names, libname=libname)
1792
- files = []
1793
- for n in tqdm(names, desc=libname) if progressbar else names:
1794
- s = self._get_series(libname, n, progressbar=False)
1795
- if isinstance(s, pd.Series):
1796
- s = s.to_frame()
1797
- try:
1798
- sjson = s.to_json(orient="columns")
1799
- except ValueError as e:
1800
- msg = (
1801
- f"DatetimeIndex of '{n}' probably contains NaT "
1802
- "or duplicate timestamps!"
1803
- )
1804
- raise ValueError(msg) from e
1805
- files.append(sjson)
1806
- if len(files) == 1 and squeeze:
1807
- return files[0]
1808
- else:
1809
- return files
1810
-
1811
- def _stored_metadata_to_json(
1812
- self,
1813
- libname: str,
1814
- names: Optional[Union[list, str]] = None,
1815
- squeeze: bool = True,
1816
- progressbar: bool = False,
1817
- ):
1818
- """Write metadata from stored series to JSON.
1819
-
1820
- Parameters
1821
- ----------
1822
- libname : str
1823
- library containing series
1824
- names : Optional[Union[list, str]], optional
1825
- names to parse, by default None
1826
- squeeze : bool, optional
1827
- return single entry as json string instead of list, by default True
1828
- progressbar : bool, optional
1829
- show progressbar, by default False
1830
-
1831
- Returns
1832
- -------
1833
- files : list or str
1834
- list of json string
1835
- """
1836
- names = self._parse_names(names, libname=libname)
1837
- files = []
1838
- for n in tqdm(names, desc=libname) if progressbar else names:
1839
- meta = self.get_metadata(libname, n, as_frame=False)
1840
- meta_json = json.dumps(meta, cls=PastasEncoder, indent=4)
1841
- files.append(meta_json)
1842
- if len(files) == 1 and squeeze:
1843
- return files[0]
1844
- else:
1845
- return files
1846
-
1847
- def _series_to_archive(
1848
- self,
1849
- archive,
1850
- libname: str,
1851
- names: Optional[Union[list, str]] = None,
1852
- progressbar: bool = True,
1853
- ):
1854
- """Write DataFrame or Series to zipfile (internal method).
1855
-
1856
- Parameters
1857
- ----------
1858
- archive : zipfile.ZipFile
1859
- reference to an archive to write data to
1860
- libname : str
1861
- name of the library to write to zipfile
1862
- names : str or list of str, optional
1863
- names of the time series to write to archive, by default None,
1864
- which writes all time series to archive
1865
- progressbar : bool, optional
1866
- show progressbar, by default True
1867
- """
1868
- names = self._parse_names(names, libname=libname)
1869
- for n in tqdm(names, desc=libname) if progressbar else names:
1870
- sjson = self._stored_series_to_json(
1871
- libname, names=n, progressbar=False, squeeze=True
1872
- )
1873
- meta_json = self._stored_metadata_to_json(
1874
- libname, names=n, progressbar=False, squeeze=True
1875
- )
1876
- archive.writestr(f"{libname}/{n}.json", sjson)
1877
- archive.writestr(f"{libname}/{n}_meta.json", meta_json)
1366
+ class ModelAccessor:
1367
+ """Object for managing access to stored models.
1878
1368
 
1879
- def _models_to_archive(self, archive, names=None, progressbar=True):
1880
- """Write pastas.Model to zipfile (internal method).
1369
+ The ModelAccessor object allows dictionary-like assignment and access to models.
1370
+ In addition it provides some useful utilities for working with stored models
1371
+ in the database.
1881
1372
 
1882
- Parameters
1883
- ----------
1884
- archive : zipfile.ZipFile
1885
- reference to an archive to write data to
1886
- names : str or list of str, optional
1887
- names of the models to write to archive, by default None,
1888
- which writes all models to archive
1889
- progressbar : bool, optional
1890
- show progressbar, by default True
1891
- """
1892
- names = self._parse_names(names, libname="models")
1893
- for n in tqdm(names, desc="models") if progressbar else names:
1894
- m = self.get_models(n, return_dict=True)
1895
- jsondict = json.dumps(m, cls=PastasEncoder, indent=4)
1896
- archive.writestr(f"models/{n}.pas", jsondict)
1373
+ Examples
1374
+ --------
1375
+ Get a model by name::
1897
1376
 
1898
- @staticmethod
1899
- def _series_from_json(fjson: str, squeeze: bool = True):
1900
- """Load time series from JSON.
1377
+ >>> model = pstore.models["my_model"]
1901
1378
 
1902
- Parameters
1903
- ----------
1904
- fjson : str
1905
- path to file
1906
- squeeze : bool, optional
1907
- squeeze time series object to obtain pandas Series
1379
+ Store a model in the database::
1908
1380
 
1909
- Returns
1910
- -------
1911
- s : pd.DataFrame
1912
- DataFrame containing time series
1913
- """
1914
- s = pd.read_json(fjson, orient="columns", precise_float=True, dtype=False)
1915
- if not isinstance(s.index, pd.DatetimeIndex):
1916
- s.index = pd.to_datetime(s.index, unit="ms")
1917
- s = s.sort_index() # needed for some reason ...
1918
- if squeeze:
1919
- return s.squeeze()
1920
- return s
1381
+ >>> pstore.models["my_model_v2"] = model
1921
1382
 
1922
- @staticmethod
1923
- def _metadata_from_json(fjson: str):
1924
- """Load metadata dictionary from JSON.
1383
+ Get model metadata dataframe::
1925
1384
 
1926
- Parameters
1927
- ----------
1928
- fjson : str
1929
- path to file
1385
+ >>> pstore.models.metadata
1930
1386
 
1931
- Returns
1932
- -------
1933
- meta : dict
1934
- dictionary containing metadata
1935
- """
1936
- with open(fjson, "r") as f:
1937
- meta = json.load(f)
1938
- return meta
1387
+ Number of models::
1939
1388
 
1940
- def _get_model_orphans(self):
1941
- """Get models whose oseries no longer exist in database.
1389
+ >>> len(pstore.models)
1942
1390
 
1943
- Returns
1944
- -------
1945
- dict
1946
- dictionary with oseries names as keys and lists of model names
1947
- as values
1948
- """
1949
- d = {}
1950
- for mlnam in tqdm(self.model_names, desc="Identifying model orphans"):
1951
- mdict = self.get_models(mlnam, return_dict=True)
1952
- onam = mdict["oseries"]["name"]
1953
- if onam not in self.oseries_names:
1954
- if onam in d:
1955
- d[onam] = d[onam].append(mlnam)
1956
- else:
1957
- d[onam] = [mlnam]
1958
- return d
1391
+ Random model::
1959
1392
 
1393
+ >>> model = pstore.models.random()
1960
1394
 
1961
- class ModelAccessor:
1962
- """Object for managing access to stored models.
1395
+ Iterate over stored models::
1963
1396
 
1964
- Provides dict-like access to models (i.e. PastaStore.models["model1"]), or allows
1965
- adding models to the PastaStore using dict-like assignment (i.e.
1966
- PastaStore.models["model1"] = ml), and it can serve as an iterator (i.e. [ml for ml
1967
- in pstore.models]).
1397
+ >>> for ml in pstore.models:
1398
+ ...     ml.solve()
1968
1399
  """
1969
1400
 
1970
1401
  def __init__(self, conn):
@@ -1978,8 +1409,11 @@ class ModelAccessor:
1978
1409
  self.conn = conn
1979
1410
 
1980
1411
  def __repr__(self):
1981
- """Representation of the object is a list of modelnames."""
1982
- return self.conn._modelnames_cache.__repr__()
1412
+ """Representation contains the number of models and the list of model names."""
1413
+ return (
1414
+ f"<{self.__class__.__name__}> {len(self)} model(s): \n"
1415
+ + self.conn._modelnames_cache.__repr__()
1416
+ )
1983
1417
 
1984
1418
  def __getitem__(self, name: str):
1985
1419
  """Get model from store with model name as key.
@@ -2032,3 +1466,27 @@ class ModelAccessor:
2032
1466
  from random import choice
2033
1467
 
2034
1468
  return self.conn.get_models(choice(self.conn._modelnames_cache))
1469
+
1470
+ @property
1471
+ def metadata(self):
1472
+ """Dataframe with overview of models metadata."""
1473
+ # NOTE: cannot be cached as this dataframe is not a property of the connector
1474
+ # I'm not sure how to clear this cache when models are added/removed.
1475
+ idx = pd.MultiIndex.from_tuples(
1476
+ ((k, i) for k, v in self.conn.oseries_models.items() for i in v),
1477
+ names=["oseries", "modelname"],
1478
+ )
1479
+ modeldf = pd.DataFrame(index=idx)
1480
+ modeldf = modeldf.join(
1481
+ self.conn.oseries, on=modeldf.index.get_level_values(0)
1482
+ ).drop("key_0", axis=1)
1483
+ modeldf["n_stressmodels"] = 0
1484
+ for onam, mlnam in modeldf.index:
1485
+ mldict = self.conn.get_models(mlnam, return_dict=True)
1486
+ modeldf.loc[(onam, mlnam), "n_stressmodels"] = len(mldict["stressmodels"])
1487
+ modeldf.loc[(onam, mlnam), "stressmodel_names"] = ",".join(
1488
+ list(mldict["stressmodels"].keys())
1489
+ )
1490
+ for setting in mldict["settings"].keys():
1491
+ modeldf.loc[(onam, mlnam), setting] = mldict["settings"][setting]
1492
+ return modeldf