flixopt 2.2.0rc2__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flixopt/__init__.py +33 -4
- flixopt/aggregation.py +60 -80
- flixopt/calculation.py +403 -182
- flixopt/commons.py +1 -10
- flixopt/components.py +939 -448
- flixopt/config.py +553 -191
- flixopt/core.py +513 -846
- flixopt/effects.py +644 -178
- flixopt/elements.py +610 -355
- flixopt/features.py +394 -966
- flixopt/flow_system.py +736 -219
- flixopt/interface.py +1104 -302
- flixopt/io.py +103 -79
- flixopt/linear_converters.py +387 -95
- flixopt/modeling.py +757 -0
- flixopt/network_app.py +73 -39
- flixopt/plotting.py +294 -138
- flixopt/results.py +1254 -300
- flixopt/solvers.py +25 -21
- flixopt/structure.py +938 -396
- flixopt/utils.py +36 -12
- flixopt-3.0.1.dist-info/METADATA +209 -0
- flixopt-3.0.1.dist-info/RECORD +26 -0
- flixopt-3.0.1.dist-info/top_level.txt +1 -0
- docs/examples/00-Minimal Example.md +0 -5
- docs/examples/01-Basic Example.md +0 -5
- docs/examples/02-Complex Example.md +0 -10
- docs/examples/03-Calculation Modes.md +0 -5
- docs/examples/index.md +0 -5
- docs/faq/contribute.md +0 -61
- docs/faq/index.md +0 -3
- docs/images/architecture_flixOpt-pre2.0.0.png +0 -0
- docs/images/architecture_flixOpt.png +0 -0
- docs/images/flixopt-icon.svg +0 -1
- docs/javascripts/mathjax.js +0 -18
- docs/user-guide/Mathematical Notation/Bus.md +0 -33
- docs/user-guide/Mathematical Notation/Effects, Penalty & Objective.md +0 -132
- docs/user-guide/Mathematical Notation/Flow.md +0 -26
- docs/user-guide/Mathematical Notation/LinearConverter.md +0 -21
- docs/user-guide/Mathematical Notation/Piecewise.md +0 -49
- docs/user-guide/Mathematical Notation/Storage.md +0 -44
- docs/user-guide/Mathematical Notation/index.md +0 -22
- docs/user-guide/Mathematical Notation/others.md +0 -3
- docs/user-guide/index.md +0 -124
- flixopt/config.yaml +0 -10
- flixopt-2.2.0rc2.dist-info/METADATA +0 -167
- flixopt-2.2.0rc2.dist-info/RECORD +0 -54
- flixopt-2.2.0rc2.dist-info/top_level.txt +0 -5
- pics/architecture_flixOpt-pre2.0.0.png +0 -0
- pics/architecture_flixOpt.png +0 -0
- pics/flixOpt_plotting.jpg +0 -0
- pics/flixopt-icon.svg +0 -1
- pics/pics.pptx +0 -0
- scripts/extract_release_notes.py +0 -45
- scripts/gen_ref_pages.py +0 -54
- tests/ressources/Zeitreihen2020.csv +0 -35137
- {flixopt-2.2.0rc2.dist-info → flixopt-3.0.1.dist-info}/WHEEL +0 -0
- {flixopt-2.2.0rc2.dist-info → flixopt-3.0.1.dist-info}/licenses/LICENSE +0 -0
flixopt/core.py
CHANGED
|
@@ -3,12 +3,10 @@ This module contains the core functionality of the flixopt framework.
|
|
|
3
3
|
It provides Datatypes, logging functionality, and some functions to transform data structures.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
-
import inspect
|
|
7
|
-
import json
|
|
8
6
|
import logging
|
|
9
|
-
import
|
|
10
|
-
from
|
|
11
|
-
from typing import Any,
|
|
7
|
+
import warnings
|
|
8
|
+
from itertools import permutations
|
|
9
|
+
from typing import Any, Literal, Union
|
|
12
10
|
|
|
13
11
|
import numpy as np
|
|
14
12
|
import pandas as pd
|
|
@@ -16,14 +14,17 @@ import xarray as xr
|
|
|
16
14
|
|
|
17
15
|
logger = logging.getLogger('flixopt')
|
|
18
16
|
|
|
19
|
-
Scalar =
|
|
20
|
-
"""A
|
|
17
|
+
Scalar = int | float
|
|
18
|
+
"""A single number, either integer or float."""
|
|
21
19
|
|
|
22
|
-
|
|
23
|
-
"""
|
|
20
|
+
PeriodicDataUser = int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray
|
|
21
|
+
"""User data which has no time dimension. Internally converted to a Scalar or an xr.DataArray without a time dimension."""
|
|
24
22
|
|
|
25
|
-
|
|
26
|
-
"""
|
|
23
|
+
PeriodicData = xr.DataArray
|
|
24
|
+
"""Internally used datatypes for periodic data."""
|
|
25
|
+
|
|
26
|
+
FlowSystemDimensions = Literal['time', 'period', 'scenario']
|
|
27
|
+
"""Possible dimensions of a FlowSystem."""
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
class PlausibilityError(Exception):
|
|
@@ -38,941 +39,607 @@ class ConversionError(Exception):
|
|
|
38
39
|
pass
|
|
39
40
|
|
|
40
41
|
|
|
41
|
-
class
|
|
42
|
-
"""
|
|
43
|
-
Converts various data types into xarray.DataArray with a timesteps index.
|
|
44
|
-
|
|
45
|
-
Supports: scalars, arrays, Series, DataFrames, and DataArrays.
|
|
46
|
-
"""
|
|
47
|
-
|
|
48
|
-
@staticmethod
|
|
49
|
-
def as_dataarray(data: NumericData, timesteps: pd.DatetimeIndex) -> xr.DataArray:
|
|
50
|
-
"""Convert data to xarray.DataArray with specified timesteps index."""
|
|
51
|
-
if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0:
|
|
52
|
-
raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}')
|
|
53
|
-
if not timesteps.name == 'time':
|
|
54
|
-
raise ConversionError(f'DatetimeIndex is not named correctly. Must be named "time", got {timesteps.name=}')
|
|
55
|
-
|
|
56
|
-
coords = [timesteps]
|
|
57
|
-
dims = ['time']
|
|
58
|
-
expected_shape = (len(timesteps),)
|
|
42
|
+
class TimeSeriesData(xr.DataArray):
|
|
43
|
+
"""Minimal TimeSeriesData that inherits from xr.DataArray with aggregation metadata."""
|
|
59
44
|
|
|
60
|
-
|
|
61
|
-
if isinstance(data, (int, float, np.integer, np.floating)):
|
|
62
|
-
return xr.DataArray(data, coords=coords, dims=dims)
|
|
63
|
-
elif isinstance(data, pd.DataFrame):
|
|
64
|
-
if not data.index.equals(timesteps):
|
|
65
|
-
raise ConversionError(
|
|
66
|
-
f"DataFrame index doesn't match timesteps index. "
|
|
67
|
-
f'Its missing the following time steps: {timesteps.difference(data.index)}. '
|
|
68
|
-
f'Some parameters might need an extra timestep at the end.'
|
|
69
|
-
)
|
|
70
|
-
if not len(data.columns) == 1:
|
|
71
|
-
raise ConversionError('DataFrame must have exactly one column')
|
|
72
|
-
return xr.DataArray(data.values.flatten(), coords=coords, dims=dims)
|
|
73
|
-
elif isinstance(data, pd.Series):
|
|
74
|
-
if not data.index.equals(timesteps):
|
|
75
|
-
raise ConversionError(
|
|
76
|
-
f"Series index doesn't match timesteps index. "
|
|
77
|
-
f'Its missing the following time steps: {timesteps.difference(data.index)}. '
|
|
78
|
-
f'Some parameters might need an extra timestep at the end.'
|
|
79
|
-
)
|
|
80
|
-
return xr.DataArray(data.values, coords=coords, dims=dims)
|
|
81
|
-
elif isinstance(data, np.ndarray):
|
|
82
|
-
if data.ndim != 1:
|
|
83
|
-
raise ConversionError(f'Array must be 1-dimensional, got {data.ndim}')
|
|
84
|
-
elif data.shape[0] != expected_shape[0]:
|
|
85
|
-
raise ConversionError(f"Array shape {data.shape} doesn't match expected {expected_shape}")
|
|
86
|
-
return xr.DataArray(data, coords=coords, dims=dims)
|
|
87
|
-
elif isinstance(data, xr.DataArray):
|
|
88
|
-
if data.dims != tuple(dims):
|
|
89
|
-
raise ConversionError(f"DataArray dimensions {data.dims} don't match expected {dims}")
|
|
90
|
-
if data.sizes[dims[0]] != len(coords[0]):
|
|
91
|
-
raise ConversionError(
|
|
92
|
-
f"DataArray length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}"
|
|
93
|
-
)
|
|
94
|
-
return data.copy(deep=True)
|
|
95
|
-
else:
|
|
96
|
-
raise ConversionError(f'Unsupported type: {type(data).__name__}')
|
|
97
|
-
except Exception as e:
|
|
98
|
-
if isinstance(e, ConversionError):
|
|
99
|
-
raise
|
|
100
|
-
raise ConversionError(f'Converting data {type(data)} to xarray.Dataset raised an error: {str(e)}') from e
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
class TimeSeriesData:
|
|
104
|
-
# TODO: Move to Interface.py
|
|
105
|
-
def __init__(self, data: NumericData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None):
|
|
106
|
-
"""
|
|
107
|
-
timeseries class for transmit timeseries AND special characteristics of timeseries,
|
|
108
|
-
i.g. to define weights needed in calculation_type 'aggregated'
|
|
109
|
-
EXAMPLE solar:
|
|
110
|
-
you have several solar timeseries. These should not be overweighted
|
|
111
|
-
compared to the remaining timeseries (i.g. heat load, price)!
|
|
112
|
-
fixed_relative_profile_solar1 = TimeSeriesData(sol_array_1, type = 'solar')
|
|
113
|
-
fixed_relative_profile_solar2 = TimeSeriesData(sol_array_2, type = 'solar')
|
|
114
|
-
fixed_relative_profile_solar3 = TimeSeriesData(sol_array_3, type = 'solar')
|
|
115
|
-
--> this 3 series of same type share one weight, i.e. internally assigned each weight = 1/3
|
|
116
|
-
(instead of standard weight = 1)
|
|
117
|
-
|
|
118
|
-
Args:
|
|
119
|
-
data: The timeseries data, which can be a scalar, array, or numpy array.
|
|
120
|
-
agg_group: The group this TimeSeriesData is a part of. agg_weight is split between members of a group. Default is None.
|
|
121
|
-
agg_weight: The weight for calculation_type 'aggregated', should be between 0 and 1. Default is None.
|
|
122
|
-
|
|
123
|
-
Raises:
|
|
124
|
-
Exception: If both agg_group and agg_weight are set, an exception is raised.
|
|
125
|
-
"""
|
|
126
|
-
self.data = data
|
|
127
|
-
self.agg_group = agg_group
|
|
128
|
-
self.agg_weight = agg_weight
|
|
129
|
-
if (agg_group is not None) and (agg_weight is not None):
|
|
130
|
-
raise ValueError('Either <agg_group> or explicit <agg_weigth> can be used. Not both!')
|
|
131
|
-
self.label: Optional[str] = None
|
|
132
|
-
|
|
133
|
-
def __repr__(self):
|
|
134
|
-
# Get the constructor arguments and their current values
|
|
135
|
-
init_signature = inspect.signature(self.__init__)
|
|
136
|
-
init_args = init_signature.parameters
|
|
137
|
-
|
|
138
|
-
# Create a dictionary with argument names and their values
|
|
139
|
-
args_str = ', '.join(f'{name}={repr(getattr(self, name, None))}' for name in init_args if name != 'self')
|
|
140
|
-
return f'{self.__class__.__name__}({args_str})'
|
|
141
|
-
|
|
142
|
-
def __str__(self):
|
|
143
|
-
return str(self.data)
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
class TimeSeries:
|
|
147
|
-
"""
|
|
148
|
-
A class representing time series data with active and stored states.
|
|
149
|
-
|
|
150
|
-
TimeSeries provides a way to store time-indexed data and work with temporal subsets.
|
|
151
|
-
It supports arithmetic operations, aggregation, and JSON serialization.
|
|
152
|
-
|
|
153
|
-
Attributes:
|
|
154
|
-
name (str): The name of the time series
|
|
155
|
-
aggregation_weight (Optional[float]): Weight used for aggregation
|
|
156
|
-
aggregation_group (Optional[str]): Group name for shared aggregation weighting
|
|
157
|
-
needs_extra_timestep (bool): Whether this series needs an extra timestep
|
|
158
|
-
"""
|
|
159
|
-
|
|
160
|
-
@classmethod
|
|
161
|
-
def from_datasource(
|
|
162
|
-
cls,
|
|
163
|
-
data: NumericData,
|
|
164
|
-
name: str,
|
|
165
|
-
timesteps: pd.DatetimeIndex,
|
|
166
|
-
aggregation_weight: Optional[float] = None,
|
|
167
|
-
aggregation_group: Optional[str] = None,
|
|
168
|
-
needs_extra_timestep: bool = False,
|
|
169
|
-
) -> 'TimeSeries':
|
|
170
|
-
"""
|
|
171
|
-
Initialize the TimeSeries from multiple data sources.
|
|
172
|
-
|
|
173
|
-
Args:
|
|
174
|
-
data: The time series data
|
|
175
|
-
name: The name of the TimeSeries
|
|
176
|
-
timesteps: The timesteps of the TimeSeries
|
|
177
|
-
aggregation_weight: The weight in aggregation calculations
|
|
178
|
-
aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing
|
|
179
|
-
needs_extra_timestep: Whether this series requires an extra timestep
|
|
180
|
-
|
|
181
|
-
Returns:
|
|
182
|
-
A new TimeSeries instance
|
|
183
|
-
"""
|
|
184
|
-
return cls(
|
|
185
|
-
DataConverter.as_dataarray(data, timesteps),
|
|
186
|
-
name,
|
|
187
|
-
aggregation_weight,
|
|
188
|
-
aggregation_group,
|
|
189
|
-
needs_extra_timestep,
|
|
190
|
-
)
|
|
191
|
-
|
|
192
|
-
@classmethod
|
|
193
|
-
def from_json(cls, data: Optional[Dict[str, Any]] = None, path: Optional[str] = None) -> 'TimeSeries':
|
|
194
|
-
"""
|
|
195
|
-
Load a TimeSeries from a dictionary or json file.
|
|
196
|
-
|
|
197
|
-
Args:
|
|
198
|
-
data: Dictionary containing TimeSeries data
|
|
199
|
-
path: Path to a JSON file containing TimeSeries data
|
|
200
|
-
|
|
201
|
-
Returns:
|
|
202
|
-
A new TimeSeries instance
|
|
203
|
-
|
|
204
|
-
Raises:
|
|
205
|
-
ValueError: If both path and data are provided or neither is provided
|
|
206
|
-
"""
|
|
207
|
-
if (path is None and data is None) or (path is not None and data is not None):
|
|
208
|
-
raise ValueError("Exactly one of 'path' or 'data' must be provided")
|
|
209
|
-
|
|
210
|
-
if path is not None:
|
|
211
|
-
with open(path, 'r') as f:
|
|
212
|
-
data = json.load(f)
|
|
213
|
-
|
|
214
|
-
# Convert ISO date strings to datetime objects
|
|
215
|
-
data['data']['coords']['time']['data'] = pd.to_datetime(data['data']['coords']['time']['data'])
|
|
216
|
-
|
|
217
|
-
# Create the TimeSeries instance
|
|
218
|
-
return cls(
|
|
219
|
-
data=xr.DataArray.from_dict(data['data']),
|
|
220
|
-
name=data['name'],
|
|
221
|
-
aggregation_weight=data['aggregation_weight'],
|
|
222
|
-
aggregation_group=data['aggregation_group'],
|
|
223
|
-
needs_extra_timestep=data['needs_extra_timestep'],
|
|
224
|
-
)
|
|
45
|
+
__slots__ = () # No additional instance attributes - everything goes in attrs
|
|
225
46
|
|
|
226
47
|
def __init__(
|
|
227
48
|
self,
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
aggregation_weight:
|
|
231
|
-
|
|
232
|
-
|
|
49
|
+
*args: Any,
|
|
50
|
+
aggregation_group: str | None = None,
|
|
51
|
+
aggregation_weight: float | None = None,
|
|
52
|
+
agg_group: str | None = None,
|
|
53
|
+
agg_weight: float | None = None,
|
|
54
|
+
**kwargs: Any,
|
|
233
55
|
):
|
|
234
56
|
"""
|
|
235
|
-
Initialize a TimeSeries with a DataArray.
|
|
236
|
-
|
|
237
|
-
Args:
|
|
238
|
-
data: The DataArray containing time series data
|
|
239
|
-
name: The name of the TimeSeries
|
|
240
|
-
aggregation_weight: The weight in aggregation calculations
|
|
241
|
-
aggregation_group: Group this TimeSeries belongs to for weight sharing
|
|
242
|
-
needs_extra_timestep: Whether this series requires an extra timestep
|
|
243
|
-
|
|
244
|
-
Raises:
|
|
245
|
-
ValueError: If data doesn't have a 'time' index or has more than 1 dimension
|
|
246
|
-
"""
|
|
247
|
-
if 'time' not in data.indexes:
|
|
248
|
-
raise ValueError(f'DataArray must have a "time" index. Got {data.indexes}')
|
|
249
|
-
if data.ndim > 1:
|
|
250
|
-
raise ValueError(f'Number of dimensions of DataArray must be 1. Got {data.ndim}')
|
|
251
|
-
|
|
252
|
-
self.name = name
|
|
253
|
-
self.aggregation_weight = aggregation_weight
|
|
254
|
-
self.aggregation_group = aggregation_group
|
|
255
|
-
self.needs_extra_timestep = needs_extra_timestep
|
|
256
|
-
|
|
257
|
-
# Data management
|
|
258
|
-
self._stored_data = data.copy(deep=True)
|
|
259
|
-
self._backup = self._stored_data.copy(deep=True)
|
|
260
|
-
self._active_timesteps = self._stored_data.indexes['time']
|
|
261
|
-
self._active_data = None
|
|
262
|
-
self._update_active_data()
|
|
263
|
-
|
|
264
|
-
def reset(self):
|
|
265
|
-
"""
|
|
266
|
-
Reset active timesteps to the full set of stored timesteps.
|
|
267
|
-
"""
|
|
268
|
-
self.active_timesteps = None
|
|
269
|
-
|
|
270
|
-
def restore_data(self):
|
|
271
|
-
"""
|
|
272
|
-
Restore stored_data from the backup and reset active timesteps.
|
|
273
|
-
"""
|
|
274
|
-
self._stored_data = self._backup.copy(deep=True)
|
|
275
|
-
self.reset()
|
|
276
|
-
|
|
277
|
-
def to_json(self, path: Optional[pathlib.Path] = None) -> Dict[str, Any]:
|
|
278
|
-
"""
|
|
279
|
-
Save the TimeSeries to a dictionary or JSON file.
|
|
280
|
-
|
|
281
57
|
Args:
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
'aggregation_group'
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
58
|
+
*args: Arguments passed to DataArray
|
|
59
|
+
aggregation_group: Aggregation group name
|
|
60
|
+
aggregation_weight: Aggregation weight (0-1)
|
|
61
|
+
agg_group: Deprecated, use aggregation_group instead
|
|
62
|
+
agg_weight: Deprecated, use aggregation_weight instead
|
|
63
|
+
**kwargs: Additional arguments passed to DataArray
|
|
64
|
+
"""
|
|
65
|
+
if agg_group is not None:
|
|
66
|
+
warnings.warn('agg_group is deprecated, use aggregation_group instead', DeprecationWarning, stacklevel=2)
|
|
67
|
+
aggregation_group = agg_group
|
|
68
|
+
if agg_weight is not None:
|
|
69
|
+
warnings.warn('agg_weight is deprecated, use aggregation_weight instead', DeprecationWarning, stacklevel=2)
|
|
70
|
+
aggregation_weight = agg_weight
|
|
71
|
+
|
|
72
|
+
if (aggregation_group is not None) and (aggregation_weight is not None):
|
|
73
|
+
raise ValueError('Use either aggregation_group or aggregation_weight, not both')
|
|
74
|
+
|
|
75
|
+
# Let xarray handle all the initialization complexity
|
|
76
|
+
super().__init__(*args, **kwargs)
|
|
77
|
+
|
|
78
|
+
# Add our metadata to attrs after initialization
|
|
79
|
+
if aggregation_group is not None:
|
|
80
|
+
self.attrs['aggregation_group'] = aggregation_group
|
|
81
|
+
if aggregation_weight is not None:
|
|
82
|
+
self.attrs['aggregation_weight'] = aggregation_weight
|
|
83
|
+
|
|
84
|
+
# Always mark as TimeSeriesData
|
|
85
|
+
self.attrs['__timeseries_data__'] = True
|
|
86
|
+
|
|
87
|
+
def fit_to_coords(
|
|
88
|
+
self,
|
|
89
|
+
coords: dict[str, pd.Index],
|
|
90
|
+
name: str | None = None,
|
|
91
|
+
) -> 'TimeSeriesData':
|
|
92
|
+
"""Fit the data to the given coordinates. Returns a new TimeSeriesData object if the current coords are different."""
|
|
93
|
+
if self.coords.equals(xr.Coordinates(coords)):
|
|
94
|
+
return self
|
|
95
|
+
|
|
96
|
+
da = DataConverter.to_dataarray(self.data, coords=coords)
|
|
97
|
+
return self.__class__(
|
|
98
|
+
da,
|
|
99
|
+
aggregation_group=self.aggregation_group,
|
|
100
|
+
aggregation_weight=self.aggregation_weight,
|
|
101
|
+
name=name if name is not None else self.name,
|
|
102
|
+
)
|
|
321
103
|
|
|
322
104
|
@property
|
|
323
|
-
def
|
|
324
|
-
|
|
325
|
-
return np.unique(self.active_data.values).size == 1
|
|
105
|
+
def aggregation_group(self) -> str | None:
|
|
106
|
+
return self.attrs.get('aggregation_group')
|
|
326
107
|
|
|
327
108
|
@property
|
|
328
|
-
def
|
|
329
|
-
|
|
330
|
-
return self._active_timesteps
|
|
109
|
+
def aggregation_weight(self) -> float | None:
|
|
110
|
+
return self.attrs.get('aggregation_weight')
|
|
331
111
|
|
|
332
|
-
@
|
|
333
|
-
def
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
self._active_timesteps = self.stored_data.indexes['time']
|
|
345
|
-
elif isinstance(timesteps, pd.DatetimeIndex):
|
|
346
|
-
self._active_timesteps = timesteps
|
|
347
|
-
else:
|
|
348
|
-
raise TypeError('active_timesteps must be a pandas DatetimeIndex or None')
|
|
349
|
-
|
|
350
|
-
self._update_active_data()
|
|
351
|
-
|
|
352
|
-
@property
|
|
353
|
-
def active_data(self) -> xr.DataArray:
|
|
354
|
-
"""Get a view of stored_data based on active_timesteps."""
|
|
355
|
-
return self._active_data
|
|
356
|
-
|
|
357
|
-
@property
|
|
358
|
-
def stored_data(self) -> xr.DataArray:
|
|
359
|
-
"""Get a copy of the full stored data."""
|
|
360
|
-
return self._stored_data.copy()
|
|
112
|
+
@classmethod
|
|
113
|
+
def from_dataarray(
|
|
114
|
+
cls, da: xr.DataArray, aggregation_group: str | None = None, aggregation_weight: float | None = None
|
|
115
|
+
):
|
|
116
|
+
"""Create TimeSeriesData from DataArray, extracting metadata from attrs."""
|
|
117
|
+
# Get aggregation metadata from attrs or parameters
|
|
118
|
+
final_aggregation_group = (
|
|
119
|
+
aggregation_group if aggregation_group is not None else da.attrs.get('aggregation_group')
|
|
120
|
+
)
|
|
121
|
+
final_aggregation_weight = (
|
|
122
|
+
aggregation_weight if aggregation_weight is not None else da.attrs.get('aggregation_weight')
|
|
123
|
+
)
|
|
361
124
|
|
|
362
|
-
|
|
363
|
-
def stored_data(self, value: NumericData):
|
|
364
|
-
"""
|
|
365
|
-
Update stored_data and refresh active_data.
|
|
125
|
+
return cls(da, aggregation_group=final_aggregation_group, aggregation_weight=final_aggregation_weight)
|
|
366
126
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
"""
|
|
370
|
-
|
|
127
|
+
@classmethod
|
|
128
|
+
def is_timeseries_data(cls, obj) -> bool:
|
|
129
|
+
"""Check if an object is TimeSeriesData."""
|
|
130
|
+
return isinstance(obj, xr.DataArray) and obj.attrs.get('__timeseries_data__', False)
|
|
371
131
|
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
132
|
+
def __repr__(self):
|
|
133
|
+
agg_info = []
|
|
134
|
+
if self.aggregation_group:
|
|
135
|
+
agg_info.append(f"aggregation_group='{self.aggregation_group}'")
|
|
136
|
+
if self.aggregation_weight is not None:
|
|
137
|
+
agg_info.append(f'aggregation_weight={self.aggregation_weight}')
|
|
375
138
|
|
|
376
|
-
|
|
377
|
-
|
|
139
|
+
info_str = f'TimeSeriesData({", ".join(agg_info)})' if agg_info else 'TimeSeriesData'
|
|
140
|
+
return f'{info_str}\n{super().__repr__()}'
|
|
378
141
|
|
|
379
142
|
@property
|
|
380
|
-
def
|
|
381
|
-
|
|
143
|
+
def agg_group(self):
|
|
144
|
+
warnings.warn('agg_group is deprecated, use aggregation_group instead', DeprecationWarning, stacklevel=2)
|
|
145
|
+
return self.aggregation_group
|
|
382
146
|
|
|
383
147
|
@property
|
|
384
|
-
def
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
def _apply_operation(self, other, op):
|
|
388
|
-
"""Apply an operation between this TimeSeries and another object."""
|
|
389
|
-
if isinstance(other, TimeSeries):
|
|
390
|
-
other = other.active_data
|
|
391
|
-
return op(self.active_data, other)
|
|
148
|
+
def agg_weight(self):
|
|
149
|
+
warnings.warn('agg_weight is deprecated, use aggregation_weight instead', DeprecationWarning, stacklevel=2)
|
|
150
|
+
return self.aggregation_weight
|
|
392
151
|
|
|
393
|
-
def __add__(self, other):
|
|
394
|
-
return self._apply_operation(other, lambda x, y: x + y)
|
|
395
152
|
|
|
396
|
-
|
|
397
|
-
|
|
153
|
+
TemporalDataUser = (
|
|
154
|
+
int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray | TimeSeriesData
|
|
155
|
+
)
|
|
156
|
+
"""User data which might have a time dimension. Internally converted to an xr.DataArray with time dimension."""
|
|
398
157
|
|
|
399
|
-
|
|
400
|
-
|
|
158
|
+
TemporalData = xr.DataArray | TimeSeriesData
|
|
159
|
+
"""Internally used datatypes for temporal data (data with a time dimension)."""
|
|
401
160
|
|
|
402
|
-
def __truediv__(self, other):
|
|
403
|
-
return self._apply_operation(other, lambda x, y: x / y)
|
|
404
161
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
return abs(self.active_data)
|
|
425
|
-
|
|
426
|
-
def __gt__(self, other):
|
|
427
|
-
"""
|
|
428
|
-
Compare if this TimeSeries is greater than another.
|
|
429
|
-
|
|
430
|
-
Args:
|
|
431
|
-
other: Another TimeSeries to compare with
|
|
162
|
+
class DataConverter:
|
|
163
|
+
"""
|
|
164
|
+
Converts various data types into xarray.DataArray with specified target coordinates.
|
|
165
|
+
|
|
166
|
+
This converter handles intelligent dimension matching and broadcasting to ensure
|
|
167
|
+
the output DataArray always conforms to the specified coordinate structure.
|
|
168
|
+
|
|
169
|
+
Supported input types:
|
|
170
|
+
- Scalars: int, float, np.number (broadcast to all target dimensions)
|
|
171
|
+
- 1D data: np.ndarray, pd.Series, single-column DataFrame (matched by length/index)
|
|
172
|
+
- Multi-dimensional arrays: np.ndarray, DataFrame (matched by shape)
|
|
173
|
+
- xr.DataArray: validated and potentially broadcast to target dimensions
|
|
174
|
+
|
|
175
|
+
The converter uses smart matching strategies:
|
|
176
|
+
- Series: matched by exact index comparison
|
|
177
|
+
- 1D arrays: matched by length to target coordinates
|
|
178
|
+
- Multi-dimensional arrays: matched by shape permutation analysis
|
|
179
|
+
- DataArrays: validated for compatibility and broadcast as needed
|
|
180
|
+
"""
|
|
432
181
|
|
|
433
|
-
|
|
434
|
-
|
|
182
|
+
@staticmethod
|
|
183
|
+
def _match_series_by_index_alignment(
|
|
184
|
+
data: pd.Series, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
|
|
185
|
+
) -> xr.DataArray:
|
|
435
186
|
"""
|
|
436
|
-
|
|
437
|
-
return self.active_data > other.active_data
|
|
438
|
-
return self.active_data > other
|
|
187
|
+
Match pandas Series to target dimension by exact index comparison.
|
|
439
188
|
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
Compare if this TimeSeries is greater than or equal to another.
|
|
189
|
+
Attempts to find a target dimension whose coordinates exactly match
|
|
190
|
+
the Series index values, ensuring proper alignment.
|
|
443
191
|
|
|
444
192
|
Args:
|
|
445
|
-
|
|
193
|
+
data: pandas Series to convert
|
|
194
|
+
target_coords: Available target coordinates {dim_name: coordinate_index}
|
|
195
|
+
target_dims: Target dimension names to consider for matching
|
|
446
196
|
|
|
447
197
|
Returns:
|
|
448
|
-
|
|
449
|
-
"""
|
|
450
|
-
if isinstance(other, TimeSeries):
|
|
451
|
-
return self.active_data >= other.active_data
|
|
452
|
-
return self.active_data >= other
|
|
453
|
-
|
|
454
|
-
def __lt__(self, other):
|
|
455
|
-
"""
|
|
456
|
-
Compare if this TimeSeries is less than another.
|
|
198
|
+
DataArray with Series matched to the appropriate dimension
|
|
457
199
|
|
|
458
|
-
|
|
459
|
-
|
|
200
|
+
Raises:
|
|
201
|
+
ConversionError: If Series cannot be matched to any target dimension,
|
|
202
|
+
or if no target dimensions provided for multi-element Series
|
|
203
|
+
"""
|
|
204
|
+
# Handle edge case: no target dimensions
|
|
205
|
+
if len(target_dims) == 0:
|
|
206
|
+
if len(data) != 1:
|
|
207
|
+
raise ConversionError(
|
|
208
|
+
f'Cannot convert multi-element Series without target dimensions. '
|
|
209
|
+
f'Series has {len(data)} elements but no target dimensions specified.'
|
|
210
|
+
)
|
|
211
|
+
return xr.DataArray(data.iloc[0])
|
|
212
|
+
|
|
213
|
+
# Attempt exact index matching with each target dimension
|
|
214
|
+
for dim_name in target_dims:
|
|
215
|
+
target_index = target_coords[dim_name]
|
|
216
|
+
if data.index.equals(target_index):
|
|
217
|
+
return xr.DataArray(data.values.copy(), coords={dim_name: target_index}, dims=dim_name)
|
|
218
|
+
|
|
219
|
+
# No exact matches found
|
|
220
|
+
available_lengths = {dim: len(target_coords[dim]) for dim in target_dims}
|
|
221
|
+
raise ConversionError(
|
|
222
|
+
f'Series index does not match any target dimension coordinates. '
|
|
223
|
+
f'Series length: {len(data)}, available coordinate lengths: {available_lengths}'
|
|
224
|
+
)
|
|
460
225
|
|
|
461
|
-
|
|
462
|
-
|
|
226
|
+
@staticmethod
|
|
227
|
+
def _match_1d_array_by_length(
|
|
228
|
+
data: np.ndarray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
|
|
229
|
+
) -> xr.DataArray:
|
|
463
230
|
"""
|
|
464
|
-
|
|
465
|
-
return self.active_data < other.active_data
|
|
466
|
-
return self.active_data < other
|
|
231
|
+
Match 1D numpy array to target dimension by length comparison.
|
|
467
232
|
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
Compare if this TimeSeries is less than or equal to another.
|
|
233
|
+
Finds target dimensions whose coordinate length matches the array length.
|
|
234
|
+
Requires unique length match to avoid ambiguity.
|
|
471
235
|
|
|
472
236
|
Args:
|
|
473
|
-
|
|
237
|
+
data: 1D numpy array to convert
|
|
238
|
+
target_coords: Available target coordinates {dim_name: coordinate_index}
|
|
239
|
+
target_dims: Target dimension names to consider for matching
|
|
474
240
|
|
|
475
241
|
Returns:
|
|
476
|
-
|
|
477
|
-
"""
|
|
478
|
-
if isinstance(other, TimeSeries):
|
|
479
|
-
return self.active_data <= other.active_data
|
|
480
|
-
return self.active_data <= other
|
|
481
|
-
|
|
482
|
-
def __eq__(self, other):
|
|
483
|
-
"""
|
|
484
|
-
Compare if this TimeSeries is equal to another.
|
|
242
|
+
DataArray with array matched to the uniquely identified dimension
|
|
485
243
|
|
|
486
|
-
|
|
487
|
-
|
|
244
|
+
Raises:
|
|
245
|
+
ConversionError: If array length matches zero or multiple target dimensions,
|
|
246
|
+
or if no target dimensions provided for multi-element array
|
|
247
|
+
"""
|
|
248
|
+
# Handle edge case: no target dimensions
|
|
249
|
+
if len(target_dims) == 0:
|
|
250
|
+
if len(data) != 1:
|
|
251
|
+
raise ConversionError(
|
|
252
|
+
f'Cannot convert multi-element array without target dimensions. Array has {len(data)} elements.'
|
|
253
|
+
)
|
|
254
|
+
return xr.DataArray(data[0])
|
|
255
|
+
|
|
256
|
+
# Find all dimensions with matching lengths
|
|
257
|
+
array_length = len(data)
|
|
258
|
+
matching_dims = []
|
|
259
|
+
coordinate_lengths = {}
|
|
260
|
+
|
|
261
|
+
for dim_name in target_dims:
|
|
262
|
+
coord_length = len(target_coords[dim_name])
|
|
263
|
+
coordinate_lengths[dim_name] = coord_length
|
|
264
|
+
if array_length == coord_length:
|
|
265
|
+
matching_dims.append(dim_name)
|
|
266
|
+
|
|
267
|
+
# Validate matching results
|
|
268
|
+
if len(matching_dims) == 0:
|
|
269
|
+
raise ConversionError(
|
|
270
|
+
f'Array length {array_length} does not match any target dimension lengths: {coordinate_lengths}'
|
|
271
|
+
)
|
|
272
|
+
elif len(matching_dims) > 1:
|
|
273
|
+
raise ConversionError(
|
|
274
|
+
f'Array length {array_length} matches multiple dimensions: {matching_dims}. '
|
|
275
|
+
f'Cannot uniquely determine target dimension. Consider using explicit '
|
|
276
|
+
f'dimension specification or converting to DataArray manually.'
|
|
277
|
+
)
|
|
488
278
|
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
if isinstance(other, TimeSeries):
|
|
493
|
-
return self.active_data == other.active_data
|
|
494
|
-
return self.active_data == other
|
|
279
|
+
# Create DataArray with the uniquely matched dimension
|
|
280
|
+
matched_dim = matching_dims[0]
|
|
281
|
+
return xr.DataArray(data.copy(), coords={matched_dim: target_coords[matched_dim]}, dims=matched_dim)
|
|
495
282
|
|
|
496
|
-
|
|
283
|
+
@staticmethod
def _match_multidim_array_by_shape_permutation(
    data: np.ndarray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
) -> xr.DataArray:
    """
    Map a multi-dimensional array onto target dimensions via shape analysis.

    Every assignment of target dimensions to the array's axes is tested
    against the coordinate lengths; the conversion succeeds only when
    exactly one assignment fits the array's shape.

    Args:
        data: Multi-dimensional numpy array to convert
        target_coords: Available target coordinates {dim_name: coordinate_index}
        target_dims: Target dimension names to consider for matching

    Returns:
        DataArray with array dimensions mapped to target dimensions by shape

    Raises:
        ConversionError: If array shape cannot be uniquely mapped to target dimensions,
            or if no target dimensions provided for multi-element array
    """
    # Without target dimensions only a single-element array can be represented.
    if not target_dims:
        if data.size != 1:
            raise ConversionError(
                f'Cannot convert multi-element array without target dimensions. '
                f'Array has {data.size} elements with shape {data.shape}.'
            )
        return xr.DataArray(data.item())

    array_shape = data.shape
    coordinate_lengths = {dim: len(target_coords[dim]) for dim in target_dims}

    # Collect every permutation of target dims whose lengths line up with the shape.
    valid_mappings = [
        dim_permutation
        for dim_permutation in permutations(target_dims, data.ndim)
        if all(array_shape[i] == coordinate_lengths[dim_permutation[i]] for i in range(len(dim_permutation)))
    ]

    # No candidate at all: the shape simply does not fit the coordinates.
    if not valid_mappings:
        raise ConversionError(
            f'Array shape {array_shape} cannot be mapped to any combination of target '
            f'coordinate lengths: {coordinate_lengths}. Consider reshaping the array '
            f'or adjusting target coordinates.'
        )

    # More than one candidate: the mapping would be a guess, so refuse.
    if len(valid_mappings) > 1:
        raise ConversionError(
            f'Array shape {array_shape} matches multiple dimension combinations: '
            f'{valid_mappings}. Cannot uniquely determine dimension mapping. '
            f'Consider using explicit dimension specification.'
        )

    chosen_dims = valid_mappings[0]
    return xr.DataArray(
        data.copy(),
        coords={dim: target_coords[dim] for dim in chosen_dims},
        dims=chosen_dims,
    )
|
|
541
346
|
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None,
|
|
547
|
-
):
|
|
548
|
-
"""
|
|
549
|
-
Args:
|
|
550
|
-
timesteps: The timesteps of the Collection.
|
|
551
|
-
hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified
|
|
552
|
-
hours_of_previous_timesteps: The duration of previous timesteps.
|
|
553
|
-
If None, the first time increment of time_series is used.
|
|
554
|
-
This is needed to calculate previous durations (for example consecutive_on_hours).
|
|
555
|
-
If you use an array, take care that its long enough to cover all previous values!
|
|
347
|
+
@staticmethod
def _broadcast_dataarray_to_target_specification(
    source_data: xr.DataArray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
) -> xr.DataArray:
    """
    Broadcast a DataArray so it conforms exactly to a target coordinate layout.

    Validates that the source can legally be expanded (no dimension reduction,
    no unknown dimensions, no coordinate mismatches) and then broadcasts it
    against a template carrying the full target layout.

    Args:
        source_data: Source DataArray to broadcast
        target_coords: Target coordinates {dim_name: coordinate_index}
        target_dims: Target dimension names in desired order

    Returns:
        DataArray broadcast to target specification with proper dimension ordering

    Raises:
        ConversionError: If broadcasting is impossible due to incompatible dimensions
            or coordinate mismatches
    """
    # Broadcasting can only add dimensions, never remove them.
    if len(source_data.dims) > len(target_dims):
        raise ConversionError(
            f'Cannot reduce DataArray dimensionality from {len(source_data.dims)} '
            f'to {len(target_dims)} dimensions. Source dims: {source_data.dims}, '
            f'target dims: {target_dims}'
        )

    # Every source dimension must be known to the target.
    missing_dims = set(source_data.dims) - set(target_dims)
    if missing_dims:
        raise ConversionError(
            f'Source DataArray has dimensions {missing_dims} not present in target dimensions {target_dims}'
        )

    # Dimensions present on both sides must carry identical coordinate values.
    for dim in source_data.dims:
        if dim in source_data.coords and dim in target_coords:
            if not np.array_equal(source_data.coords[dim].values, target_coords[dim].values):
                raise ConversionError(
                    f'Coordinate mismatch for dimension "{dim}". '
                    f'Source and target coordinates have different values.'
                )

    # Empty template with the full target layout drives the broadcast.
    target_template = xr.DataArray(
        np.empty([len(target_coords[dim]) for dim in target_dims]),
        coords=target_coords,
        dims=target_dims,
    )

    # Broadcast, then force the requested dimension order.
    return source_data.broadcast_like(target_template).transpose(*target_dims)
|
|
642
404
|
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
405
|
+
@classmethod
def to_dataarray(
    cls,
    data: int
    | float
    | bool
    | np.integer
    | np.floating
    | np.bool_
    | np.ndarray
    | pd.Series
    | pd.DataFrame
    | xr.DataArray,
    coords: dict[str, pd.Index] | None = None,
) -> xr.DataArray:
    """
    Convert various data types to an xarray.DataArray with the given target coordinates.

    Dispatches on the input type, builds an intermediate DataArray through
    smart dimension matching, and finally broadcasts that intermediate to the
    full target specification.

    Args:
        data: Input data to convert. Supported types:
            - Scalars: int, float, bool, np.integer, np.floating, np.bool_
            - Arrays: np.ndarray (1D and multi-dimensional)
            - Pandas: pd.Series, pd.DataFrame
            - xarray: xr.DataArray
        coords: Target coordinate specification as {dimension_name: coordinate_index}.
            All coordinate indices must be pandas.Index objects.

    Returns:
        DataArray conforming to the target coordinate specification,
        with input data appropriately matched and broadcast

    Raises:
        ConversionError: If data type is unsupported, conversion fails,
            or broadcasting to target coordinates is impossible

    Examples:
        >>> coords = {'x': pd.Index([1, 2, 3]), 'y': pd.Index(['a', 'b'])}
        >>> converter.to_dataarray(42, coords)  # scalar broadcast to (3, 2)
        >>> converter.to_dataarray(pd.Series([10, 20, 30], index=[1, 2, 3]), coords)
        >>> converter.to_dataarray(np.array([[1, 2], [3, 4], [5, 6]]), coords)
    """
    validated_coords, target_dims = cls._validate_and_prepare_target_coordinates(coords if coords is not None else {})

    if isinstance(data, (int, float, bool, np.integer, np.floating, np.bool_)):
        # Plain scalars become 0-d DataArrays (numpy scalars are unwrapped first).
        candidate = xr.DataArray(data.item() if hasattr(data, 'item') else data)

    elif isinstance(data, np.ndarray):
        # Dispatch by dimensionality: scalar, 1-D length match, or shape permutation.
        if data.ndim == 0:
            candidate = xr.DataArray(data.item())
        elif data.ndim == 1:
            candidate = cls._match_1d_array_by_length(data, validated_coords, target_dims)
        else:
            candidate = cls._match_multidim_array_by_shape_permutation(data, validated_coords, target_dims)

    elif isinstance(data, pd.Series):
        if isinstance(data.index, pd.MultiIndex):
            raise ConversionError('MultiIndex Series are not supported. Please use a single-level index.')
        candidate = cls._match_series_by_index_alignment(data, validated_coords, target_dims)

    elif isinstance(data, pd.DataFrame):
        if isinstance(data.index, pd.MultiIndex):
            raise ConversionError('MultiIndex DataFrames are not supported. Please use a single-level index.')
        if len(data.columns) == 0 or data.empty:
            raise ConversionError('DataFrame must have at least one column and cannot be empty.')
        if len(data.columns) == 1:
            # One column behaves like a Series keyed by the frame's index.
            candidate = cls._match_series_by_index_alignment(data.iloc[:, 0], validated_coords, target_dims)
        else:
            # Several columns are treated as a raw 2-D array.
            candidate = cls._match_multidim_array_by_shape_permutation(
                data.to_numpy(), validated_coords, target_dims
            )

    elif isinstance(data, xr.DataArray):
        # Existing DataArray - work on a defensive copy.
        candidate = data.copy()

    else:
        supported_types = ', '.join(
            (
                'int',
                'float',
                'bool',
                'np.integer',
                'np.floating',
                'np.bool_',
                'np.ndarray',
                'pd.Series',
                'pd.DataFrame',
                'xr.DataArray',
            )
        )
        raise ConversionError(
            f'Unsupported data type: {type(data).__name__}. Supported types: {supported_types}'
        )

    # Final step: expand the intermediate to the full target specification.
    return cls._broadcast_dataarray_to_target_specification(candidate, validated_coords, target_dims)
|
|
742
530
|
|
|
743
|
-
|
|
744
|
-
|
|
531
|
+
@staticmethod
def _validate_and_prepare_target_coordinates(
    coords: dict[str, pd.Index],
) -> tuple[dict[str, pd.Index], tuple[str, ...]]:
    """
    Validate and normalize a target coordinate specification.

    Checks that every coordinate is a non-empty pandas.Index, renames indices
    to match their dimension name, and enforces DatetimeIndex for a dimension
    called "time".

    Args:
        coords: Raw coordinate specification {dimension_name: coordinate_index}

    Returns:
        Tuple of (validated_coordinates_dict, dimension_names_tuple)

    Raises:
        ConversionError: If any coordinates are invalid, improperly typed,
            or have inconsistent naming
    """
    prepared: dict[str, pd.Index] = {}
    ordered_names: list[str] = []

    for dim_name, coord_index in coords.items():
        # Only pandas.Index objects are accepted as coordinates.
        if not isinstance(coord_index, pd.Index):
            raise ConversionError(
                f'Coordinate for dimension "{dim_name}" must be a pandas.Index, got {type(coord_index).__name__}'
            )

        # An empty coordinate axis is meaningless.
        if len(coord_index) == 0:
            raise ConversionError(f'Coordinate for dimension "{dim_name}" cannot be empty')

        # Normalize the index name so it always matches its dimension.
        if coord_index.name != dim_name:
            coord_index = coord_index.rename(dim_name)

        # "time" is special-cased: it must be datetime-based.
        if dim_name == 'time' and not isinstance(coord_index, pd.DatetimeIndex):
            raise ConversionError(
                f'Dimension named "time" should use DatetimeIndex for proper '
                f'time-series functionality, got {type(coord_index).__name__}'
            )

        prepared[dim_name] = coord_index
        ordered_names.append(dim_name)

    return prepared, tuple(ordered_names)
|
|
887
580
|
|
|
888
|
-
return f'mean: {mean_val:.2f}, min: {min_val:.2f}, max: {max_val:.2f}'
|
|
889
581
|
|
|
890
|
-
|
|
891
|
-
|
|
582
|
+
def get_dataarray_stats(arr: xr.DataArray) -> dict:
    """Generate a statistical summary of a DataArray.

    Numeric-like arrays (bool, int, uint, float, complex) yield min/max/mean/
    median/std plus the non-null count; a ``nulls`` entry is added only when
    nulls are present. Non-numeric arrays yield an empty dict. Computation is
    best-effort: any failure leaves the summary empty rather than raising.
    """
    summary: dict = {}
    if arr.dtype.kind in 'biufc':  # bool, int, uint, float, complex
        try:
            summary.update(
                {
                    'min': float(arr.min().values),
                    'max': float(arr.max().values),
                    'mean': float(arr.mean().values),
                    'median': float(arr.median().values),
                    'std': float(arr.std().values),
                    'count': int(arr.count().values),  # non-null count
                }
            )

            # Report nulls only when any exist.
            null_count = int(arr.isnull().sum().values)
            if null_count > 0:
                summary['nulls'] = null_count

        except Exception:
            # Deliberate best-effort: statistics are informational only.
            pass

    return summary
|
|
927
607
|
|
|
928
|
-
@property
|
|
929
|
-
def timesteps_extra(self) -> pd.DatetimeIndex:
|
|
930
|
-
"""Get the active timesteps with extra step."""
|
|
931
|
-
return self.all_timesteps_extra if self._active_timesteps_extra is None else self._active_timesteps_extra
|
|
932
608
|
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
"""Get the duration of each active timestep."""
|
|
936
|
-
return (
|
|
937
|
-
self.all_hours_per_timestep if self._active_hours_per_timestep is None else self._active_hours_per_timestep
|
|
938
|
-
)
|
|
609
|
+
def drop_constant_arrays(ds: xr.Dataset, dim: str = 'time', drop_arrays_without_dim: bool = True) -> xr.Dataset:
    """Drop variables with constant values along a dimension.

    Args:
        ds: Input dataset to filter.
        dim: Dimension along which to check for constant values.
        drop_arrays_without_dim: If True, also drop variables that don't have the specified dimension.

    Returns:
        Dataset with constant variables removed.
    """

    def _is_droppable(da: xr.DataArray) -> bool:
        # Variables lacking the dimension are dropped only when requested.
        if dim not in da.dims:
            return drop_arrays_without_dim
        # Constant along `dim` means min equals max everywhere (NaNs skipped).
        return bool((da.max(dim, skipna=True) == da.min(dim, skipna=True)).all().item())

    drop_vars = sorted(name for name, da in ds.data_vars.items() if _is_droppable(da))

    if drop_vars:
        logger.debug(
            f'Dropping {len(drop_vars)} constant/dimension-less arrays: {drop_vars[:5]}{"..." if len(drop_vars) > 5 else ""}'
        )

    return ds.drop_vars(drop_vars)
|
|
966
640
|
|
|
967
641
|
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
return f'{data.max().item():{format_spec}} (constant)'
|
|
973
|
-
mean = data.mean().item()
|
|
974
|
-
median = data.median().item()
|
|
975
|
-
min_val = data.min().item()
|
|
976
|
-
max_val = data.max().item()
|
|
977
|
-
std = data.std().item()
|
|
978
|
-
return f'{mean:{format_spec}} (mean), {median:{format_spec}} (median), {min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
|
|
642
|
+
# Backward compatibility aliases
# TODO: Needed?
# NOTE(review): these keep old import names working; presumably safe to remove
# once no external code references them — verify against downstream users.
NonTemporalDataUser = PeriodicDataUser
NonTemporalData = PeriodicData
|