flixopt 2.2.0b0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flixopt/__init__.py +35 -1
- flixopt/aggregation.py +60 -81
- flixopt/calculation.py +381 -196
- flixopt/components.py +1022 -359
- flixopt/config.py +553 -191
- flixopt/core.py +475 -1315
- flixopt/effects.py +477 -214
- flixopt/elements.py +591 -344
- flixopt/features.py +403 -957
- flixopt/flow_system.py +781 -293
- flixopt/interface.py +1159 -189
- flixopt/io.py +50 -55
- flixopt/linear_converters.py +384 -92
- flixopt/modeling.py +759 -0
- flixopt/network_app.py +789 -0
- flixopt/plotting.py +273 -135
- flixopt/results.py +639 -383
- flixopt/solvers.py +25 -21
- flixopt/structure.py +928 -442
- flixopt/utils.py +34 -5
- flixopt-3.0.0.dist-info/METADATA +209 -0
- flixopt-3.0.0.dist-info/RECORD +26 -0
- {flixopt-2.2.0b0.dist-info → flixopt-3.0.0.dist-info}/WHEEL +1 -1
- flixopt-3.0.0.dist-info/top_level.txt +1 -0
- docs/examples/00-Minimal Example.md +0 -5
- docs/examples/01-Basic Example.md +0 -5
- docs/examples/02-Complex Example.md +0 -10
- docs/examples/03-Calculation Modes.md +0 -5
- docs/examples/index.md +0 -5
- docs/faq/contribute.md +0 -49
- docs/faq/index.md +0 -3
- docs/images/architecture_flixOpt-pre2.0.0.png +0 -0
- docs/images/architecture_flixOpt.png +0 -0
- docs/images/flixopt-icon.svg +0 -1
- docs/javascripts/mathjax.js +0 -18
- docs/release-notes/_template.txt +0 -32
- docs/release-notes/index.md +0 -7
- docs/release-notes/v2.0.0.md +0 -93
- docs/release-notes/v2.0.1.md +0 -12
- docs/release-notes/v2.1.0.md +0 -31
- docs/release-notes/v2.2.0.md +0 -55
- docs/user-guide/Mathematical Notation/Bus.md +0 -33
- docs/user-guide/Mathematical Notation/Effects, Penalty & Objective.md +0 -132
- docs/user-guide/Mathematical Notation/Flow.md +0 -26
- docs/user-guide/Mathematical Notation/Investment.md +0 -115
- docs/user-guide/Mathematical Notation/LinearConverter.md +0 -21
- docs/user-guide/Mathematical Notation/Piecewise.md +0 -49
- docs/user-guide/Mathematical Notation/Storage.md +0 -44
- docs/user-guide/Mathematical Notation/index.md +0 -22
- docs/user-guide/Mathematical Notation/others.md +0 -3
- docs/user-guide/index.md +0 -124
- flixopt/config.yaml +0 -10
- flixopt-2.2.0b0.dist-info/METADATA +0 -146
- flixopt-2.2.0b0.dist-info/RECORD +0 -59
- flixopt-2.2.0b0.dist-info/top_level.txt +0 -5
- pics/architecture_flixOpt-pre2.0.0.png +0 -0
- pics/architecture_flixOpt.png +0 -0
- pics/flixOpt_plotting.jpg +0 -0
- pics/flixopt-icon.svg +0 -1
- pics/pics.pptx +0 -0
- scripts/gen_ref_pages.py +0 -54
- tests/ressources/Zeitreihen2020.csv +0 -35137
- {flixopt-2.2.0b0.dist-info → flixopt-3.0.0.dist-info}/licenses/LICENSE +0 -0
flixopt/core.py
CHANGED
|
@@ -3,13 +3,10 @@ This module contains the core functionality of the flixopt framework.
|
|
|
3
3
|
It provides Datatypes, logging functionality, and some functions to transform data structures.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
-
import inspect
|
|
7
|
-
import json
|
|
8
6
|
import logging
|
|
9
|
-
import
|
|
10
|
-
import
|
|
11
|
-
from
|
|
12
|
-
from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
|
|
7
|
+
import warnings
|
|
8
|
+
from itertools import permutations
|
|
9
|
+
from typing import Literal, Union
|
|
13
10
|
|
|
14
11
|
import numpy as np
|
|
15
12
|
import pandas as pd
|
|
@@ -17,20 +14,17 @@ import xarray as xr
|
|
|
17
14
|
|
|
18
15
|
logger = logging.getLogger('flixopt')
|
|
19
16
|
|
|
20
|
-
Scalar =
|
|
21
|
-
"""A
|
|
17
|
+
Scalar = int | float
|
|
18
|
+
"""A single number, either integer or float."""
|
|
22
19
|
|
|
23
|
-
|
|
24
|
-
"""
|
|
20
|
+
PeriodicDataUser = int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray
|
|
21
|
+
"""User data which has no time dimension. Internally converted to a Scalar or an xr.DataArray without a time dimension."""
|
|
25
22
|
|
|
26
|
-
|
|
27
|
-
"""
|
|
23
|
+
PeriodicData = xr.DataArray
|
|
24
|
+
"""Internally used datatypes for periodic data."""
|
|
28
25
|
|
|
29
|
-
|
|
30
|
-
"""
|
|
31
|
-
|
|
32
|
-
ScenarioData = NumericData
|
|
33
|
-
"""Represents any form of numeric data that corresponds to scenarios."""
|
|
26
|
+
FlowSystemDimensions = Literal['time', 'period', 'scenario']
|
|
27
|
+
"""Possible dimensions of a FlowSystem."""
|
|
34
28
|
|
|
35
29
|
|
|
36
30
|
class PlausibilityError(Exception):
|
|
@@ -45,1441 +39,607 @@ class ConversionError(Exception):
|
|
|
45
39
|
pass
|
|
46
40
|
|
|
47
41
|
|
|
48
|
-
class
|
|
49
|
-
"""
|
|
50
|
-
Converts various data types into xarray.DataArray with optional time and scenario dimension.
|
|
42
|
+
class TimeSeriesData(xr.DataArray):
|
|
43
|
+
"""Minimal TimeSeriesData that inherits from xr.DataArray with aggregation metadata."""
|
|
51
44
|
|
|
52
|
-
|
|
53
|
-
- Scalar values
|
|
54
|
-
- NumPy arrays
|
|
55
|
-
- xarray.DataArray
|
|
56
|
-
"""
|
|
45
|
+
__slots__ = () # No additional instance attributes - everything goes in attrs
|
|
57
46
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
47
|
+
def __init__(
|
|
48
|
+
self,
|
|
49
|
+
*args,
|
|
50
|
+
aggregation_group: str | None = None,
|
|
51
|
+
aggregation_weight: float | None = None,
|
|
52
|
+
agg_group: str | None = None,
|
|
53
|
+
agg_weight: float | None = None,
|
|
54
|
+
**kwargs,
|
|
55
|
+
):
|
|
62
56
|
"""
|
|
63
|
-
Convert data to xarray.DataArray with specified dimensions.
|
|
64
|
-
|
|
65
57
|
Args:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
58
|
+
*args: Arguments passed to DataArray
|
|
59
|
+
aggregation_group: Aggregation group name
|
|
60
|
+
aggregation_weight: Aggregation weight (0-1)
|
|
61
|
+
agg_group: Deprecated, use aggregation_group instead
|
|
62
|
+
agg_weight: Deprecated, use aggregation_weight instead
|
|
63
|
+
**kwargs: Additional arguments passed to DataArray
|
|
72
64
|
"""
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
elif isinstance(data, xr.DataArray):
|
|
81
|
-
return DataConverter._convert_dataarray(data, coords, dims)
|
|
82
|
-
|
|
83
|
-
elif isinstance(data, np.ndarray):
|
|
84
|
-
return DataConverter._convert_ndarray(data, coords, dims)
|
|
65
|
+
if agg_group is not None:
|
|
66
|
+
warnings.warn('agg_group is deprecated, use aggregation_group instead', DeprecationWarning, stacklevel=2)
|
|
67
|
+
aggregation_group = agg_group
|
|
68
|
+
if agg_weight is not None:
|
|
69
|
+
warnings.warn('agg_weight is deprecated, use aggregation_weight instead', DeprecationWarning, stacklevel=2)
|
|
70
|
+
aggregation_weight = agg_weight
|
|
85
71
|
|
|
86
|
-
|
|
87
|
-
|
|
72
|
+
if (aggregation_group is not None) and (aggregation_weight is not None):
|
|
73
|
+
raise ValueError('Use either aggregation_group or aggregation_weight, not both')
|
|
88
74
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
else:
|
|
93
|
-
raise ConversionError(f'Unsupported data type: {type(data).__name__}')
|
|
75
|
+
# Let xarray handle all the initialization complexity
|
|
76
|
+
super().__init__(*args, **kwargs)
|
|
94
77
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
78
|
+
# Add our metadata to attrs after initialization
|
|
79
|
+
if aggregation_group is not None:
|
|
80
|
+
self.attrs['aggregation_group'] = aggregation_group
|
|
81
|
+
if aggregation_weight is not None:
|
|
82
|
+
self.attrs['aggregation_weight'] = aggregation_weight
|
|
99
83
|
|
|
100
|
-
|
|
101
|
-
|
|
84
|
+
# Always mark as TimeSeriesData
|
|
85
|
+
self.attrs['__timeseries_data__'] = True
|
|
102
86
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
87
|
+
def fit_to_coords(
|
|
88
|
+
self,
|
|
89
|
+
coords: dict[str, pd.Index],
|
|
90
|
+
name: str | None = None,
|
|
91
|
+
) -> 'TimeSeriesData':
|
|
92
|
+
"""Fit the data to the given coordinates. Returns a new TimeSeriesData object if the current coords are different."""
|
|
93
|
+
if self.coords.equals(xr.Coordinates(coords)):
|
|
94
|
+
return self
|
|
95
|
+
|
|
96
|
+
da = DataConverter.to_dataarray(self.data, coords=coords)
|
|
97
|
+
return self.__class__(
|
|
98
|
+
da,
|
|
99
|
+
aggregation_group=self.aggregation_group,
|
|
100
|
+
aggregation_weight=self.aggregation_weight,
|
|
101
|
+
name=name if name is not None else self.name,
|
|
102
|
+
)
|
|
108
103
|
|
|
109
|
-
|
|
110
|
-
|
|
104
|
+
@property
|
|
105
|
+
def aggregation_group(self) -> str | None:
|
|
106
|
+
return self.attrs.get('aggregation_group')
|
|
111
107
|
|
|
112
|
-
|
|
108
|
+
@property
|
|
109
|
+
def aggregation_weight(self) -> float | None:
|
|
110
|
+
return self.attrs.get('aggregation_weight')
|
|
113
111
|
|
|
114
|
-
@
|
|
115
|
-
def
|
|
116
|
-
|
|
117
|
-
|
|
112
|
+
@classmethod
|
|
113
|
+
def from_dataarray(
|
|
114
|
+
cls, da: xr.DataArray, aggregation_group: str | None = None, aggregation_weight: float | None = None
|
|
115
|
+
):
|
|
116
|
+
"""Create TimeSeriesData from DataArray, extracting metadata from attrs."""
|
|
117
|
+
# Get aggregation metadata from attrs or parameters
|
|
118
|
+
final_aggregation_group = (
|
|
119
|
+
aggregation_group if aggregation_group is not None else da.attrs.get('aggregation_group')
|
|
120
|
+
)
|
|
121
|
+
final_aggregation_weight = (
|
|
122
|
+
aggregation_weight if aggregation_weight is not None else da.attrs.get('aggregation_weight')
|
|
123
|
+
)
|
|
118
124
|
|
|
119
|
-
|
|
120
|
-
scenarios: The scenario index to validate
|
|
121
|
-
"""
|
|
122
|
-
if not isinstance(scenarios, pd.Index) or len(scenarios) == 0:
|
|
123
|
-
raise ConversionError('Scenarios must be a non-empty Index')
|
|
125
|
+
return cls(da, aggregation_group=final_aggregation_group, aggregation_weight=final_aggregation_weight)
|
|
124
126
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
+
@classmethod
|
|
128
|
+
def is_timeseries_data(cls, obj) -> bool:
|
|
129
|
+
"""Check if an object is TimeSeriesData."""
|
|
130
|
+
return isinstance(obj, xr.DataArray) and obj.attrs.get('__timeseries_data__', False)
|
|
127
131
|
|
|
128
|
-
|
|
132
|
+
def __repr__(self):
|
|
133
|
+
agg_info = []
|
|
134
|
+
if self.aggregation_group:
|
|
135
|
+
agg_info.append(f"aggregation_group='{self.aggregation_group}'")
|
|
136
|
+
if self.aggregation_weight is not None:
|
|
137
|
+
agg_info.append(f'aggregation_weight={self.aggregation_weight}')
|
|
129
138
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
timesteps: Optional[pd.DatetimeIndex], scenarios: Optional[pd.Index]
|
|
133
|
-
) -> Tuple[Dict[str, pd.Index], Tuple[str, ...]]:
|
|
134
|
-
"""
|
|
135
|
-
Prepare coordinates and dimensions for the DataArray.
|
|
139
|
+
info_str = f'TimeSeriesData({", ".join(agg_info)})' if agg_info else 'TimeSeriesData'
|
|
140
|
+
return f'{info_str}\n{super().__repr__()}'
|
|
136
141
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
142
|
+
@property
|
|
143
|
+
def agg_group(self):
|
|
144
|
+
warnings.warn('agg_group is deprecated, use aggregation_group instead', DeprecationWarning, stacklevel=2)
|
|
145
|
+
return self.aggregation_group
|
|
140
146
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
if timesteps is not None:
|
|
146
|
-
timesteps = DataConverter._validate_timesteps(timesteps)
|
|
147
|
+
@property
|
|
148
|
+
def agg_weight(self):
|
|
149
|
+
warnings.warn('agg_weight is deprecated, use aggregation_weight instead', DeprecationWarning, stacklevel=2)
|
|
150
|
+
return self.aggregation_weight
|
|
147
151
|
|
|
148
|
-
if scenarios is not None:
|
|
149
|
-
scenarios = DataConverter._validate_scenarios(scenarios)
|
|
150
152
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
153
|
+
TemporalDataUser = (
|
|
154
|
+
int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray | TimeSeriesData
|
|
155
|
+
)
|
|
156
|
+
"""User data which might have a time dimension. Internally converted to an xr.DataArray with time dimension."""
|
|
154
157
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
dims.append('time')
|
|
158
|
+
TemporalData = xr.DataArray | TimeSeriesData
|
|
159
|
+
"""Internally used datatypes for temporal data (data with a time dimension)."""
|
|
158
160
|
|
|
159
|
-
if scenarios is not None:
|
|
160
|
-
coords['scenario'] = scenarios
|
|
161
|
-
dims.append('scenario')
|
|
162
161
|
|
|
163
|
-
|
|
162
|
+
class DataConverter:
|
|
163
|
+
"""
|
|
164
|
+
Converts various data types into xarray.DataArray with specified target coordinates.
|
|
165
|
+
|
|
166
|
+
This converter handles intelligent dimension matching and broadcasting to ensure
|
|
167
|
+
the output DataArray always conforms to the specified coordinate structure.
|
|
168
|
+
|
|
169
|
+
Supported input types:
|
|
170
|
+
- Scalars: int, float, np.number (broadcast to all target dimensions)
|
|
171
|
+
- 1D data: np.ndarray, pd.Series, single-column DataFrame (matched by length/index)
|
|
172
|
+
- Multi-dimensional arrays: np.ndarray, DataFrame (matched by shape)
|
|
173
|
+
- xr.DataArray: validated and potentially broadcast to target dimensions
|
|
174
|
+
|
|
175
|
+
The converter uses smart matching strategies:
|
|
176
|
+
- Series: matched by exact index comparison
|
|
177
|
+
- 1D arrays: matched by length to target coordinates
|
|
178
|
+
- Multi-dimensional arrays: matched by shape permutation analysis
|
|
179
|
+
- DataArrays: validated for compatibility and broadcast as needed
|
|
180
|
+
"""
|
|
164
181
|
|
|
165
182
|
@staticmethod
|
|
166
|
-
def
|
|
167
|
-
data:
|
|
183
|
+
def _match_series_by_index_alignment(
|
|
184
|
+
data: pd.Series, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
|
|
168
185
|
) -> xr.DataArray:
|
|
169
186
|
"""
|
|
170
|
-
|
|
187
|
+
Match pandas Series to target dimension by exact index comparison.
|
|
188
|
+
|
|
189
|
+
Attempts to find a target dimension whose coordinates exactly match
|
|
190
|
+
the Series index values, ensuring proper alignment.
|
|
171
191
|
|
|
172
192
|
Args:
|
|
173
|
-
data:
|
|
174
|
-
|
|
175
|
-
|
|
193
|
+
data: pandas Series to convert
|
|
194
|
+
target_coords: Available target coordinates {dim_name: coordinate_index}
|
|
195
|
+
target_dims: Target dimension names to consider for matching
|
|
176
196
|
|
|
177
197
|
Returns:
|
|
178
|
-
DataArray with the
|
|
179
|
-
"""
|
|
180
|
-
if isinstance(data, (np.integer, np.floating)):
|
|
181
|
-
data = data.item()
|
|
182
|
-
return xr.DataArray(data, coords=coords, dims=dims)
|
|
198
|
+
DataArray with Series matched to the appropriate dimension
|
|
183
199
|
|
|
184
|
-
|
|
185
|
-
|
|
200
|
+
Raises:
|
|
201
|
+
ConversionError: If Series cannot be matched to any target dimension,
|
|
202
|
+
or if no target dimensions provided for multi-element Series
|
|
186
203
|
"""
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
204
|
+
# Handle edge case: no target dimensions
|
|
205
|
+
if len(target_dims) == 0:
|
|
206
|
+
if len(data) != 1:
|
|
207
|
+
raise ConversionError(
|
|
208
|
+
f'Cannot convert multi-element Series without target dimensions. '
|
|
209
|
+
f'Series has {len(data)} elements but no target dimensions specified.'
|
|
210
|
+
)
|
|
211
|
+
return xr.DataArray(data.iloc[0])
|
|
193
212
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
if data.size != 1:
|
|
200
|
-
raise ConversionError('When converting to dimensionless DataArray, source must be scalar')
|
|
201
|
-
return xr.DataArray(data.values.item())
|
|
202
|
-
|
|
203
|
-
# Check if data already has matching dimensions and coordinates
|
|
204
|
-
if set(data.dims) == set(dims):
|
|
205
|
-
# Check if coordinates match
|
|
206
|
-
is_compatible = True
|
|
207
|
-
for dim in dims:
|
|
208
|
-
if dim in data.dims and not np.array_equal(data.coords[dim].values, coords[dim].values):
|
|
209
|
-
is_compatible = False
|
|
210
|
-
break
|
|
211
|
-
|
|
212
|
-
if is_compatible:
|
|
213
|
-
# Ensure dimensions are in the correct order
|
|
214
|
-
if data.dims != dims:
|
|
215
|
-
# Transpose to get dimensions in the right order
|
|
216
|
-
return data.transpose(*dims).copy(deep=True)
|
|
217
|
-
else:
|
|
218
|
-
# Return existing DataArray if compatible and order is correct
|
|
219
|
-
return data.copy(deep=True)
|
|
220
|
-
|
|
221
|
-
# Handle dimension broadcasting
|
|
222
|
-
if len(data.dims) == 1 and len(dims) == 2:
|
|
223
|
-
# Single dimension to two dimensions
|
|
224
|
-
if data.dims[0] == 'time' and 'scenario' in dims:
|
|
225
|
-
# Broadcast time dimension to include scenarios
|
|
226
|
-
return DataConverter._broadcast_time_to_scenarios(data, coords, dims)
|
|
227
|
-
|
|
228
|
-
elif data.dims[0] == 'scenario' and 'time' in dims:
|
|
229
|
-
# Broadcast scenario dimension to include time
|
|
230
|
-
return DataConverter._broadcast_scenario_to_time(data, coords, dims)
|
|
213
|
+
# Attempt exact index matching with each target dimension
|
|
214
|
+
for dim_name in target_dims:
|
|
215
|
+
target_index = target_coords[dim_name]
|
|
216
|
+
if data.index.equals(target_index):
|
|
217
|
+
return xr.DataArray(data.values.copy(), coords={dim_name: target_index}, dims=dim_name)
|
|
231
218
|
|
|
219
|
+
# No exact matches found
|
|
220
|
+
available_lengths = {dim: len(target_coords[dim]) for dim in target_dims}
|
|
232
221
|
raise ConversionError(
|
|
233
|
-
f'
|
|
222
|
+
f'Series index does not match any target dimension coordinates. '
|
|
223
|
+
f'Series length: {len(data)}, available coordinate lengths: {available_lengths}'
|
|
234
224
|
)
|
|
235
|
-
@staticmethod
|
|
236
|
-
def _broadcast_time_to_scenarios(
|
|
237
|
-
data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
|
|
238
|
-
) -> xr.DataArray:
|
|
239
|
-
"""
|
|
240
|
-
Broadcast a time-only DataArray to include scenarios.
|
|
241
|
-
|
|
242
|
-
Args:
|
|
243
|
-
data: The time-indexed DataArray
|
|
244
|
-
coords: Target coordinates
|
|
245
|
-
dims: Target dimensions
|
|
246
|
-
|
|
247
|
-
Returns:
|
|
248
|
-
DataArray with time and scenario dimensions
|
|
249
|
-
"""
|
|
250
|
-
# Check compatibility
|
|
251
|
-
if not np.array_equal(data.coords['time'].values, coords['time'].values):
|
|
252
|
-
raise ConversionError("Source time coordinates don't match target time coordinates")
|
|
253
|
-
|
|
254
|
-
if len(coords['scenario']) <= 1:
|
|
255
|
-
return data.copy(deep=True)
|
|
256
|
-
|
|
257
|
-
# Broadcast values
|
|
258
|
-
values = np.repeat(data.values[:, np.newaxis], len(coords['scenario']), axis=1)
|
|
259
|
-
return xr.DataArray(values.copy(), coords=coords, dims=dims)
|
|
260
225
|
|
|
261
226
|
@staticmethod
|
|
262
|
-
def
|
|
263
|
-
data:
|
|
227
|
+
def _match_1d_array_by_length(
|
|
228
|
+
data: np.ndarray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
|
|
264
229
|
) -> xr.DataArray:
|
|
265
230
|
"""
|
|
266
|
-
|
|
231
|
+
Match 1D numpy array to target dimension by length comparison.
|
|
232
|
+
|
|
233
|
+
Finds target dimensions whose coordinate length matches the array length.
|
|
234
|
+
Requires unique length match to avoid ambiguity.
|
|
267
235
|
|
|
268
236
|
Args:
|
|
269
|
-
data:
|
|
270
|
-
|
|
271
|
-
|
|
237
|
+
data: 1D numpy array to convert
|
|
238
|
+
target_coords: Available target coordinates {dim_name: coordinate_index}
|
|
239
|
+
target_dims: Target dimension names to consider for matching
|
|
272
240
|
|
|
273
241
|
Returns:
|
|
274
|
-
DataArray with
|
|
275
|
-
"""
|
|
276
|
-
# Check compatibility
|
|
277
|
-
if not np.array_equal(data.coords['scenario'].values, coords['scenario'].values):
|
|
278
|
-
raise ConversionError("Source scenario coordinates don't match target scenario coordinates")
|
|
279
|
-
|
|
280
|
-
# Broadcast values
|
|
281
|
-
values = np.repeat(data.values[:, np.newaxis], len(coords['time']), axis=1).T
|
|
282
|
-
return xr.DataArray(values.copy(), coords=coords, dims=dims)
|
|
242
|
+
DataArray with array matched to the uniquely identified dimension
|
|
283
243
|
|
|
284
|
-
|
|
285
|
-
|
|
244
|
+
Raises:
|
|
245
|
+
ConversionError: If array length matches zero or multiple target dimensions,
|
|
246
|
+
or if no target dimensions provided for multi-element array
|
|
286
247
|
"""
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
248
|
+
# Handle edge case: no target dimensions
|
|
249
|
+
if len(target_dims) == 0:
|
|
250
|
+
if len(data) != 1:
|
|
251
|
+
raise ConversionError(
|
|
252
|
+
f'Cannot convert multi-element array without target dimensions. Array has {len(data)} elements.'
|
|
253
|
+
)
|
|
254
|
+
return xr.DataArray(data[0])
|
|
293
255
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
if len(dims) == 0:
|
|
299
|
-
if data.size != 1:
|
|
300
|
-
raise ConversionError('Without dimensions, can only convert scalar arrays')
|
|
301
|
-
return xr.DataArray(data.item())
|
|
256
|
+
# Find all dimensions with matching lengths
|
|
257
|
+
array_length = len(data)
|
|
258
|
+
matching_dims = []
|
|
259
|
+
coordinate_lengths = {}
|
|
302
260
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
261
|
+
for dim_name in target_dims:
|
|
262
|
+
coord_length = len(target_coords[dim_name])
|
|
263
|
+
coordinate_lengths[dim_name] = coord_length
|
|
264
|
+
if array_length == coord_length:
|
|
265
|
+
matching_dims.append(dim_name)
|
|
306
266
|
|
|
307
|
-
#
|
|
308
|
-
|
|
309
|
-
|
|
267
|
+
# Validate matching results
|
|
268
|
+
if len(matching_dims) == 0:
|
|
269
|
+
raise ConversionError(
|
|
270
|
+
f'Array length {array_length} does not match any target dimension lengths: {coordinate_lengths}'
|
|
271
|
+
)
|
|
272
|
+
elif len(matching_dims) > 1:
|
|
273
|
+
raise ConversionError(
|
|
274
|
+
f'Array length {array_length} matches multiple dimensions: {matching_dims}. '
|
|
275
|
+
f'Cannot uniquely determine target dimension. Consider using explicit '
|
|
276
|
+
f'dimension specification or converting to DataArray manually.'
|
|
277
|
+
)
|
|
310
278
|
|
|
311
|
-
|
|
312
|
-
|
|
279
|
+
# Create DataArray with the uniquely matched dimension
|
|
280
|
+
matched_dim = matching_dims[0]
|
|
281
|
+
return xr.DataArray(data.copy(), coords={matched_dim: target_coords[matched_dim]}, dims=matched_dim)
|
|
313
282
|
|
|
314
283
|
@staticmethod
|
|
315
|
-
def
|
|
316
|
-
data: np.ndarray,
|
|
284
|
+
def _match_multidim_array_by_shape_permutation(
|
|
285
|
+
data: np.ndarray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
|
|
317
286
|
) -> xr.DataArray:
|
|
318
287
|
"""
|
|
319
|
-
|
|
288
|
+
Match multi-dimensional array to target dimensions using shape permutation analysis.
|
|
320
289
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
coords: Target coordinates
|
|
324
|
-
dims: Target dimensions (length 1)
|
|
325
|
-
|
|
326
|
-
Returns:
|
|
327
|
-
DataArray with single dimension
|
|
328
|
-
"""
|
|
329
|
-
dim_name = dims[0]
|
|
330
|
-
dim_length = len(coords[dim_name])
|
|
331
|
-
|
|
332
|
-
if data.ndim == 1:
|
|
333
|
-
# 1D array must match dimension length
|
|
334
|
-
if data.shape[0] != dim_length:
|
|
335
|
-
raise ConversionError(f"Array length {data.shape[0]} doesn't match {dim_name} length {dim_length}")
|
|
336
|
-
return xr.DataArray(data, coords=coords, dims=dims)
|
|
337
|
-
else:
|
|
338
|
-
raise ConversionError(f'Expected 1D array for single dimension, got {data.ndim}D')
|
|
339
|
-
|
|
340
|
-
@staticmethod
|
|
341
|
-
def _convert_ndarray_two_dims(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
|
|
342
|
-
"""
|
|
343
|
-
Convert a NumPy array to a two-dimension DataArray.
|
|
290
|
+
Analyzes all possible mappings between array shape and target coordinate lengths
|
|
291
|
+
to find the unique valid dimension assignment.
|
|
344
292
|
|
|
345
293
|
Args:
|
|
346
|
-
data:
|
|
347
|
-
|
|
348
|
-
|
|
294
|
+
data: Multi-dimensional numpy array to convert
|
|
295
|
+
target_coords: Available target coordinates {dim_name: coordinate_index}
|
|
296
|
+
target_dims: Target dimension names to consider for matching
|
|
349
297
|
|
|
350
298
|
Returns:
|
|
351
|
-
DataArray with
|
|
352
|
-
"""
|
|
353
|
-
scenario_length = len(coords['scenario'])
|
|
354
|
-
time_length = len(coords['time'])
|
|
355
|
-
|
|
356
|
-
if data.ndim == 1:
|
|
357
|
-
# For 1D array, create 2D array based on which dimension it matches
|
|
358
|
-
if data.shape[0] == time_length:
|
|
359
|
-
# Broadcast across scenarios
|
|
360
|
-
values = np.repeat(data[:, np.newaxis], scenario_length, axis=1)
|
|
361
|
-
return xr.DataArray(values, coords=coords, dims=dims)
|
|
362
|
-
elif data.shape[0] == scenario_length:
|
|
363
|
-
# Broadcast across time
|
|
364
|
-
values = np.repeat(data[np.newaxis, :], time_length, axis=0)
|
|
365
|
-
return xr.DataArray(values, coords=coords, dims=dims)
|
|
366
|
-
else:
|
|
367
|
-
raise ConversionError(f"1D array length {data.shape[0]} doesn't match either dimension")
|
|
368
|
-
|
|
369
|
-
elif data.ndim == 2:
|
|
370
|
-
# For 2D array, shape must match dimensions
|
|
371
|
-
expected_shape = (time_length, scenario_length)
|
|
372
|
-
if data.shape != expected_shape:
|
|
373
|
-
raise ConversionError(f"2D array shape {data.shape} doesn't match expected shape {expected_shape}")
|
|
374
|
-
return xr.DataArray(data, coords=coords, dims=dims)
|
|
375
|
-
|
|
376
|
-
else:
|
|
377
|
-
raise ConversionError(f'Expected 1D or 2D array for two dimensions, got {data.ndim}D')
|
|
378
|
-
|
|
379
|
-
@staticmethod
|
|
380
|
-
def _convert_series(data: pd.Series, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
|
|
381
|
-
"""
|
|
382
|
-
Convert pandas Series to xarray DataArray.
|
|
383
|
-
|
|
384
|
-
Args:
|
|
385
|
-
data: pandas Series to convert
|
|
386
|
-
coords: Target coordinates
|
|
387
|
-
dims: Target dimensions
|
|
299
|
+
DataArray with array dimensions mapped to target dimensions by shape
|
|
388
300
|
|
|
389
|
-
|
|
390
|
-
|
|
301
|
+
Raises:
|
|
302
|
+
ConversionError: If array shape cannot be uniquely mapped to target dimensions,
|
|
303
|
+
or if no target dimensions provided for multi-element array
|
|
391
304
|
"""
|
|
392
|
-
# Handle
|
|
393
|
-
if len(
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
# Check if series index matches the dimension
|
|
397
|
-
if data.index.equals(coords[dim_name]):
|
|
398
|
-
return xr.DataArray(data.values.copy(), coords=coords, dims=dims)
|
|
399
|
-
else:
|
|
400
|
-
raise ConversionError(
|
|
401
|
-
f"Series index doesn't match {dim_name} coordinates.\n"
|
|
402
|
-
f'Series index: {data.index}\n'
|
|
403
|
-
f'Target {dim_name} coordinates: {coords[dim_name]}'
|
|
404
|
-
)
|
|
405
|
-
|
|
406
|
-
# Handle two dimensions case
|
|
407
|
-
elif len(dims) == 2:
|
|
408
|
-
# Check if dimensions are time and scenario
|
|
409
|
-
if dims != ('time', 'scenario'):
|
|
410
|
-
raise ConversionError(
|
|
411
|
-
f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}'
|
|
412
|
-
)
|
|
413
|
-
|
|
414
|
-
# Case 1: Series is indexed by time
|
|
415
|
-
if data.index.equals(coords['time']):
|
|
416
|
-
# Broadcast across scenarios
|
|
417
|
-
values = np.repeat(data.values[:, np.newaxis], len(coords['scenario']), axis=1)
|
|
418
|
-
return xr.DataArray(values.copy(), coords=coords, dims=dims)
|
|
419
|
-
|
|
420
|
-
# Case 2: Series is indexed by scenario
|
|
421
|
-
elif data.index.equals(coords['scenario']):
|
|
422
|
-
# Broadcast across time
|
|
423
|
-
values = np.repeat(data.values[np.newaxis, :], len(coords['time']), axis=0)
|
|
424
|
-
return xr.DataArray(values.copy(), coords=coords, dims=dims)
|
|
425
|
-
|
|
426
|
-
else:
|
|
305
|
+
# Handle edge case: no target dimensions
|
|
306
|
+
if len(target_dims) == 0:
|
|
307
|
+
if data.size != 1:
|
|
427
308
|
raise ConversionError(
|
|
428
|
-
|
|
429
|
-
f'
|
|
430
|
-
f'Target time coordinates: {coords["time"]}\n'
|
|
431
|
-
f'Target scenario coordinates: {coords["scenario"]}'
|
|
309
|
+
f'Cannot convert multi-element array without target dimensions. '
|
|
310
|
+
f'Array has {data.size} elements with shape {data.shape}.'
|
|
432
311
|
)
|
|
312
|
+
return xr.DataArray(data.item())
|
|
433
313
|
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
@staticmethod
|
|
438
|
-
def _convert_dataframe(data: pd.DataFrame, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
|
|
439
|
-
"""
|
|
440
|
-
Convert pandas DataFrame to xarray DataArray.
|
|
441
|
-
Only allows time as index and scenarios as columns.
|
|
442
|
-
|
|
443
|
-
Args:
|
|
444
|
-
data: pandas DataFrame to convert
|
|
445
|
-
coords: Target coordinates
|
|
446
|
-
dims: Target dimensions
|
|
314
|
+
array_shape = data.shape
|
|
315
|
+
coordinate_lengths = {dim: len(target_coords[dim]) for dim in target_dims}
|
|
447
316
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
if
|
|
455
|
-
|
|
456
|
-
return DataConverter._convert_series(series, coords, dims)
|
|
317
|
+
# Find all valid dimension permutations that match the array shape
|
|
318
|
+
valid_mappings = []
|
|
319
|
+
for dim_permutation in permutations(target_dims, data.ndim):
|
|
320
|
+
shape_matches = all(
|
|
321
|
+
array_shape[i] == coordinate_lengths[dim_permutation[i]] for i in range(len(dim_permutation))
|
|
322
|
+
)
|
|
323
|
+
if shape_matches:
|
|
324
|
+
valid_mappings.append(dim_permutation)
|
|
457
325
|
|
|
326
|
+
# Validate mapping results
|
|
327
|
+
if len(valid_mappings) == 0:
|
|
458
328
|
raise ConversionError(
|
|
459
|
-
f'
|
|
329
|
+
f'Array shape {array_shape} cannot be mapped to any combination of target '
|
|
330
|
+
f'coordinate lengths: {coordinate_lengths}. Consider reshaping the array '
|
|
331
|
+
f'or adjusting target coordinates.'
|
|
460
332
|
)
|
|
461
333
|
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
)
|
|
469
|
-
|
|
470
|
-
# DataFrame must have time as index and scenarios as columns
|
|
471
|
-
if data.index.equals(coords['time']) and data.columns.equals(coords['scenario']):
|
|
472
|
-
# Create DataArray with proper dimension order
|
|
473
|
-
return xr.DataArray(data.values.copy(), coords=coords, dims=dims)
|
|
474
|
-
else:
|
|
475
|
-
raise ConversionError(
|
|
476
|
-
'DataFrame must have time as index and scenarios as columns.\n'
|
|
477
|
-
f'DataFrame index: {data.index}\n'
|
|
478
|
-
f'DataFrame columns: {data.columns}\n'
|
|
479
|
-
f'Target time coordinates: {coords["time"]}\n'
|
|
480
|
-
f'Target scenario coordinates: {coords["scenario"]}'
|
|
481
|
-
)
|
|
482
|
-
|
|
483
|
-
else:
|
|
484
|
-
raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}')
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
class TimeSeriesData:
|
|
488
|
-
# TODO: Move to Interface.py
|
|
489
|
-
def __init__(self, data: TimestepData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None):
|
|
490
|
-
"""
|
|
491
|
-
timeseries class for transmit timeseries AND special characteristics of timeseries,
|
|
492
|
-
i.g. to define weights needed in calculation_type 'aggregated'
|
|
493
|
-
EXAMPLE solar:
|
|
494
|
-
you have several solar timeseries. These should not be overweighted
|
|
495
|
-
compared to the remaining timeseries (i.g. heat load, price)!
|
|
496
|
-
fixed_relative_profile_solar1 = TimeSeriesData(sol_array_1, type = 'solar')
|
|
497
|
-
fixed_relative_profile_solar2 = TimeSeriesData(sol_array_2, type = 'solar')
|
|
498
|
-
fixed_relative_profile_solar3 = TimeSeriesData(sol_array_3, type = 'solar')
|
|
499
|
-
--> this 3 series of same type share one weight, i.e. internally assigned each weight = 1/3
|
|
500
|
-
(instead of standard weight = 1)
|
|
501
|
-
|
|
502
|
-
Args:
|
|
503
|
-
data: The timeseries data, which can be a scalar, array, or numpy array.
|
|
504
|
-
agg_group: The group this TimeSeriesData is a part of. agg_weight is split between members of a group. Default is None.
|
|
505
|
-
agg_weight: The weight for calculation_type 'aggregated', should be between 0 and 1. Default is None.
|
|
506
|
-
|
|
507
|
-
Raises:
|
|
508
|
-
Exception: If both agg_group and agg_weight are set, an exception is raised.
|
|
509
|
-
"""
|
|
510
|
-
self.data = data
|
|
511
|
-
self.agg_group = agg_group
|
|
512
|
-
self.agg_weight = agg_weight
|
|
513
|
-
if (agg_group is not None) and (agg_weight is not None):
|
|
514
|
-
raise ValueError('Either <agg_group> or explicit <agg_weigth> can be used. Not both!')
|
|
515
|
-
self.label: Optional[str] = None
|
|
516
|
-
|
|
517
|
-
def __repr__(self):
|
|
518
|
-
# Get the constructor arguments and their current values
|
|
519
|
-
init_signature = inspect.signature(self.__init__)
|
|
520
|
-
init_args = init_signature.parameters
|
|
521
|
-
|
|
522
|
-
# Create a dictionary with argument names and their values
|
|
523
|
-
args_str = ', '.join(f'{name}={repr(getattr(self, name, None))}' for name in init_args if name != 'self')
|
|
524
|
-
return f'{self.__class__.__name__}({args_str})'
|
|
525
|
-
|
|
526
|
-
def __str__(self):
|
|
527
|
-
return str(self.data)
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
class TimeSeries:
|
|
531
|
-
"""
|
|
532
|
-
A class representing time series data with active and stored states.
|
|
533
|
-
|
|
534
|
-
TimeSeries provides a way to store time-indexed data and work with temporal subsets.
|
|
535
|
-
It supports arithmetic operations, aggregation, and JSON serialization.
|
|
334
|
+
if len(valid_mappings) > 1:
|
|
335
|
+
raise ConversionError(
|
|
336
|
+
f'Array shape {array_shape} matches multiple dimension combinations: '
|
|
337
|
+
f'{valid_mappings}. Cannot uniquely determine dimension mapping. '
|
|
338
|
+
f'Consider using explicit dimension specification.'
|
|
339
|
+
)
|
|
536
340
|
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
aggregation_group (Optional[str]): Group name for shared aggregation weighting
|
|
541
|
-
has_extra_timestep (bool): Whether this series needs an extra timestep
|
|
542
|
-
"""
|
|
341
|
+
# Create DataArray with the uniquely determined mapping
|
|
342
|
+
matched_dims = valid_mappings[0]
|
|
343
|
+
matched_coords = {dim: target_coords[dim] for dim in matched_dims}
|
|
543
344
|
|
|
544
|
-
|
|
545
|
-
def from_datasource(
|
|
546
|
-
cls,
|
|
547
|
-
data: NumericDataTS,
|
|
548
|
-
name: str,
|
|
549
|
-
timesteps: pd.DatetimeIndex,
|
|
550
|
-
scenarios: Optional[pd.Index] = None,
|
|
551
|
-
aggregation_weight: Optional[float] = None,
|
|
552
|
-
aggregation_group: Optional[str] = None,
|
|
553
|
-
has_extra_timestep: bool = False,
|
|
554
|
-
) -> 'TimeSeries':
|
|
555
|
-
"""
|
|
556
|
-
Initialize the TimeSeries from multiple data sources.
|
|
345
|
+
return xr.DataArray(data.copy(), coords=matched_coords, dims=matched_dims)
|
|
557
346
|
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
scenarios: The scenarios of the TimeSeries
|
|
563
|
-
aggregation_weight: The weight in aggregation calculations
|
|
564
|
-
aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing
|
|
565
|
-
has_extra_timestep: Whether this series requires an extra timestep
|
|
566
|
-
|
|
567
|
-
Returns:
|
|
568
|
-
A new TimeSeries instance
|
|
347
|
+
@staticmethod
|
|
348
|
+
def _broadcast_dataarray_to_target_specification(
|
|
349
|
+
source_data: xr.DataArray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
|
|
350
|
+
) -> xr.DataArray:
|
|
569
351
|
"""
|
|
570
|
-
|
|
571
|
-
DataConverter.as_dataarray(data, timesteps, scenarios),
|
|
572
|
-
name,
|
|
573
|
-
aggregation_weight,
|
|
574
|
-
aggregation_group,
|
|
575
|
-
has_extra_timestep,
|
|
576
|
-
)
|
|
352
|
+
Broadcast DataArray to conform to target coordinate and dimension specification.
|
|
577
353
|
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
Load a TimeSeries from a dictionary or json file.
|
|
354
|
+
Performs comprehensive validation and broadcasting to ensure the result exactly
|
|
355
|
+
matches the target specification. Handles scalar expansion, dimension validation,
|
|
356
|
+
coordinate compatibility checking, and broadcasting to additional dimensions.
|
|
582
357
|
|
|
583
358
|
Args:
|
|
584
|
-
|
|
585
|
-
|
|
359
|
+
source_data: Source DataArray to broadcast
|
|
360
|
+
target_coords: Target coordinates {dim_name: coordinate_index}
|
|
361
|
+
target_dims: Target dimension names in desired order
|
|
586
362
|
|
|
587
363
|
Returns:
|
|
588
|
-
|
|
364
|
+
DataArray broadcast to target specification with proper dimension ordering
|
|
589
365
|
|
|
590
366
|
Raises:
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
if (path is None and data is None) or (path is not None and data is not None):
|
|
594
|
-
raise ValueError("Exactly one of 'path' or 'data' must be provided")
|
|
595
|
-
|
|
596
|
-
if path is not None:
|
|
597
|
-
with open(path, 'r') as f:
|
|
598
|
-
data = json.load(f)
|
|
599
|
-
|
|
600
|
-
# Convert ISO date strings to datetime objects
|
|
601
|
-
data['data']['coords']['time']['data'] = pd.to_datetime(data['data']['coords']['time']['data'])
|
|
602
|
-
|
|
603
|
-
# Create the TimeSeries instance
|
|
604
|
-
return cls(
|
|
605
|
-
data=xr.DataArray.from_dict(data['data']),
|
|
606
|
-
name=data['name'],
|
|
607
|
-
aggregation_weight=data['aggregation_weight'],
|
|
608
|
-
aggregation_group=data['aggregation_group'],
|
|
609
|
-
has_extra_timestep=data['has_extra_timestep'],
|
|
610
|
-
)
|
|
611
|
-
|
|
612
|
-
def __init__(
|
|
613
|
-
self,
|
|
614
|
-
data: xr.DataArray,
|
|
615
|
-
name: str,
|
|
616
|
-
aggregation_weight: Optional[float] = None,
|
|
617
|
-
aggregation_group: Optional[str] = None,
|
|
618
|
-
has_extra_timestep: bool = False,
|
|
619
|
-
):
|
|
367
|
+
ConversionError: If broadcasting is impossible due to incompatible dimensions
|
|
368
|
+
or coordinate mismatches
|
|
620
369
|
"""
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
has_extra_timestep: Whether this series requires an extra timestep
|
|
629
|
-
|
|
630
|
-
Raises:
|
|
631
|
-
ValueError: If data has unsupported dimensions
|
|
632
|
-
"""
|
|
633
|
-
allowed_dims = {'time', 'scenario'}
|
|
634
|
-
if not set(data.dims).issubset(allowed_dims):
|
|
635
|
-
raise ValueError(f'DataArray dimensions must be subset of {allowed_dims}. Got {data.dims}')
|
|
370
|
+
# Validate: cannot reduce dimensions
|
|
371
|
+
if len(source_data.dims) > len(target_dims):
|
|
372
|
+
raise ConversionError(
|
|
373
|
+
f'Cannot reduce DataArray dimensionality from {len(source_data.dims)} '
|
|
374
|
+
f'to {len(target_dims)} dimensions. Source dims: {source_data.dims}, '
|
|
375
|
+
f'target dims: {target_dims}'
|
|
376
|
+
)
|
|
636
377
|
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
378
|
+
# Validate: all source dimensions must exist in target
|
|
379
|
+
missing_dims = set(source_data.dims) - set(target_dims)
|
|
380
|
+
if missing_dims:
|
|
381
|
+
raise ConversionError(
|
|
382
|
+
f'Source DataArray has dimensions {missing_dims} not present in target dimensions {target_dims}'
|
|
383
|
+
)
|
|
641
384
|
|
|
642
|
-
#
|
|
643
|
-
|
|
644
|
-
|
|
385
|
+
# Validate: coordinate compatibility for overlapping dimensions
|
|
386
|
+
for dim in source_data.dims:
|
|
387
|
+
if dim in source_data.coords and dim in target_coords:
|
|
388
|
+
source_coords = source_data.coords[dim]
|
|
389
|
+
target_coords_for_dim = target_coords[dim]
|
|
645
390
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
391
|
+
if not np.array_equal(source_coords.values, target_coords_for_dim.values):
|
|
392
|
+
raise ConversionError(
|
|
393
|
+
f'Coordinate mismatch for dimension "{dim}". '
|
|
394
|
+
f'Source and target coordinates have different values.'
|
|
395
|
+
)
|
|
649
396
|
|
|
650
|
-
#
|
|
651
|
-
|
|
652
|
-
|
|
397
|
+
# Create target template for broadcasting
|
|
398
|
+
target_shape = [len(target_coords[dim]) for dim in target_dims]
|
|
399
|
+
target_template = xr.DataArray(np.empty(target_shape), coords=target_coords, dims=target_dims)
|
|
653
400
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
This is equivalent to clearing all selections.
|
|
658
|
-
"""
|
|
659
|
-
self.set_selection(None, None)
|
|
401
|
+
# Perform broadcasting and ensure proper dimension ordering
|
|
402
|
+
broadcasted = source_data.broadcast_like(target_template)
|
|
403
|
+
return broadcasted.transpose(*target_dims)
|
|
660
404
|
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
405
|
+
@classmethod
|
|
406
|
+
def to_dataarray(
|
|
407
|
+
cls,
|
|
408
|
+
data: int
|
|
409
|
+
| float
|
|
410
|
+
| bool
|
|
411
|
+
| np.integer
|
|
412
|
+
| np.floating
|
|
413
|
+
| np.bool_
|
|
414
|
+
| np.ndarray
|
|
415
|
+
| pd.Series
|
|
416
|
+
| pd.DataFrame
|
|
417
|
+
| xr.DataArray,
|
|
418
|
+
coords: dict[str, pd.Index] | None = None,
|
|
419
|
+
) -> xr.DataArray:
|
|
664
420
|
"""
|
|
665
|
-
|
|
666
|
-
self.reset()
|
|
421
|
+
Convert various data types to xarray.DataArray with specified target coordinates.
|
|
667
422
|
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
423
|
+
This is the main conversion method that intelligently handles different input types
|
|
424
|
+
and ensures the result conforms to the specified coordinate structure through
|
|
425
|
+
smart dimension matching and broadcasting.
|
|
671
426
|
|
|
672
427
|
Args:
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
'aggregation_weight': self.aggregation_weight,
|
|
681
|
-
'aggregation_group': self.aggregation_group,
|
|
682
|
-
'has_extra_timestep': self.has_extra_timestep,
|
|
683
|
-
'data': self.selected_data.to_dict(),
|
|
684
|
-
}
|
|
685
|
-
|
|
686
|
-
# Convert datetime objects to ISO strings
|
|
687
|
-
data['data']['coords']['time']['data'] = [date.isoformat() for date in data['data']['coords']['time']['data']]
|
|
688
|
-
|
|
689
|
-
# Save to file if path is provided
|
|
690
|
-
if path is not None:
|
|
691
|
-
indent = 4 if len(self.selected_timesteps) <= 480 else None
|
|
692
|
-
with open(path, 'w', encoding='utf-8') as f:
|
|
693
|
-
json.dump(data, f, indent=indent, ensure_ascii=False)
|
|
694
|
-
|
|
695
|
-
return data
|
|
696
|
-
|
|
697
|
-
@property
|
|
698
|
-
def stats(self) -> str:
|
|
699
|
-
"""
|
|
700
|
-
Return a statistical summary of the active data.
|
|
428
|
+
data: Input data to convert. Supported types:
|
|
429
|
+
- Scalars: int, float, bool, np.integer, np.floating, np.bool_
|
|
430
|
+
- Arrays: np.ndarray (1D and multi-dimensional)
|
|
431
|
+
- Pandas: pd.Series, pd.DataFrame
|
|
432
|
+
- xarray: xr.DataArray
|
|
433
|
+
coords: Target coordinate specification as {dimension_name: coordinate_index}.
|
|
434
|
+
All coordinate indices must be pandas.Index objects.
|
|
701
435
|
|
|
702
436
|
Returns:
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
return get_numeric_stats(self.selected_data, padd=0, by_scenario=True)
|
|
437
|
+
DataArray conforming to the target coordinate specification,
|
|
438
|
+
with input data appropriately matched and broadcast
|
|
706
439
|
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
return np.unique(self.selected_data.values).size == 1
|
|
711
|
-
|
|
712
|
-
@property
|
|
713
|
-
def selected_data(self) -> xr.DataArray:
|
|
714
|
-
"""
|
|
715
|
-
Get a view of stored_data based on current selections.
|
|
716
|
-
This computes the view dynamically based on the current selection state.
|
|
717
|
-
"""
|
|
718
|
-
return self._stored_data.sel(**self._valid_selector)
|
|
719
|
-
|
|
720
|
-
@property
|
|
721
|
-
def selected_timesteps(self) -> Optional[pd.DatetimeIndex]:
|
|
722
|
-
"""Get the current active timesteps, or None if no time dimension."""
|
|
723
|
-
if not self.has_time_dim:
|
|
724
|
-
return None
|
|
725
|
-
if self._selected_timesteps is None:
|
|
726
|
-
return self._stored_data.indexes['time']
|
|
727
|
-
return self._selected_timesteps
|
|
728
|
-
|
|
729
|
-
@property
|
|
730
|
-
def active_scenarios(self) -> Optional[pd.Index]:
|
|
731
|
-
"""Get the current active scenarios, or None if no scenario dimension."""
|
|
732
|
-
if not self.has_scenario_dim:
|
|
733
|
-
return None
|
|
734
|
-
if self._selected_scenarios is None:
|
|
735
|
-
return self._stored_data.indexes['scenario']
|
|
736
|
-
return self._selected_scenarios
|
|
440
|
+
Raises:
|
|
441
|
+
ConversionError: If data type is unsupported, conversion fails,
|
|
442
|
+
or broadcasting to target coordinates is impossible
|
|
737
443
|
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
444
|
+
Examples:
|
|
445
|
+
# Scalar broadcasting
|
|
446
|
+
>>> coords = {'x': pd.Index([1, 2, 3]), 'y': pd.Index(['a', 'b'])}
|
|
447
|
+
>>> converter.to_dataarray(42, coords)
|
|
448
|
+
# Returns: DataArray with shape (3, 2), all values = 42
|
|
742
449
|
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
450
|
+
# Series index matching
|
|
451
|
+
>>> series = pd.Series([10, 20, 30], index=[1, 2, 3])
|
|
452
|
+
>>> converter.to_dataarray(series, coords)
|
|
453
|
+
# Returns: DataArray matched to 'x' dimension, broadcast to 'y'
|
|
746
454
|
|
|
747
|
-
|
|
748
|
-
|
|
455
|
+
# Array shape matching
|
|
456
|
+
>>> array = np.array([[1, 2], [3, 4], [5, 6]]) # Shape (3, 2)
|
|
457
|
+
>>> converter.to_dataarray(array, coords)
|
|
458
|
+
# Returns: DataArray with dimensions ('x', 'y') based on shape
|
|
749
459
|
"""
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
scenarios=self.active_scenarios if self.has_scenario_dim else None,
|
|
754
|
-
)
|
|
460
|
+
# Prepare and validate target specification
|
|
461
|
+
if coords is None:
|
|
462
|
+
coords = {}
|
|
755
463
|
|
|
756
|
-
|
|
757
|
-
if new_data.equals(self._stored_data):
|
|
758
|
-
return
|
|
464
|
+
validated_coords, target_dims = cls._validate_and_prepare_target_coordinates(coords)
|
|
759
465
|
|
|
760
|
-
|
|
761
|
-
|
|
466
|
+
# Convert input data to intermediate DataArray based on type
|
|
467
|
+
if isinstance(data, (int, float, bool, np.integer, np.floating, np.bool_)):
|
|
468
|
+
# Scalar values - create scalar DataArray
|
|
469
|
+
intermediate = xr.DataArray(data.item() if hasattr(data, 'item') else data)
|
|
762
470
|
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
# Only update timesteps if the series has time dimension
|
|
772
|
-
if self.has_time_dim:
|
|
773
|
-
if timesteps is None or timesteps.equals(self._stored_data.indexes['time']):
|
|
774
|
-
self._selected_timesteps = None
|
|
775
|
-
else:
|
|
776
|
-
self._selected_timesteps = timesteps
|
|
777
|
-
|
|
778
|
-
# Only update scenarios if the series has scenario dimension
|
|
779
|
-
if self.has_scenario_dim:
|
|
780
|
-
if scenarios is None or scenarios.equals(self._stored_data.indexes['scenario']):
|
|
781
|
-
self._selected_scenarios = None
|
|
471
|
+
elif isinstance(data, np.ndarray):
|
|
472
|
+
# NumPy arrays - dispatch based on dimensionality
|
|
473
|
+
if data.ndim == 0:
|
|
474
|
+
# 0-dimensional array (scalar)
|
|
475
|
+
intermediate = xr.DataArray(data.item())
|
|
476
|
+
elif data.ndim == 1:
|
|
477
|
+
# 1-dimensional array
|
|
478
|
+
intermediate = cls._match_1d_array_by_length(data, validated_coords, target_dims)
|
|
782
479
|
else:
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
@property
|
|
786
|
-
def sel(self):
|
|
787
|
-
"""Direct access to the selected_data's sel method for convenience."""
|
|
788
|
-
return self.selected_data.sel
|
|
789
|
-
|
|
790
|
-
@property
|
|
791
|
-
def isel(self):
|
|
792
|
-
"""Direct access to the selected_data's isel method for convenience."""
|
|
793
|
-
return self.selected_data.isel
|
|
794
|
-
|
|
795
|
-
@property
|
|
796
|
-
def _valid_selector(self) -> Dict[str, pd.Index]:
|
|
797
|
-
"""Get the current selection as a dictionary."""
|
|
798
|
-
selector = {}
|
|
799
|
-
|
|
800
|
-
# Only include time in selector if series has time dimension
|
|
801
|
-
if self.has_time_dim and self._selected_timesteps is not None:
|
|
802
|
-
selector['time'] = self._selected_timesteps
|
|
803
|
-
|
|
804
|
-
# Only include scenario in selector if series has scenario dimension
|
|
805
|
-
if self.has_scenario_dim and self._selected_scenarios is not None:
|
|
806
|
-
selector['scenario'] = self._selected_scenarios
|
|
807
|
-
|
|
808
|
-
return selector
|
|
809
|
-
|
|
810
|
-
def _apply_operation(self, other, op):
|
|
811
|
-
"""Apply an operation between this TimeSeries and another object."""
|
|
812
|
-
if isinstance(other, TimeSeries):
|
|
813
|
-
other = other.selected_data
|
|
814
|
-
return op(self.selected_data, other)
|
|
815
|
-
|
|
816
|
-
def __add__(self, other):
|
|
817
|
-
return self._apply_operation(other, lambda x, y: x + y)
|
|
818
|
-
|
|
819
|
-
def __sub__(self, other):
|
|
820
|
-
return self._apply_operation(other, lambda x, y: x - y)
|
|
821
|
-
|
|
822
|
-
def __mul__(self, other):
|
|
823
|
-
return self._apply_operation(other, lambda x, y: x * y)
|
|
824
|
-
|
|
825
|
-
def __truediv__(self, other):
|
|
826
|
-
return self._apply_operation(other, lambda x, y: x / y)
|
|
827
|
-
|
|
828
|
-
def __radd__(self, other):
|
|
829
|
-
return other + self.selected_data
|
|
830
|
-
|
|
831
|
-
def __rsub__(self, other):
|
|
832
|
-
return other - self.selected_data
|
|
833
|
-
|
|
834
|
-
def __rmul__(self, other):
|
|
835
|
-
return other * self.selected_data
|
|
836
|
-
|
|
837
|
-
def __rtruediv__(self, other):
|
|
838
|
-
return other / self.selected_data
|
|
839
|
-
|
|
840
|
-
def __neg__(self) -> xr.DataArray:
|
|
841
|
-
return -self.selected_data
|
|
842
|
-
|
|
843
|
-
def __pos__(self) -> xr.DataArray:
|
|
844
|
-
return +self.selected_data
|
|
845
|
-
|
|
846
|
-
def __abs__(self) -> xr.DataArray:
|
|
847
|
-
return abs(self.selected_data)
|
|
848
|
-
|
|
849
|
-
def __gt__(self, other):
|
|
850
|
-
"""
|
|
851
|
-
Compare if this TimeSeries is greater than another.
|
|
852
|
-
|
|
853
|
-
Args:
|
|
854
|
-
other: Another TimeSeries to compare with
|
|
855
|
-
|
|
856
|
-
Returns:
|
|
857
|
-
True if all values in this TimeSeries are greater than other
|
|
858
|
-
"""
|
|
859
|
-
if isinstance(other, TimeSeries):
|
|
860
|
-
return self.selected_data > other.selected_data
|
|
861
|
-
return self.selected_data > other
|
|
862
|
-
|
|
863
|
-
def __ge__(self, other):
|
|
864
|
-
"""
|
|
865
|
-
Compare if this TimeSeries is greater than or equal to another.
|
|
866
|
-
|
|
867
|
-
Args:
|
|
868
|
-
other: Another TimeSeries to compare with
|
|
869
|
-
|
|
870
|
-
Returns:
|
|
871
|
-
True if all values in this TimeSeries are greater than or equal to other
|
|
872
|
-
"""
|
|
873
|
-
if isinstance(other, TimeSeries):
|
|
874
|
-
return self.selected_data >= other.selected_data
|
|
875
|
-
return self.selected_data >= other
|
|
876
|
-
|
|
877
|
-
def __lt__(self, other):
|
|
878
|
-
"""
|
|
879
|
-
Compare if this TimeSeries is less than another.
|
|
880
|
-
|
|
881
|
-
Args:
|
|
882
|
-
other: Another TimeSeries to compare with
|
|
883
|
-
|
|
884
|
-
Returns:
|
|
885
|
-
True if all values in this TimeSeries are less than other
|
|
886
|
-
"""
|
|
887
|
-
if isinstance(other, TimeSeries):
|
|
888
|
-
return self.selected_data < other.selected_data
|
|
889
|
-
return self.selected_data < other
|
|
890
|
-
|
|
891
|
-
def __le__(self, other):
|
|
892
|
-
"""
|
|
893
|
-
Compare if this TimeSeries is less than or equal to another.
|
|
894
|
-
|
|
895
|
-
Args:
|
|
896
|
-
other: Another TimeSeries to compare with
|
|
897
|
-
|
|
898
|
-
Returns:
|
|
899
|
-
True if all values in this TimeSeries are less than or equal to other
|
|
900
|
-
"""
|
|
901
|
-
if isinstance(other, TimeSeries):
|
|
902
|
-
return self.selected_data <= other.selected_data
|
|
903
|
-
return self.selected_data <= other
|
|
904
|
-
|
|
905
|
-
def __eq__(self, other):
|
|
906
|
-
"""
|
|
907
|
-
Compare if this TimeSeries is equal to another.
|
|
908
|
-
|
|
909
|
-
Args:
|
|
910
|
-
other: Another TimeSeries to compare with
|
|
911
|
-
|
|
912
|
-
Returns:
|
|
913
|
-
True if all values in this TimeSeries are equal to other
|
|
914
|
-
"""
|
|
915
|
-
if isinstance(other, TimeSeries):
|
|
916
|
-
return self.selected_data == other.selected_data
|
|
917
|
-
return self.selected_data == other
|
|
918
|
-
|
|
919
|
-
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
|
|
920
|
-
"""
|
|
921
|
-
Handle NumPy universal functions.
|
|
922
|
-
|
|
923
|
-
This allows NumPy functions to work with TimeSeries objects.
|
|
924
|
-
"""
|
|
925
|
-
# Convert any TimeSeries inputs to their selected_data
|
|
926
|
-
inputs = [x.selected_data if isinstance(x, TimeSeries) else x for x in inputs]
|
|
927
|
-
return getattr(ufunc, method)(*inputs, **kwargs)
|
|
928
|
-
|
|
929
|
-
def __repr__(self):
|
|
930
|
-
"""
|
|
931
|
-
Get a string representation of the TimeSeries.
|
|
932
|
-
|
|
933
|
-
Returns:
|
|
934
|
-
String showing TimeSeries details
|
|
935
|
-
"""
|
|
936
|
-
attrs = {
|
|
937
|
-
'name': self.name,
|
|
938
|
-
'aggregation_weight': self.aggregation_weight,
|
|
939
|
-
'aggregation_group': self.aggregation_group,
|
|
940
|
-
'has_extra_timestep': self.has_extra_timestep,
|
|
941
|
-
'shape': self.selected_data.shape,
|
|
942
|
-
}
|
|
943
|
-
|
|
944
|
-
attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items())
|
|
945
|
-
return f'TimeSeries({attr_str})'
|
|
946
|
-
|
|
947
|
-    def __str__(self):
-        """
-        Get a human-readable string representation.
-
-        Returns:
-            Descriptive string with statistics
-        """
-        return f'TimeSeries "{self.name}":\n{textwrap.indent(self.stats, " ")}'
-
-
-class TimeSeriesCollection:
-    """
-    Simplified central manager for time series data with reference tracking.
-
-    Provides a way to store time series data and work with subsets of dimensions
-    that automatically update all references when changed.
-    """
-
-    def __init__(
-        self,
-        timesteps: pd.DatetimeIndex,
-        scenarios: Optional[pd.Index] = None,
-        hours_of_last_timestep: Optional[float] = None,
-        hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None,
-    ):
-        """Initialize a TimeSeriesCollection."""
-        self._full_timesteps = self._validate_timesteps(timesteps)
-        self._full_scenarios = self._validate_scenarios(scenarios)
-
-        self._full_timesteps_extra = self._create_timesteps_with_extra(
-            self._full_timesteps,
-            self._calculate_hours_of_final_timestep(
-                self._full_timesteps, hours_of_final_timestep=hours_of_last_timestep
-            ),
-        )
-        self._full_hours_per_timestep = self.calculate_hours_per_timestep(
-            self._full_timesteps_extra, self._full_scenarios
-        )
-
-        self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps(
-            timesteps, hours_of_previous_timesteps
-        )  # TODO: Make dynamic
-
-        # Series that need extra timestep
-        self._has_extra_timestep: set = set()
-
-        # Storage for TimeSeries objects
-        self._time_series: Dict[str, TimeSeries] = {}
-
-        # Active subset selectors
-        self._selected_timesteps: Optional[pd.DatetimeIndex] = None
-        self._selected_scenarios: Optional[pd.Index] = None
-        self._selected_timesteps_extra: Optional[pd.DatetimeIndex] = None
-        self._selected_hours_per_timestep: Optional[xr.DataArray] = None
-
-    def add_time_series(
-        self,
-        name: str,
-        data: Union[NumericDataTS, TimeSeries],
-        has_time_dim: bool = True,
-        has_scenario_dim: bool = True,
-        aggregation_weight: Optional[float] = None,
-        aggregation_group: Optional[str] = None,
-        has_extra_timestep: bool = False,
-    ) -> TimeSeries:
-        """
-        Add a new TimeSeries to the allocator.
-
-        Args:
-            name: Name of the time series
-            data: Data for the time series (can be raw data or an existing TimeSeries)
-            has_time_dim: Whether the TimeSeries has a time dimension
-            has_scenario_dim: Whether the TimeSeries has a scenario dimension
-            aggregation_weight: Weight used for aggregation
-            aggregation_group: Group name for shared aggregation weighting
-            has_extra_timestep: Whether this series needs an extra timestep
-
-        Returns:
-            The created TimeSeries object
-        """
-        if name in self._time_series:
-            raise KeyError(f"TimeSeries '{name}' already exists in allocator")
-        if not has_time_dim and has_extra_timestep:
-            raise ValueError('A non-time-indexed TimeSeries cannot have an extra timestep')
-
-        # Choose which timesteps to use
-        if has_time_dim:
-            target_timesteps = self.timesteps_extra if has_extra_timestep else self.timesteps
-        else:
-            target_timesteps = None
-
-        target_scenarios = self.scenarios if has_scenario_dim else None
-
-        # Create or adapt the TimeSeries object
-        if isinstance(data, TimeSeries):
-            # Use the existing TimeSeries but update its parameters
-            time_series = data
-            # Update the stored data to use our timesteps and scenarios
-            data_array = DataConverter.as_dataarray(
-                time_series.stored_data, timesteps=target_timesteps, scenarios=target_scenarios
-            )
-            time_series = TimeSeries(
-                data=data_array,
-                name=name,
-                aggregation_weight=aggregation_weight or time_series.aggregation_weight,
-                aggregation_group=aggregation_group or time_series.aggregation_group,
-                has_extra_timestep=has_extra_timestep or time_series.has_extra_timestep,
-            )
-        else:
-            # Create a new TimeSeries from raw data
-            time_series = TimeSeries.from_datasource(
-                data=data,
-                name=name,
-                timesteps=target_timesteps,
-                scenarios=target_scenarios,
-                aggregation_weight=aggregation_weight,
-                aggregation_group=aggregation_group,
-                has_extra_timestep=has_extra_timestep,
-            )
-
-        # Add to storage
-        self._time_series[name] = time_series
-
-        # Track if it needs extra timestep
-        if has_extra_timestep:
-            self._has_extra_timestep.add(name)
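For orientation, a minimal sketch of how the removed 2.x collection API fit together, following the signatures above; the series names and values are made up:

```python
import pandas as pd

# Hypothetical model data against the removed 2.x API shown above.
timesteps = pd.date_range('2020-01-01', periods=24, freq='h', name='time')
collection = TimeSeriesCollection(timesteps, hours_of_last_timestep=1.0)

# Raw data is wrapped via TimeSeries.from_datasource(); an existing TimeSeries
# would instead be re-indexed onto the collection's timesteps and scenarios.
demand = collection.add_time_series('heat_demand', data=[50.0] * 24)
charge = collection.add_time_series('charge_state', data=[0.0] * 25, has_extra_timestep=True)
```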
+            # Multi-dimensional array
+            intermediate = cls._match_multidim_array_by_shape_permutation(data, validated_coords, target_dims)

-
-
-
-
-
-        Set active subset for timesteps and scenarios.
-
-        Args:
-            timesteps: Timesteps to activate, or None to clear
-            scenarios: Scenarios to activate, or None to clear
-        """
-        if timesteps is None:
-            self._selected_timesteps = None
-            self._selected_timesteps_extra = None
-        else:
-            self._selected_timesteps = self._validate_timesteps(timesteps, self._full_timesteps)
-            self._selected_timesteps_extra = self._create_timesteps_with_extra(
-                timesteps, self._calculate_hours_of_final_timestep(timesteps, self._full_timesteps)
-            )
-
-        if scenarios is None:
-            self._selected_scenarios = None
-        else:
-            self._selected_scenarios = self._validate_scenarios(scenarios, self._full_scenarios)
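The removed selection method's signature did not survive extraction (the five bare removed lines above); only its docstring tail and body remain. A sketch of the activate/clear semantics it implemented, using `select_subset` as a stand-in name, not the actual one:

```python
# Narrow every stored TimeSeries to the first 12 timesteps...
collection.select_subset(timesteps=timesteps[:12], scenarios=None)   # stand-in name
# ...then clear again: passing None restores the full index.
collection.select_subset(timesteps=None, scenarios=None)
```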
+        elif isinstance(data, pd.Series):
+            # Pandas Series - validate and match by index
+            if isinstance(data.index, pd.MultiIndex):
+                raise ConversionError('MultiIndex Series are not supported. Please use a single-level index.')
+            intermediate = cls._match_series_by_index_alignment(data, validated_coords, target_dims)

-
+        elif isinstance(data, pd.DataFrame):
+            # Pandas DataFrame - validate and convert
+            if isinstance(data.index, pd.MultiIndex):
+                raise ConversionError('MultiIndex DataFrames are not supported. Please use a single-level index.')
+            if len(data.columns) == 0 or data.empty:
+                raise ConversionError('DataFrame must have at least one column and cannot be empty.')

-
-
-
-
+            if len(data.columns) == 1:
+                # Single-column DataFrame - treat as Series
+                series_data = data.iloc[:, 0]
+                intermediate = cls._match_series_by_index_alignment(series_data, validated_coords, target_dims)
             else:
-
-
-
-
+                # Multi-column DataFrame - treat as multi-dimensional array
+                intermediate = cls._match_multidim_array_by_shape_permutation(
+                    data.to_numpy(), validated_coords, target_dims
+                )

-
-
-
+        elif isinstance(data, xr.DataArray):
+            # Existing DataArray - use as-is
+            intermediate = data.copy()

-        Args:
-            with_extra_timestep: Whether to exclude the extra timesteps.
-                Effectively, this removes the last timestep for certain TimeSeries, but mitigates the presence of NaNs in others.
-            with_constants: Whether to exclude TimeSeries with a constant value from the dataset.
-        """
-        if self.scenarios is None:
-            ds = xr.Dataset(coords={'time': self.timesteps_extra})
         else:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            return self._full_timesteps
-        return self._selected_timesteps
-
-    @property
-    def timesteps_extra(self) -> pd.DatetimeIndex:
-        """Get the current active timesteps with extra timestep."""
-        if self._selected_timesteps_extra is None:
-            return self._full_timesteps_extra
-        return self._selected_timesteps_extra
-
-    @property
-    def hours_per_timestep(self) -> xr.DataArray:
-        """Get the current active hours per timestep."""
-        if self._selected_hours_per_timestep is None:
-            return self._full_hours_per_timestep
-        return self._selected_hours_per_timestep
-
-    @property
-    def scenarios(self) -> Optional[pd.Index]:
-        """Get the current active scenarios."""
-        if self._selected_scenarios is None:
-            return self._full_scenarios
-        return self._selected_scenarios
-
-    def _propagate_selection_to_time_series(self) -> None:
-        """Apply the current selection to all TimeSeries objects."""
-        for ts_name, ts in self._time_series.items():
-            if ts.has_time_dim:
-                timesteps = self.timesteps_extra if ts_name in self._has_extra_timestep else self.timesteps
-            else:
-                timesteps = None
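The removed properties above all implement one fallback pattern: return the active selection if one is set, otherwise the full range. Together with `timesteps_extra` this is what let series flagged `has_extra_timestep=True` (e.g. storage charge states) carry N+1 points. Continuing the earlier sketch:

```python
len(collection.timesteps)        # 24 stamps for flows
len(collection.timesteps_extra)  # 25 - one extra closing stamp appended
collection.hours_per_timestep    # xr.DataArray of step durations in hours
```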
+            # Unsupported data type
+            supported_types = [
+                'int',
+                'float',
+                'bool',
+                'np.integer',
+                'np.floating',
+                'np.bool_',
+                'np.ndarray',
+                'pd.Series',
+                'pd.DataFrame',
+                'xr.DataArray',
+            ]
+            raise ConversionError(
+                f'Unsupported data type: {type(data).__name__}. Supported types: {", ".join(supported_types)}'
+            )

-
+        # Broadcast intermediate result to target specification
+        return cls._broadcast_dataarray_to_target_specification(intermediate, validated_coords, target_dims)

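All branches converge on one `intermediate` DataArray that is then broadcast to the target dimensions; anything unrecognized raises a `ConversionError` listing the supported types. A self-contained sketch of that dispatch pattern (not the library's actual API; `to_intermediate` is invented for illustration):

```python
import numpy as np
import pandas as pd
import xarray as xr

class ConversionError(ValueError):
    """Stand-in for flixopt's ConversionError."""

def to_intermediate(data):
    # Mirror of the isinstance chain above: scalars, Series, DataArray, else error.
    if isinstance(data, (int, float, np.integer, np.floating)):
        return xr.DataArray(data)
    elif isinstance(data, pd.Series):
        if isinstance(data.index, pd.MultiIndex):
            raise ConversionError('MultiIndex Series are not supported.')
        return xr.DataArray(data)
    elif isinstance(data, xr.DataArray):
        return data.copy()
    raise ConversionError(f'Unsupported data type: {type(data).__name__}')

to_intermediate(pd.Series([1.0, 2.0]))   # -> 1-d DataArray
# to_intermediate({'a': 1})              # would raise: Unsupported data type: dict
```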
-
+    @staticmethod
+    def _validate_and_prepare_target_coordinates(
+        coords: dict[str, pd.Index],
+    ) -> tuple[dict[str, pd.Index], tuple[str, ...]]:
         """
-
-
-        Args:
-            name: Name of the data array or time series
+        Validate and prepare target coordinate specification for DataArray creation.

-
-
-        """
-        # First check if this is a TimeSeries
-        if name in self._time_series:
-            # Return the TimeSeries object (it will handle selection internally)
-            return self._time_series[name]
-        raise ValueError(f'No TimeSeries named "{name}" found')
-
-    def __contains__(self, value) -> bool:
-        if isinstance(value, str):
-            return value in self._time_series
-        elif isinstance(value, TimeSeries):
-            return value.name in self._time_series
-        raise TypeError(f'Invalid type for __contains__ of {self.__class__.__name__}: {type(value)}')
-
-    def __iter__(self) -> Iterator[TimeSeries]:
-        """Iterate over TimeSeries objects."""
-        return iter(self._time_series.values())
-
-    def update_time_series(self, name: str, data: TimestepData) -> TimeSeries:
-        """
-        Update an existing TimeSeries with new data.
+        Performs comprehensive validation of coordinate inputs and prepares them
+        for use in DataArray construction with appropriate naming and type checking.

         Args:
-
-            data: New data to assign
+            coords: Raw coordinate specification {dimension_name: coordinate_index}

         Returns:
-
+            Tuple of (validated_coordinates_dict, dimension_names_tuple)

         Raises:
-
+            ConversionError: If any coordinates are invalid, improperly typed,
+                or have inconsistent naming
         """
-
-
-
-        # Get the TimeSeries
-        ts = self._time_series[name]
+        validated_coords = {}
+        dimension_names = []

-
-
-
-
-
-
-        # Convert data to proper format
-        data_array = DataConverter.as_dataarray(
-            data, timesteps=target_timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None
-        )
-
-        # Update the TimeSeries
-        ts.update_stored_data(data_array)
-
-        return ts
-
-    def calculate_aggregation_weights(self) -> Dict[str, float]:
-        """Calculate and return aggregation weights for all time series."""
-        group_weights = self._calculate_group_weights()
-
-        weights = {}
-        for name, ts in self._time_series.items():
-            if ts.aggregation_group is not None:
-                # Use group weight
-                weights[name] = group_weights.get(ts.aggregation_group, 1)
-            else:
-                # Use individual weight or default to 1
-                weights[name] = ts.aggregation_weight or 1
-
-        if np.all(np.isclose(list(weights.values()), 1, atol=1e-6)):
-            logger.info('All aggregation weights were set to 1')
-
-        return weights
-
-    def _calculate_group_weights(self) -> Dict[str, float]:
-        """Calculate weights for aggregation groups."""
-        # Count series in each group
-        groups = [ts.aggregation_group for ts in self._time_series.values() if ts.aggregation_group is not None]
-        group_counts = Counter(groups)
-
-        # Calculate weight for each group (1/count)
-        return {group: 1 / count for group, count in group_counts.items()}
-
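A worked example of the removed group weighting these two methods implement (weight = 1 / number of series in the group, so each group contributes one unit in total):

```python
# Continuing the sketch: three grouped series share one unit of weight.
for name in ('pv_east', 'pv_west', 'pv_south'):
    collection.add_time_series(name, data=[1.0] * 24, aggregation_group='solar')

collection.calculate_aggregation_weights()
# {'heat_demand': 1, 'charge_state': 1,
#  'pv_east': 0.333..., 'pv_west': 0.333..., 'pv_south': 0.333...}
```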
-    @staticmethod
-    def _validate_timesteps(
-        timesteps: pd.DatetimeIndex, present_timesteps: Optional[pd.DatetimeIndex] = None
-    ) -> pd.DatetimeIndex:
-        """
-        Validate timesteps format and rename if needed.
-
-        Args:
-            timesteps: The timesteps to validate
-            present_timesteps: The timesteps that are present in the dataset
-
-        Raises:
-            TypeError: If timesteps is not a pandas DatetimeIndex
-            ValueError: If timesteps does not contain at least 2 timestamps
-            ValueError: If timesteps is not sorted
-            ValueError: If timesteps contains duplicates
-            ValueError: If timesteps is not a subset of present_timesteps
-        """
-        if not isinstance(timesteps, pd.DatetimeIndex):
-            raise TypeError('timesteps must be a pandas DatetimeIndex')
-
-        if len(timesteps) < 2:
-            raise ValueError('timesteps must contain at least 2 timestamps')
-
-        # Ensure timesteps has the required name
-        if timesteps.name != 'time':
-            logger.debug('Renamed timesteps to "time" (was "%s")', timesteps.name)
-            timesteps.name = 'time'
-
-        # Ensure timesteps is sorted
-        if not timesteps.is_monotonic_increasing:
-            raise ValueError('timesteps must be sorted')
-
-        # Ensure timesteps has no duplicates
-        if len(timesteps) != len(timesteps.drop_duplicates()):
-            raise ValueError('timesteps must not contain duplicates')
-
-        # Ensure timesteps is a subset of present_timesteps
-        if present_timesteps is not None and not set(timesteps).issubset(set(present_timesteps)):
-            raise ValueError('timesteps must be a subset of present_timesteps')
-
-        return timesteps
-
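Concretely, the removed validator renamed rather than rejected a wrongly named index, and raised only for structural problems:

```python
idx = pd.date_range('2020-01-01', periods=4, freq='h')   # index name is None
TimeSeriesCollection._validate_timesteps(idx).name        # 'time' (renamed)
# TimeSeriesCollection._validate_timesteps(idx[::-1])     # would raise: must be sorted
# TimeSeriesCollection._validate_timesteps(idx[:1])       # would raise: at least 2 timestamps
```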
-    @staticmethod
-    def _validate_scenarios(scenarios: pd.Index, present_scenarios: Optional[pd.Index] = None) -> Optional[pd.Index]:
-        """
-        Validate scenario format and rename if needed.
-
-        Args:
-            scenarios: The scenarios to validate
-            present_scenarios: The scenarios that are present in the dataset
-
-        Raises:
-            ValueError: If scenarios is not a subset of present_scenarios
-        """
-        if scenarios is None:
-            return None
-
-        if not isinstance(scenarios, pd.Index):
-            logger.warning('Converting scenarios to pandas.Index')
-            scenarios = pd.Index(scenarios, name='scenario')
-
-        # Ensure scenarios has the required name
-        if scenarios.name != 'scenario':
-            logger.debug('Renamed scenarios to "scenario" (was "%s")', scenarios.name)
-            scenarios.name = 'scenario'
-
-        # Ensure scenarios is a subset of present_scenarios
-        if present_scenarios is not None and not set(scenarios).issubset(set(present_scenarios)):
-            raise ValueError('scenarios must be a subset of present_scenarios')
-
-        return scenarios
-
-    @staticmethod
-    def _create_timesteps_with_extra(timesteps: pd.DatetimeIndex, hours_of_last_timestep: float) -> pd.DatetimeIndex:
-        """Create timesteps with an extra step at the end."""
-        last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
-        return pd.DatetimeIndex(timesteps.append(last_date), name='time')
-
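A worked example of the extra-timestep construction just above: the closing stamp is the last timestep plus `hours_of_last_timestep`:

```python
idx = pd.date_range('2020-01-01 00:00', periods=3, freq='h', name='time')
TimeSeriesCollection._create_timesteps_with_extra(idx, 1.0)
# DatetimeIndex(['... 00:00', '... 01:00', '... 02:00', '... 03:00'], name='time')
```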
-    @staticmethod
-    def _calculate_hours_of_previous_timesteps(
-        timesteps: pd.DatetimeIndex, hours_of_previous_timesteps: Optional[Union[float, np.ndarray]]
-    ) -> Union[float, np.ndarray]:
-        """Calculate the duration of the timesteps preceding the first timestep."""
-        if hours_of_previous_timesteps is not None:
-            return hours_of_previous_timesteps
-
-        # Calculate from the first interval
-        first_interval = timesteps[1] - timesteps[0]
-        return first_interval.total_seconds() / 3600  # Convert to hours
-
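The fallback arithmetic is simply the first interval converted to hours:

```python
idx = pd.date_range('2020-01-01', periods=4, freq='15min', name='time')
TimeSeriesCollection._calculate_hours_of_previous_timesteps(idx, None)  # 0.25
TimeSeriesCollection._calculate_hours_of_previous_timesteps(idx, 1.0)   # 1.0 (explicit value wins)
```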
-    @staticmethod
-    def _calculate_hours_of_final_timestep(
-        timesteps: pd.DatetimeIndex,
-        timesteps_superset: Optional[pd.DatetimeIndex] = None,
-        hours_of_final_timestep: Optional[float] = None,
-    ) -> float:
-        """
-        Calculate duration of the final timestep.
-        If timesteps_superset is provided, the duration of the final timestep is taken from the following timestep in the superset.
-        The hours_of_final_timestep is only used if the final timestep can't be determined from the timesteps.
-
-        Args:
-            timesteps: The full timesteps
-            timesteps_superset: The superset of timesteps
-            hours_of_final_timestep: The duration of the final timestep, if already known
-
-        Returns:
-            The duration of the final timestep in hours
+        for dim_name, coord_index in coords.items():
+            # Type validation
+            if not isinstance(coord_index, pd.Index):
+                raise ConversionError(
+                    f'Coordinate for dimension "{dim_name}" must be a pandas.Index, got {type(coord_index).__name__}'
+                )

-
-
-
-        if timesteps_superset is None:
-            if hours_of_final_timestep is not None:
-                return hours_of_final_timestep
-            return (timesteps[-1] - timesteps[-2]) / pd.Timedelta(hours=1)
+            # Non-empty validation
+            if len(coord_index) == 0:
+                raise ConversionError(f'Coordinate for dimension "{dim_name}" cannot be empty')

-
+            # Ensure coordinate index has consistent naming
+            if coord_index.name != dim_name:
+                coord_index = coord_index.rename(dim_name)

-
-
-
-
+            # Special validation for time dimensions (common pattern)
+            if dim_name == 'time' and not isinstance(coord_index, pd.DatetimeIndex):
+                raise ConversionError(
+                    f'Dimension named "time" should use DatetimeIndex for proper '
+                    f'time-series functionality, got {type(coord_index).__name__}'
+                )

-
-
-
+            validated_coords[dim_name] = coord_index
+            dimension_names.append(dim_name)
+
+        return validated_coords, tuple(dimension_names)
+
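A usage sketch of the new coordinate validation above, assuming (as in 2.x) that these helpers live on `DataConverter`:

```python
coords = {
    'time': pd.date_range('2020-01-01', periods=3, freq='h'),   # unnamed -> renamed 'time'
    'scenario': pd.Index(['low', 'high'], name='scenario'),
}
validated, dims = DataConverter._validate_and_prepare_target_coordinates(coords)
dims                    # ('time', 'scenario')
validated['time'].name  # 'time'
# {'time': pd.Index([1, 2, 3])} would raise ConversionError: DatetimeIndex required
```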
+
+def get_dataarray_stats(arr: xr.DataArray) -> dict:
+    """Generate statistical summary of a DataArray."""
+    stats = {}
+    if arr.dtype.kind in 'biufc':  # bool, int, uint, float, complex
+        try:
+            stats.update(
+                {
+                    'min': float(arr.min().values),
+                    'max': float(arr.max().values),
+                    'mean': float(arr.mean().values),
+                    'median': float(arr.median().values),
+                    'std': float(arr.std().values),
+                    'count': int(arr.count().values),  # non-null count
+                }
             )
-        else:
-            # Get the first timestep in the superset that is after the final timestep of the subset
-            extra_timestep = timesteps_superset[timesteps_superset > final_timestep].min()
-            return (extra_timestep - final_timestep) / pd.Timedelta(hours=1)

-
-
-
-
-        """Calculate duration of each timestep."""
-        # Calculate differences between consecutive timestamps
-        hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1)
+            # Add null count only if there are nulls
+            null_count = int(arr.isnull().sum().values)
+            if null_count > 0:
+                stats['nulls'] = null_count

-
-
-            timesteps=timesteps_extra[:-1],
-            scenarios=scenarios,
-        ).rename('hours_per_step')
+        except Exception:
+            pass

+    return stats

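The new module-level helper is easy to check against a small array; xarray's reductions skip NaNs, so `count` is the non-null count and `nulls` appears only when needed:

```python
import numpy as np
import xarray as xr

arr = xr.DataArray(np.array([1.0, 2.0, np.nan]), dims='time')
get_dataarray_stats(arr)
# {'min': 1.0, 'max': 2.0, 'mean': 1.5, 'median': 1.5, 'std': 0.5, 'count': 2, 'nulls': 1}
```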
-
-
-
+
+def drop_constant_arrays(ds: xr.Dataset, dim: str = 'time', drop_arrays_without_dim: bool = True) -> xr.Dataset:
+    """Drop variables with constant values along a dimension.

     Args:
-
-
-
-        by_scenario: Whether to break down stats by scenario
+        ds: Input dataset to filter.
+        dim: Dimension along which to check for constant values.
+        drop_arrays_without_dim: If True, also drop variables that don't have the specified dimension.

     Returns:
-
+        Dataset with constant variables removed.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        )
-        return '\n'.join(['By scenario:'] + results)
-
-    # Standard logic for non-scenario data or aggregated stats
-    if np.unique(data).size == 1:
-        return f'{data.max().item():{format_spec}} (constant)'
-
-    mean = data.mean().item()
-    median = data.median().item()
-    min_val = data.min().item()
-    max_val = data.max().item()
-    std = data.std().item()
-
-    return f'{mean:{format_spec}} (mean), {median:{format_spec}} (median), {min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
-
+    drop_vars = []
+
+    for name, da in ds.data_vars.items():
+        # Skip variables without the dimension
+        if dim not in da.dims:
+            if drop_arrays_without_dim:
+                drop_vars.append(name)
+            continue
+
+        # Check if variable is constant along the dimension
+        if (da.max(dim, skipna=True) == da.min(dim, skipna=True)).all().item():
+            drop_vars.append(name)
+
+    if drop_vars:
+        drop_vars = sorted(drop_vars)
+        logger.debug(
+            f'Dropping {len(drop_vars)} constant/dimension-less arrays: {drop_vars[:5]}{"..." if len(drop_vars) > 5 else ""}'
+        )

-
-    data: Optional[Union[int, float, xr.DataArray, TimeSeries]],
-    if_none: Any = None
-) -> Any:
-    """
-    Convert data to xr.DataArray.
+    return ds.drop_vars(drop_vars)

-    Args:
-        data: The data to convert (scalar, array, or DataArray)
-        if_none: The value to return if data is None

-
-
-
-
-        return if_none
-    if isinstance(data, TimeSeries):
-        return data.selected_data
-    if isinstance(data, xr.DataArray):
-        return data
-    if isinstance(data, (int, float, np.integer, np.floating)):
-        return data
-    raise TypeError(f'Unsupported data type: {type(data).__name__}')
+
+# Backward compatibility aliases
+# TODO: Needed?
+NonTemporalDataUser = PeriodicDataUser
+NonTemporalData = PeriodicData
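The other new helper, `drop_constant_arrays`, in use: variables constant along `dim` are dropped, and dimension-less variables go too unless the flag is disabled:

```python
ds = xr.Dataset(
    {
        'varying': ('time', [1.0, 2.0, 3.0]),
        'constant': ('time', [5.0, 5.0, 5.0]),
        'scalar': 7.0,   # has no 'time' dimension
    }
)
drop_constant_arrays(ds).data_vars                                  # only 'varying'
drop_constant_arrays(ds, drop_arrays_without_dim=False).data_vars   # 'varying' and 'scalar'
```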
|