flixopt 2.1.0__py3-none-any.whl → 2.2.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flixopt might be problematic. Click here for more details.
- docs/release-notes/v2.2.0.md +55 -0
- docs/user-guide/Mathematical Notation/Investment.md +115 -0
- flixopt/calculation.py +65 -37
- flixopt/components.py +119 -74
- flixopt/core.py +966 -451
- flixopt/effects.py +269 -65
- flixopt/elements.py +83 -52
- flixopt/features.py +134 -85
- flixopt/flow_system.py +99 -16
- flixopt/interface.py +142 -51
- flixopt/io.py +56 -27
- flixopt/linear_converters.py +3 -3
- flixopt/plotting.py +34 -16
- flixopt/results.py +807 -109
- flixopt/structure.py +64 -10
- flixopt/utils.py +6 -9
- {flixopt-2.1.0.dist-info → flixopt-2.2.0b0.dist-info}/METADATA +1 -1
- {flixopt-2.1.0.dist-info → flixopt-2.2.0b0.dist-info}/RECORD +21 -20
- {flixopt-2.1.0.dist-info → flixopt-2.2.0b0.dist-info}/WHEEL +1 -1
- {flixopt-2.1.0.dist-info → flixopt-2.2.0b0.dist-info}/top_level.txt +0 -1
- site/release-notes/_template.txt +0 -32
- {flixopt-2.1.0.dist-info → flixopt-2.2.0b0.dist-info}/licenses/LICENSE +0 -0
flixopt/core.py
CHANGED
|
@@ -7,6 +7,7 @@ import inspect
|
|
|
7
7
|
import json
|
|
8
8
|
import logging
|
|
9
9
|
import pathlib
|
|
10
|
+
import textwrap
|
|
10
11
|
from collections import Counter
|
|
11
12
|
from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
|
|
12
13
|
|
|
@@ -25,6 +26,12 @@ NumericData = Union[int, float, np.integer, np.floating, np.ndarray, pd.Series,
|
|
|
25
26
|
NumericDataTS = Union[NumericData, 'TimeSeriesData']
|
|
26
27
|
"""Represents either standard numeric data or TimeSeriesData."""
|
|
27
28
|
|
|
29
|
+
TimestepData = NumericData
|
|
30
|
+
"""Represents any form of numeric data that corresponds to timesteps."""
|
|
31
|
+
|
|
32
|
+
ScenarioData = NumericData
|
|
33
|
+
"""Represents any form of numeric data that corresponds to scenarios."""
|
|
34
|
+
|
|
28
35
|
|
|
29
36
|
class PlausibilityError(Exception):
|
|
30
37
|
"""Error for a failing Plausibility check."""
|
|
@@ -40,61 +47,446 @@ class ConversionError(Exception):
|
|
|
40
47
|
|
|
41
48
|
class DataConverter:
|
|
42
49
|
"""
|
|
43
|
-
Converts various data types into xarray.DataArray with
|
|
50
|
+
Converts various data types into xarray.DataArray with optional time and scenario dimension.
|
|
44
51
|
|
|
45
|
-
|
|
52
|
+
Current implementation handles:
|
|
53
|
+
- Scalar values
|
|
54
|
+
- NumPy arrays
|
|
55
|
+
- xarray.DataArray, pandas.Series and pandas.DataFrame
|
|
46
56
|
"""
|
|
47
57
|
|
|
48
58
|
@staticmethod
|
|
49
|
-
def as_dataarray(
|
|
50
|
-
|
|
59
|
+
def as_dataarray(
|
|
60
|
+
data: TimestepData, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None
|
|
61
|
+
) -> xr.DataArray:
|
|
62
|
+
"""
|
|
63
|
+
Convert data to xarray.DataArray with specified dimensions.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
data: The data to convert (scalar, NumPy array, pandas Series/DataFrame, or DataArray)
|
|
67
|
+
timesteps: Optional DatetimeIndex for time dimension
|
|
68
|
+
scenarios: Optional Index for scenario dimension
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
DataArray with the converted data
|
|
72
|
+
"""
|
|
73
|
+
# Prepare dimensions and coordinates
|
|
74
|
+
coords, dims = DataConverter._prepare_dimensions(timesteps, scenarios)
|
|
75
|
+
|
|
76
|
+
# Select appropriate converter based on data type
|
|
77
|
+
if isinstance(data, (int, float, np.integer, np.floating)):
|
|
78
|
+
return DataConverter._convert_scalar(data, coords, dims)
|
|
79
|
+
|
|
80
|
+
elif isinstance(data, xr.DataArray):
|
|
81
|
+
return DataConverter._convert_dataarray(data, coords, dims)
|
|
82
|
+
|
|
83
|
+
elif isinstance(data, np.ndarray):
|
|
84
|
+
return DataConverter._convert_ndarray(data, coords, dims)
|
|
85
|
+
|
|
86
|
+
elif isinstance(data, pd.Series):
|
|
87
|
+
return DataConverter._convert_series(data, coords, dims)
|
|
88
|
+
|
|
89
|
+
elif isinstance(data, pd.DataFrame):
|
|
90
|
+
return DataConverter._convert_dataframe(data, coords, dims)
|
|
91
|
+
|
|
92
|
+
else:
|
|
93
|
+
raise ConversionError(f'Unsupported data type: {type(data).__name__}')
|
|
94
|
+
|
|
95
|
+
@staticmethod
|
|
96
|
+
def _validate_timesteps(timesteps: pd.DatetimeIndex) -> pd.DatetimeIndex:
|
|
97
|
+
"""
|
|
98
|
+
Validate and prepare time index.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
timesteps: The time index to validate
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
Validated time index
|
|
105
|
+
"""
|
|
51
106
|
if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0:
|
|
52
|
-
raise
|
|
107
|
+
raise ConversionError('Timesteps must be a non-empty DatetimeIndex')
|
|
108
|
+
|
|
53
109
|
if not timesteps.name == 'time':
|
|
54
|
-
raise ConversionError(f'
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
110
|
+
raise ConversionError(f'Timesteps must be named "time", got "{timesteps.name}"')
|
|
111
|
+
|
|
112
|
+
return timesteps
|
|
113
|
+
|
|
114
|
+
@staticmethod
|
|
115
|
+
def _validate_scenarios(scenarios: pd.Index) -> pd.Index:
|
|
116
|
+
"""
|
|
117
|
+
Validate and prepare scenario index.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
scenarios: The scenario index to validate
|
|
121
|
+
"""
|
|
122
|
+
if not isinstance(scenarios, pd.Index) or len(scenarios) == 0:
|
|
123
|
+
raise ConversionError('Scenarios must be a non-empty Index')
|
|
124
|
+
|
|
125
|
+
if not scenarios.name == 'scenario':
|
|
126
|
+
raise ConversionError(f'Scenarios must be named "scenario", got "{scenarios.name}"')
|
|
127
|
+
|
|
128
|
+
return scenarios
|
|
129
|
+
|
|
130
|
+
@staticmethod
|
|
131
|
+
def _prepare_dimensions(
|
|
132
|
+
timesteps: Optional[pd.DatetimeIndex], scenarios: Optional[pd.Index]
|
|
133
|
+
) -> Tuple[Dict[str, pd.Index], Tuple[str, ...]]:
|
|
134
|
+
"""
|
|
135
|
+
Prepare coordinates and dimensions for the DataArray.
|
|
136
|
+
|
|
137
|
+
Args:
|
|
138
|
+
timesteps: Optional time index
|
|
139
|
+
scenarios: Optional scenario index
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
Tuple of (coordinates dict, dimensions tuple)
|
|
143
|
+
"""
|
|
144
|
+
# Validate inputs if provided
|
|
145
|
+
if timesteps is not None:
|
|
146
|
+
timesteps = DataConverter._validate_timesteps(timesteps)
|
|
147
|
+
|
|
148
|
+
if scenarios is not None:
|
|
149
|
+
scenarios = DataConverter._validate_scenarios(scenarios)
|
|
150
|
+
|
|
151
|
+
# Build coordinates and dimensions
|
|
152
|
+
coords = {}
|
|
153
|
+
dims = []
|
|
154
|
+
|
|
155
|
+
if timesteps is not None:
|
|
156
|
+
coords['time'] = timesteps
|
|
157
|
+
dims.append('time')
|
|
158
|
+
|
|
159
|
+
if scenarios is not None:
|
|
160
|
+
coords['scenario'] = scenarios
|
|
161
|
+
dims.append('scenario')
|
|
162
|
+
|
|
163
|
+
return coords, tuple(dims)
|
|
164
|
+
|
|
165
|
+
@staticmethod
|
|
166
|
+
def _convert_scalar(
|
|
167
|
+
data: Union[int, float, np.integer, np.floating], coords: Dict[str, pd.Index], dims: Tuple[str, ...]
|
|
168
|
+
) -> xr.DataArray:
|
|
169
|
+
"""
|
|
170
|
+
Convert a scalar value to a DataArray.
|
|
171
|
+
|
|
172
|
+
Args:
|
|
173
|
+
data: The scalar value
|
|
174
|
+
coords: Coordinate dictionary
|
|
175
|
+
dims: Dimension names
|
|
176
|
+
|
|
177
|
+
Returns:
|
|
178
|
+
DataArray with the scalar value
|
|
179
|
+
"""
|
|
180
|
+
if isinstance(data, (np.integer, np.floating)):
|
|
181
|
+
data = data.item()
|
|
182
|
+
return xr.DataArray(data, coords=coords, dims=dims)
|
|
183
|
+
|
|
184
|
+
@staticmethod
|
|
185
|
+
def _convert_dataarray(data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
|
|
186
|
+
"""
|
|
187
|
+
Convert an existing DataArray to desired dimensions.
|
|
188
|
+
|
|
189
|
+
Args:
|
|
190
|
+
data: The source DataArray
|
|
191
|
+
coords: Target coordinates
|
|
192
|
+
dims: Target dimensions
|
|
193
|
+
|
|
194
|
+
Returns:
|
|
195
|
+
DataArray with the target dimensions
|
|
196
|
+
"""
|
|
197
|
+
# No dimensions case
|
|
198
|
+
if len(dims) == 0:
|
|
199
|
+
if data.size != 1:
|
|
200
|
+
raise ConversionError('When converting to dimensionless DataArray, source must be scalar')
|
|
201
|
+
return xr.DataArray(data.values.item())
|
|
202
|
+
|
|
203
|
+
# Check if data already has matching dimensions and coordinates
|
|
204
|
+
if set(data.dims) == set(dims):
|
|
205
|
+
# Check if coordinates match
|
|
206
|
+
is_compatible = True
|
|
207
|
+
for dim in dims:
|
|
208
|
+
if dim in data.dims and not np.array_equal(data.coords[dim].values, coords[dim].values):
|
|
209
|
+
is_compatible = False
|
|
210
|
+
break
|
|
211
|
+
|
|
212
|
+
if is_compatible:
|
|
213
|
+
# Ensure dimensions are in the correct order
|
|
214
|
+
if data.dims != dims:
|
|
215
|
+
# Transpose to get dimensions in the right order
|
|
216
|
+
return data.transpose(*dims).copy(deep=True)
|
|
217
|
+
else:
|
|
218
|
+
# Return existing DataArray if compatible and order is correct
|
|
219
|
+
return data.copy(deep=True)
|
|
220
|
+
|
|
221
|
+
# Handle dimension broadcasting
|
|
222
|
+
if len(data.dims) == 1 and len(dims) == 2:
|
|
223
|
+
# Single dimension to two dimensions
|
|
224
|
+
if data.dims[0] == 'time' and 'scenario' in dims:
|
|
225
|
+
# Broadcast time dimension to include scenarios
|
|
226
|
+
return DataConverter._broadcast_time_to_scenarios(data, coords, dims)
|
|
227
|
+
|
|
228
|
+
elif data.dims[0] == 'scenario' and 'time' in dims:
|
|
229
|
+
# Broadcast scenario dimension to include time
|
|
230
|
+
return DataConverter._broadcast_scenario_to_time(data, coords, dims)
|
|
231
|
+
|
|
232
|
+
raise ConversionError(
|
|
233
|
+
f'Cannot convert {data.dims} to {dims}. Source coordinates: {data.coords}, Target coordinates: {coords}'
|
|
234
|
+
)
|
|
235
|
+
@staticmethod
|
|
236
|
+
def _broadcast_time_to_scenarios(
|
|
237
|
+
data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
|
|
238
|
+
) -> xr.DataArray:
|
|
239
|
+
"""
|
|
240
|
+
Broadcast a time-only DataArray to include scenarios.
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
data: The time-indexed DataArray
|
|
244
|
+
coords: Target coordinates
|
|
245
|
+
dims: Target dimensions
|
|
246
|
+
|
|
247
|
+
Returns:
|
|
248
|
+
DataArray with time and scenario dimensions
|
|
249
|
+
"""
|
|
250
|
+
# Check compatibility
|
|
251
|
+
if not np.array_equal(data.coords['time'].values, coords['time'].values):
|
|
252
|
+
raise ConversionError("Source time coordinates don't match target time coordinates")
|
|
253
|
+
|
|
254
|
+
if len(coords['scenario']) <= 1:
|
|
255
|
+
return data.copy(deep=True)
|
|
256
|
+
|
|
257
|
+
# Broadcast values
|
|
258
|
+
values = np.repeat(data.values[:, np.newaxis], len(coords['scenario']), axis=1)
|
|
259
|
+
return xr.DataArray(values.copy(), coords=coords, dims=dims)
|
|
260
|
+
|
|
261
|
+
@staticmethod
|
|
262
|
+
def _broadcast_scenario_to_time(
|
|
263
|
+
data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
|
|
264
|
+
) -> xr.DataArray:
|
|
265
|
+
"""
|
|
266
|
+
Broadcast a scenario-only DataArray to include time.
|
|
267
|
+
|
|
268
|
+
Args:
|
|
269
|
+
data: The scenario-indexed DataArray
|
|
270
|
+
coords: Target coordinates
|
|
271
|
+
dims: Target dimensions
|
|
272
|
+
|
|
273
|
+
Returns:
|
|
274
|
+
DataArray with time and scenario dimensions
|
|
275
|
+
"""
|
|
276
|
+
# Check compatibility
|
|
277
|
+
if not np.array_equal(data.coords['scenario'].values, coords['scenario'].values):
|
|
278
|
+
raise ConversionError("Source scenario coordinates don't match target scenario coordinates")
|
|
279
|
+
|
|
280
|
+
# Broadcast values
|
|
281
|
+
values = np.repeat(data.values[:, np.newaxis], len(coords['time']), axis=1).T
|
|
282
|
+
return xr.DataArray(values.copy(), coords=coords, dims=dims)
|
|
283
|
+
|
|
284
|
+
@staticmethod
|
|
285
|
+
def _convert_ndarray(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
|
|
286
|
+
"""
|
|
287
|
+
Convert a NumPy array to a DataArray.
|
|
288
|
+
|
|
289
|
+
Args:
|
|
290
|
+
data: The NumPy array
|
|
291
|
+
coords: Target coordinates
|
|
292
|
+
dims: Target dimensions
|
|
293
|
+
|
|
294
|
+
Returns:
|
|
295
|
+
DataArray from the NumPy array
|
|
296
|
+
"""
|
|
297
|
+
# Handle dimensionless case
|
|
298
|
+
if len(dims) == 0:
|
|
299
|
+
if data.size != 1:
|
|
300
|
+
raise ConversionError('Without dimensions, can only convert scalar arrays')
|
|
301
|
+
return xr.DataArray(data.item())
|
|
302
|
+
|
|
303
|
+
# Handle single dimension
|
|
304
|
+
elif len(dims) == 1:
|
|
305
|
+
return DataConverter._convert_ndarray_single_dim(data, coords, dims)
|
|
306
|
+
|
|
307
|
+
# Handle two dimensions
|
|
308
|
+
elif len(dims) == 2:
|
|
309
|
+
return DataConverter._convert_ndarray_two_dims(data, coords, dims)
|
|
310
|
+
|
|
311
|
+
else:
|
|
312
|
+
raise ConversionError('Maximum 2 dimensions supported')
|
|
313
|
+
|
|
314
|
+
@staticmethod
|
|
315
|
+
def _convert_ndarray_single_dim(
|
|
316
|
+
data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
|
|
317
|
+
) -> xr.DataArray:
|
|
318
|
+
"""
|
|
319
|
+
Convert a NumPy array to a single-dimension DataArray.
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
data: The NumPy array
|
|
323
|
+
coords: Target coordinates
|
|
324
|
+
dims: Target dimensions (length 1)
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
DataArray with single dimension
|
|
328
|
+
"""
|
|
329
|
+
dim_name = dims[0]
|
|
330
|
+
dim_length = len(coords[dim_name])
|
|
331
|
+
|
|
332
|
+
if data.ndim == 1:
|
|
333
|
+
# 1D array must match dimension length
|
|
334
|
+
if data.shape[0] != dim_length:
|
|
335
|
+
raise ConversionError(f"Array length {data.shape[0]} doesn't match {dim_name} length {dim_length}")
|
|
336
|
+
return xr.DataArray(data, coords=coords, dims=dims)
|
|
337
|
+
else:
|
|
338
|
+
raise ConversionError(f'Expected 1D array for single dimension, got {data.ndim}D')
|
|
339
|
+
|
|
340
|
+
@staticmethod
|
|
341
|
+
def _convert_ndarray_two_dims(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
|
|
342
|
+
"""
|
|
343
|
+
Convert a NumPy array to a two-dimension DataArray.
|
|
344
|
+
|
|
345
|
+
Args:
|
|
346
|
+
data: The NumPy array
|
|
347
|
+
coords: Target coordinates
|
|
348
|
+
dims: Target dimensions (length 2)
|
|
349
|
+
|
|
350
|
+
Returns:
|
|
351
|
+
DataArray with two dimensions
|
|
352
|
+
"""
|
|
353
|
+
scenario_length = len(coords['scenario'])
|
|
354
|
+
time_length = len(coords['time'])
|
|
355
|
+
|
|
356
|
+
if data.ndim == 1:
|
|
357
|
+
# For 1D array, create 2D array based on which dimension it matches
|
|
358
|
+
if data.shape[0] == time_length:
|
|
359
|
+
# Broadcast across scenarios
|
|
360
|
+
values = np.repeat(data[:, np.newaxis], scenario_length, axis=1)
|
|
361
|
+
return xr.DataArray(values, coords=coords, dims=dims)
|
|
362
|
+
elif data.shape[0] == scenario_length:
|
|
363
|
+
# Broadcast across time
|
|
364
|
+
values = np.repeat(data[np.newaxis, :], time_length, axis=0)
|
|
365
|
+
return xr.DataArray(values, coords=coords, dims=dims)
|
|
366
|
+
else:
|
|
367
|
+
raise ConversionError(f"1D array length {data.shape[0]} doesn't match either dimension")
|
|
368
|
+
|
|
369
|
+
elif data.ndim == 2:
|
|
370
|
+
# For 2D array, shape must match dimensions
|
|
371
|
+
expected_shape = (time_length, scenario_length)
|
|
372
|
+
if data.shape != expected_shape:
|
|
373
|
+
raise ConversionError(f"2D array shape {data.shape} doesn't match expected shape {expected_shape}")
|
|
374
|
+
return xr.DataArray(data, coords=coords, dims=dims)
|
|
375
|
+
|
|
376
|
+
else:
|
|
377
|
+
raise ConversionError(f'Expected 1D or 2D array for two dimensions, got {data.ndim}D')
|
|
378
|
+
|
|
379
|
+
@staticmethod
|
|
380
|
+
def _convert_series(data: pd.Series, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
|
|
381
|
+
"""
|
|
382
|
+
Convert pandas Series to xarray DataArray.
|
|
383
|
+
|
|
384
|
+
Args:
|
|
385
|
+
data: pandas Series to convert
|
|
386
|
+
coords: Target coordinates
|
|
387
|
+
dims: Target dimensions
|
|
388
|
+
|
|
389
|
+
Returns:
|
|
390
|
+
DataArray from the pandas Series
|
|
391
|
+
"""
|
|
392
|
+
# Handle single dimension case
|
|
393
|
+
if len(dims) == 1:
|
|
394
|
+
dim_name = dims[0]
|
|
395
|
+
|
|
396
|
+
# Check if series index matches the dimension
|
|
397
|
+
if data.index.equals(coords[dim_name]):
|
|
398
|
+
return xr.DataArray(data.values.copy(), coords=coords, dims=dims)
|
|
87
399
|
else:
|
|
88
|
-
raise ConversionError(
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
400
|
+
raise ConversionError(
|
|
401
|
+
f"Series index doesn't match {dim_name} coordinates.\n"
|
|
402
|
+
f'Series index: {data.index}\n'
|
|
403
|
+
f'Target {dim_name} coordinates: {coords[dim_name]}'
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
# Handle two dimensions case
|
|
407
|
+
elif len(dims) == 2:
|
|
408
|
+
# Check if dimensions are time and scenario
|
|
409
|
+
if dims != ('time', 'scenario'):
|
|
410
|
+
raise ConversionError(
|
|
411
|
+
f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}'
|
|
412
|
+
)
|
|
413
|
+
|
|
414
|
+
# Case 1: Series is indexed by time
|
|
415
|
+
if data.index.equals(coords['time']):
|
|
416
|
+
# Broadcast across scenarios
|
|
417
|
+
values = np.repeat(data.values[:, np.newaxis], len(coords['scenario']), axis=1)
|
|
418
|
+
return xr.DataArray(values.copy(), coords=coords, dims=dims)
|
|
419
|
+
|
|
420
|
+
# Case 2: Series is indexed by scenario
|
|
421
|
+
elif data.index.equals(coords['scenario']):
|
|
422
|
+
# Broadcast across time
|
|
423
|
+
values = np.repeat(data.values[np.newaxis, :], len(coords['time']), axis=0)
|
|
424
|
+
return xr.DataArray(values.copy(), coords=coords, dims=dims)
|
|
425
|
+
|
|
426
|
+
else:
|
|
427
|
+
raise ConversionError(
|
|
428
|
+
"Series index must match either 'time' or 'scenario' coordinates.\n"
|
|
429
|
+
f'Series index: {data.index}\n'
|
|
430
|
+
f'Target time coordinates: {coords["time"]}\n'
|
|
431
|
+
f'Target scenario coordinates: {coords["scenario"]}'
|
|
432
|
+
)
|
|
433
|
+
|
|
434
|
+
else:
|
|
435
|
+
raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}')
|
|
436
|
+
|
|
437
|
+
@staticmethod
|
|
438
|
+
def _convert_dataframe(data: pd.DataFrame, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
|
|
439
|
+
"""
|
|
440
|
+
Convert pandas DataFrame to xarray DataArray.
|
|
441
|
+
Only allows time as index and scenarios as columns.
|
|
442
|
+
|
|
443
|
+
Args:
|
|
444
|
+
data: pandas DataFrame to convert
|
|
445
|
+
coords: Target coordinates
|
|
446
|
+
dims: Target dimensions
|
|
447
|
+
|
|
448
|
+
Returns:
|
|
449
|
+
DataArray from the pandas DataFrame
|
|
450
|
+
"""
|
|
451
|
+
# Single dimension case
|
|
452
|
+
if len(dims) == 1:
|
|
453
|
+
# If DataFrame has one column, treat it like a Series
|
|
454
|
+
if len(data.columns) == 1:
|
|
455
|
+
series = data.iloc[:, 0]
|
|
456
|
+
return DataConverter._convert_series(series, coords, dims)
|
|
457
|
+
|
|
458
|
+
raise ConversionError(
|
|
459
|
+
f'When converting DataFrame to single-dimension DataArray, DataFrame must have exactly one column, got {len(data.columns)}'
|
|
460
|
+
)
|
|
461
|
+
|
|
462
|
+
# Two dimensions case
|
|
463
|
+
elif len(dims) == 2:
|
|
464
|
+
# Check if dimensions are time and scenario
|
|
465
|
+
if dims != ('time', 'scenario'):
|
|
466
|
+
raise ConversionError(
|
|
467
|
+
f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}'
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
# DataFrame must have time as index and scenarios as columns
|
|
471
|
+
if data.index.equals(coords['time']) and data.columns.equals(coords['scenario']):
|
|
472
|
+
# Create DataArray with proper dimension order
|
|
473
|
+
return xr.DataArray(data.values.copy(), coords=coords, dims=dims)
|
|
474
|
+
else:
|
|
475
|
+
raise ConversionError(
|
|
476
|
+
'DataFrame must have time as index and scenarios as columns.\n'
|
|
477
|
+
f'DataFrame index: {data.index}\n'
|
|
478
|
+
f'DataFrame columns: {data.columns}\n'
|
|
479
|
+
f'Target time coordinates: {coords["time"]}\n'
|
|
480
|
+
f'Target scenario coordinates: {coords["scenario"]}'
|
|
481
|
+
)
|
|
482
|
+
|
|
483
|
+
else:
|
|
484
|
+
raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}')
|
|
93
485
|
|
|
94
486
|
|
|
95
487
|
class TimeSeriesData:
|
|
96
488
|
# TODO: Move to Interface.py
|
|
97
|
-
def __init__(self, data:
|
|
489
|
+
def __init__(self, data: TimestepData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None):
|
|
98
490
|
"""
|
|
99
491
|
Timeseries class for transmitting timeseries data AND special characteristics of the timeseries,
|
|
100
492
|
e.g. to define weights needed in calculation_type 'aggregated'
|
|
@@ -146,18 +538,19 @@ class TimeSeries:
|
|
|
146
538
|
name (str): The name of the time series
|
|
147
539
|
aggregation_weight (Optional[float]): Weight used for aggregation
|
|
148
540
|
aggregation_group (Optional[str]): Group name for shared aggregation weighting
|
|
149
|
-
|
|
541
|
+
has_extra_timestep (bool): Whether this series needs an extra timestep
|
|
150
542
|
"""
|
|
151
543
|
|
|
152
544
|
@classmethod
|
|
153
545
|
def from_datasource(
|
|
154
546
|
cls,
|
|
155
|
-
data:
|
|
547
|
+
data: NumericDataTS,
|
|
156
548
|
name: str,
|
|
157
549
|
timesteps: pd.DatetimeIndex,
|
|
550
|
+
scenarios: Optional[pd.Index] = None,
|
|
158
551
|
aggregation_weight: Optional[float] = None,
|
|
159
552
|
aggregation_group: Optional[str] = None,
|
|
160
|
-
|
|
553
|
+
has_extra_timestep: bool = False,
|
|
161
554
|
) -> 'TimeSeries':
|
|
162
555
|
"""
|
|
163
556
|
Initialize the TimeSeries from multiple data sources.
|
|
@@ -166,19 +559,20 @@ class TimeSeries:
|
|
|
166
559
|
data: The time series data
|
|
167
560
|
name: The name of the TimeSeries
|
|
168
561
|
timesteps: The timesteps of the TimeSeries
|
|
562
|
+
scenarios: The scenarios of the TimeSeries
|
|
169
563
|
aggregation_weight: The weight in aggregation calculations
|
|
170
564
|
aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing
|
|
171
|
-
|
|
565
|
+
has_extra_timestep: Whether this series requires an extra timestep
|
|
172
566
|
|
|
173
567
|
Returns:
|
|
174
568
|
A new TimeSeries instance
|
|
175
569
|
"""
|
|
176
570
|
return cls(
|
|
177
|
-
DataConverter.as_dataarray(data, timesteps),
|
|
571
|
+
DataConverter.as_dataarray(data, timesteps, scenarios),
|
|
178
572
|
name,
|
|
179
573
|
aggregation_weight,
|
|
180
574
|
aggregation_group,
|
|
181
|
-
|
|
575
|
+
has_extra_timestep,
|
|
182
576
|
)
|
|
183
577
|
|
|
184
578
|
@classmethod
|
|
@@ -212,7 +606,7 @@ class TimeSeries:
|
|
|
212
606
|
name=data['name'],
|
|
213
607
|
aggregation_weight=data['aggregation_weight'],
|
|
214
608
|
aggregation_group=data['aggregation_group'],
|
|
215
|
-
|
|
609
|
+
has_extra_timestep=data['has_extra_timestep'],
|
|
216
610
|
)
|
|
217
611
|
|
|
218
612
|
def __init__(
|
|
@@ -221,7 +615,7 @@ class TimeSeries:
|
|
|
221
615
|
name: str,
|
|
222
616
|
aggregation_weight: Optional[float] = None,
|
|
223
617
|
aggregation_group: Optional[str] = None,
|
|
224
|
-
|
|
618
|
+
has_extra_timestep: bool = False,
|
|
225
619
|
):
|
|
226
620
|
"""
|
|
227
621
|
Initialize a TimeSeries with a DataArray.
|
|
@@ -231,35 +625,40 @@ class TimeSeries:
|
|
|
231
625
|
name: The name of the TimeSeries
|
|
232
626
|
aggregation_weight: The weight in aggregation calculations
|
|
233
627
|
aggregation_group: Group this TimeSeries belongs to for weight sharing
|
|
234
|
-
|
|
628
|
+
has_extra_timestep: Whether this series requires an extra timestep
|
|
235
629
|
|
|
236
630
|
Raises:
|
|
237
|
-
ValueError: If data
|
|
631
|
+
ValueError: If data has unsupported dimensions
|
|
238
632
|
"""
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
raise ValueError(f'Number of dimensions of DataArray must be 1. Got {data.ndim}')
|
|
633
|
+
allowed_dims = {'time', 'scenario'}
|
|
634
|
+
if not set(data.dims).issubset(allowed_dims):
|
|
635
|
+
raise ValueError(f'DataArray dimensions must be subset of {allowed_dims}. Got {data.dims}')
|
|
243
636
|
|
|
244
637
|
self.name = name
|
|
245
638
|
self.aggregation_weight = aggregation_weight
|
|
246
639
|
self.aggregation_group = aggregation_group
|
|
247
|
-
self.
|
|
640
|
+
self.has_extra_timestep = has_extra_timestep
|
|
248
641
|
|
|
249
642
|
# Data management
|
|
250
643
|
self._stored_data = data.copy(deep=True)
|
|
251
644
|
self._backup = self._stored_data.copy(deep=True)
|
|
252
|
-
self._active_timesteps = self._stored_data.indexes['time']
|
|
253
|
-
self._active_data = None
|
|
254
|
-
self._update_active_data()
|
|
255
645
|
|
|
256
|
-
|
|
646
|
+
# Selection state
|
|
647
|
+
self._selected_timesteps: Optional[pd.DatetimeIndex] = None
|
|
648
|
+
self._selected_scenarios: Optional[pd.Index] = None
|
|
649
|
+
|
|
650
|
+
# Flags for whether this series has a time dimension and/or a scenario dimension
|
|
651
|
+
self.has_time_dim = 'time' in data.dims
|
|
652
|
+
self.has_scenario_dim = 'scenario' in data.dims
|
|
653
|
+
|
|
654
|
+
def reset(self) -> None:
|
|
257
655
|
"""
|
|
258
|
-
Reset
|
|
656
|
+
Reset selections to include all timesteps and scenarios.
|
|
657
|
+
This is equivalent to clearing all selections.
|
|
259
658
|
"""
|
|
260
|
-
self.
|
|
659
|
+
self.set_selection(None, None)
|
|
261
660
|
|
|
262
|
-
def restore_data(self):
|
|
661
|
+
def restore_data(self) -> None:
|
|
263
662
|
"""
|
|
264
663
|
Restore stored_data from the backup and reset active timesteps.
|
|
265
664
|
"""
|
|
@@ -280,8 +679,8 @@ class TimeSeries:
|
|
|
280
679
|
'name': self.name,
|
|
281
680
|
'aggregation_weight': self.aggregation_weight,
|
|
282
681
|
'aggregation_group': self.aggregation_group,
|
|
283
|
-
'
|
|
284
|
-
'data': self.
|
|
682
|
+
'has_extra_timestep': self.has_extra_timestep,
|
|
683
|
+
'data': self.selected_data.to_dict(),
|
|
285
684
|
}
|
|
286
685
|
|
|
287
686
|
# Convert datetime objects to ISO strings
|
|
@@ -289,7 +688,7 @@ class TimeSeries:
|
|
|
289
688
|
|
|
290
689
|
# Save to file if path is provided
|
|
291
690
|
if path is not None:
|
|
292
|
-
indent = 4 if len(self.
|
|
691
|
+
indent = 4 if len(self.selected_timesteps) <= 480 else None
|
|
293
692
|
with open(path, 'w', encoding='utf-8') as f:
|
|
294
693
|
json.dump(data, f, indent=indent, ensure_ascii=False)
|
|
295
694
|
|
|
@@ -303,84 +702,116 @@ class TimeSeries:
|
|
|
303
702
|
Returns:
|
|
304
703
|
String representation of data statistics
|
|
305
704
|
"""
|
|
306
|
-
return get_numeric_stats(self.
|
|
307
|
-
|
|
308
|
-
def _update_active_data(self):
|
|
309
|
-
"""
|
|
310
|
-
Update the active data based on active_timesteps.
|
|
311
|
-
"""
|
|
312
|
-
self._active_data = self._stored_data.sel(time=self.active_timesteps)
|
|
705
|
+
return get_numeric_stats(self.selected_data, padd=0, by_scenario=True)
|
|
313
706
|
|
|
314
707
|
@property
|
|
315
708
|
def all_equal(self) -> bool:
|
|
316
709
|
"""Check if all values in the series are equal."""
|
|
317
|
-
return np.unique(self.
|
|
710
|
+
return np.unique(self.selected_data.values).size == 1
|
|
318
711
|
|
|
319
712
|
@property
|
|
320
|
-
def
|
|
321
|
-
"""Get the current active timesteps."""
|
|
322
|
-
return self._active_timesteps
|
|
323
|
-
|
|
324
|
-
@active_timesteps.setter
|
|
325
|
-
def active_timesteps(self, timesteps: Optional[pd.DatetimeIndex]):
|
|
713
|
+
def selected_data(self) -> xr.DataArray:
|
|
326
714
|
"""
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
Args:
|
|
330
|
-
timesteps: New timesteps to activate, or None to use all stored timesteps
|
|
331
|
-
|
|
332
|
-
Raises:
|
|
333
|
-
TypeError: If timesteps is not a pandas DatetimeIndex or None
|
|
715
|
+
Get a view of stored_data based on current selections.
|
|
716
|
+
This computes the view dynamically based on the current selection state.
|
|
334
717
|
"""
|
|
335
|
-
|
|
336
|
-
self._active_timesteps = self.stored_data.indexes['time']
|
|
337
|
-
elif isinstance(timesteps, pd.DatetimeIndex):
|
|
338
|
-
self._active_timesteps = timesteps
|
|
339
|
-
else:
|
|
340
|
-
raise TypeError('active_timesteps must be a pandas DatetimeIndex or None')
|
|
718
|
+
return self._stored_data.sel(**self._valid_selector)
|
|
341
719
|
|
|
342
|
-
|
|
720
|
+
@property
|
|
721
|
+
def selected_timesteps(self) -> Optional[pd.DatetimeIndex]:
|
|
722
|
+
"""Get the current active timesteps, or None if no time dimension."""
|
|
723
|
+
if not self.has_time_dim:
|
|
724
|
+
return None
|
|
725
|
+
if self._selected_timesteps is None:
|
|
726
|
+
return self._stored_data.indexes['time']
|
|
727
|
+
return self._selected_timesteps
|
|
343
728
|
|
|
344
729
|
@property
|
|
345
|
-
def
|
|
346
|
-
"""Get
|
|
347
|
-
|
|
730
|
+
def active_scenarios(self) -> Optional[pd.Index]:
|
|
731
|
+
"""Get the current active scenarios, or None if no scenario dimension."""
|
|
732
|
+
if not self.has_scenario_dim:
|
|
733
|
+
return None
|
|
734
|
+
if self._selected_scenarios is None:
|
|
735
|
+
return self._stored_data.indexes['scenario']
|
|
736
|
+
return self._selected_scenarios
|
|
348
737
|
|
|
349
738
|
@property
|
|
350
739
|
def stored_data(self) -> xr.DataArray:
|
|
351
740
|
"""Get a copy of the full stored data."""
|
|
352
741
|
return self._stored_data.copy()
|
|
353
742
|
|
|
354
|
-
|
|
355
|
-
def stored_data(self, value: NumericData):
|
|
743
|
+
def update_stored_data(self, value: xr.DataArray) -> None:
|
|
356
744
|
"""
|
|
357
|
-
Update stored_data and refresh
|
|
745
|
+
Update stored_data and refresh selected_data.
|
|
358
746
|
|
|
359
747
|
Args:
|
|
360
748
|
value: New data to store
|
|
361
749
|
"""
|
|
362
|
-
new_data = DataConverter.as_dataarray(
|
|
750
|
+
new_data = DataConverter.as_dataarray(
|
|
751
|
+
value,
|
|
752
|
+
timesteps=self.selected_timesteps if self.has_time_dim else None,
|
|
753
|
+
scenarios=self.active_scenarios if self.has_scenario_dim else None,
|
|
754
|
+
)
|
|
363
755
|
|
|
364
756
|
# Skip if data is unchanged to avoid overwriting backup
|
|
365
757
|
if new_data.equals(self._stored_data):
|
|
366
758
|
return
|
|
367
759
|
|
|
368
760
|
self._stored_data = new_data
|
|
369
|
-
self.
|
|
761
|
+
self.set_selection(None, None) # Reset selections to full dataset
|
|
762
|
+
|
|
763
|
+
def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None:
|
|
764
|
+
"""
|
|
765
|
+
Set active subset for timesteps and scenarios.
|
|
766
|
+
|
|
767
|
+
Args:
|
|
768
|
+
timesteps: Timesteps to activate, or None to clear. Ignored if series has no time dimension.
|
|
769
|
+
scenarios: Scenarios to activate, or None to clear. Ignored if series has no scenario dimension.
|
|
770
|
+
"""
|
|
771
|
+
# Only update timesteps if the series has time dimension
|
|
772
|
+
if self.has_time_dim:
|
|
773
|
+
if timesteps is None or timesteps.equals(self._stored_data.indexes['time']):
|
|
774
|
+
self._selected_timesteps = None
|
|
775
|
+
else:
|
|
776
|
+
self._selected_timesteps = timesteps
|
|
777
|
+
|
|
778
|
+
# Only update scenarios if the series has scenario dimension
|
|
779
|
+
if self.has_scenario_dim:
|
|
780
|
+
if scenarios is None or scenarios.equals(self._stored_data.indexes['scenario']):
|
|
781
|
+
self._selected_scenarios = None
|
|
782
|
+
else:
|
|
783
|
+
self._selected_scenarios = scenarios
|
|
370
784
|
|
|
371
785
|
@property
|
|
372
786
|
def sel(self):
|
|
373
|
-
|
|
787
|
+
"""Direct access to the selected_data's sel method for convenience."""
|
|
788
|
+
return self.selected_data.sel
|
|
374
789
|
|
|
375
790
|
@property
|
|
376
791
|
def isel(self):
|
|
377
|
-
|
|
792
|
+
"""Direct access to the selected_data's isel method for convenience."""
|
|
793
|
+
return self.selected_data.isel
|
|
794
|
+
|
|
795
|
+
@property
|
|
796
|
+
def _valid_selector(self) -> Dict[str, pd.Index]:
|
|
797
|
+
"""Get the current selection as a dictionary."""
|
|
798
|
+
selector = {}
|
|
799
|
+
|
|
800
|
+
# Only include time in selector if series has time dimension
|
|
801
|
+
if self.has_time_dim and self._selected_timesteps is not None:
|
|
802
|
+
selector['time'] = self._selected_timesteps
|
|
803
|
+
|
|
804
|
+
# Only include scenario in selector if series has scenario dimension
|
|
805
|
+
if self.has_scenario_dim and self._selected_scenarios is not None:
|
|
806
|
+
selector['scenario'] = self._selected_scenarios
|
|
807
|
+
|
|
808
|
+
return selector
|
|
378
809
|
|
|
379
810
|
def _apply_operation(self, other, op):
|
|
380
811
|
"""Apply an operation between this TimeSeries and another object."""
|
|
381
812
|
if isinstance(other, TimeSeries):
|
|
382
|
-
other = other.
|
|
383
|
-
return op(self.
|
|
813
|
+
other = other.selected_data
|
|
814
|
+
return op(self.selected_data, other)
|
|
384
815
|
|
|
385
816
|
def __add__(self, other):
|
|
386
817
|
return self._apply_operation(other, lambda x, y: x + y)
|
|
@@ -395,25 +826,25 @@ class TimeSeries:
|
|
|
395
826
|
return self._apply_operation(other, lambda x, y: x / y)
|
|
396
827
|
|
|
397
828
|
def __radd__(self, other):
|
|
398
|
-
return other + self.
|
|
829
|
+
return other + self.selected_data
|
|
399
830
|
|
|
400
831
|
def __rsub__(self, other):
|
|
401
|
-
return other - self.
|
|
832
|
+
return other - self.selected_data
|
|
402
833
|
|
|
403
834
|
def __rmul__(self, other):
|
|
404
|
-
return other * self.
|
|
835
|
+
return other * self.selected_data
|
|
405
836
|
|
|
406
837
|
def __rtruediv__(self, other):
|
|
407
|
-
return other / self.
|
|
838
|
+
return other / self.selected_data
|
|
408
839
|
|
|
409
840
|
def __neg__(self) -> xr.DataArray:
|
|
410
|
-
return -self.
|
|
841
|
+
return -self.selected_data
|
|
411
842
|
|
|
412
843
|
def __pos__(self) -> xr.DataArray:
|
|
413
|
-
return +self.
|
|
844
|
+
return +self.selected_data
|
|
414
845
|
|
|
415
846
|
def __abs__(self) -> xr.DataArray:
|
|
416
|
-
return abs(self.
|
|
847
|
+
return abs(self.selected_data)
|
|
417
848
|
|
|
418
849
|
def __gt__(self, other):
|
|
419
850
|
"""
|
|
@@ -426,8 +857,8 @@ class TimeSeries:
|
|
|
426
857
|
True if all values in this TimeSeries are greater than other
|
|
427
858
|
"""
|
|
428
859
|
if isinstance(other, TimeSeries):
|
|
429
|
-
return self.
|
|
430
|
-
return self.
|
|
860
|
+
return self.selected_data > other.selected_data
|
|
861
|
+
return self.selected_data > other
|
|
431
862
|
|
|
432
863
|
def __ge__(self, other):
|
|
433
864
|
"""
|
|
@@ -440,8 +871,8 @@ class TimeSeries:
|
|
|
440
871
|
True if all values in this TimeSeries are greater than or equal to other
|
|
441
872
|
"""
|
|
442
873
|
if isinstance(other, TimeSeries):
|
|
443
|
-
return self.
|
|
444
|
-
return self.
|
|
874
|
+
return self.selected_data >= other.selected_data
|
|
875
|
+
return self.selected_data >= other
|
|
445
876
|
|
|
446
877
|
def __lt__(self, other):
|
|
447
878
|
"""
|
|
@@ -454,8 +885,8 @@ class TimeSeries:
|
|
|
454
885
|
True if all values in this TimeSeries are less than other
|
|
455
886
|
"""
|
|
456
887
|
if isinstance(other, TimeSeries):
|
|
457
|
-
return self.
|
|
458
|
-
return self.
|
|
888
|
+
return self.selected_data < other.selected_data
|
|
889
|
+
return self.selected_data < other
|
|
459
890
|
|
|
460
891
|
def __le__(self, other):
|
|
461
892
|
"""
|
|
@@ -468,8 +899,8 @@ class TimeSeries:
|
|
|
468
899
|
True if all values in this TimeSeries are less than or equal to other
|
|
469
900
|
"""
|
|
470
901
|
if isinstance(other, TimeSeries):
|
|
471
|
-
return self.
|
|
472
|
-
return self.
|
|
902
|
+
return self.selected_data <= other.selected_data
|
|
903
|
+
return self.selected_data <= other
|
|
473
904
|
|
|
474
905
|
def __eq__(self, other):
|
|
475
906
|
"""
|
|
@@ -482,8 +913,8 @@ class TimeSeries:
|
|
|
482
913
|
True if all values in this TimeSeries are equal to other
|
|
483
914
|
"""
|
|
484
915
|
if isinstance(other, TimeSeries):
|
|
485
|
-
return self.
|
|
486
|
-
return self.
|
|
916
|
+
return self.selected_data == other.selected_data
|
|
917
|
+
return self.selected_data == other
|
|
487
918
|
|
|
488
919
|
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
|
|
489
920
|
"""
|
|
@@ -491,8 +922,8 @@ class TimeSeries:
|
|
|
491
922
|
|
|
492
923
|
This allows NumPy functions to work with TimeSeries objects.
|
|
493
924
|
"""
|
|
494
|
-
# Convert any TimeSeries inputs to their
|
|
495
|
-
inputs = [x.
|
|
925
|
+
# Convert any TimeSeries inputs to their selected_data
|
|
926
|
+
inputs = [x.selected_data if isinstance(x, TimeSeries) else x for x in inputs]
|
|
496
927
|
return getattr(ufunc, method)(*inputs, **kwargs)
|
|
497
928
|
|
|
498
929
|
def __repr__(self):
|
|
@@ -506,10 +937,10 @@ class TimeSeries:
|
|
|
506
937
|
'name': self.name,
|
|
507
938
|
'aggregation_weight': self.aggregation_weight,
|
|
508
939
|
'aggregation_group': self.aggregation_group,
|
|
509
|
-
'
|
|
510
|
-
'shape': self.
|
|
511
|
-
'time_range': f'{self.active_timesteps[0]} to {self.active_timesteps[-1]}',
|
|
940
|
+
'has_extra_timestep': self.has_extra_timestep,
|
|
941
|
+
'shape': self.selected_data.shape,
|
|
512
942
|
}
|
|
943
|
+
|
|
513
944
|
attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items())
|
|
514
945
|
return f'TimeSeries({attr_str})'
|
|
515
946
|
|
|
@@ -520,281 +951,333 @@ class TimeSeries:
|
|
|
520
951
|
Returns:
|
|
521
952
|
Descriptive string with statistics
|
|
522
953
|
"""
|
|
523
|
-
return f
|
|
954
|
+
return f'TimeSeries "{self.name}":\n{textwrap.indent(self.stats, " ")}'
|
|
524
955
|
|
|
525
956
|
|
|
526
957
|
class TimeSeriesCollection:
|
|
527
958
|
"""
|
|
528
|
-
|
|
959
|
+
Simplified central manager for time series data with reference tracking.
|
|
529
960
|
|
|
530
|
-
|
|
531
|
-
|
|
961
|
+
Provides a way to store time series data and work with subsets of dimensions
|
|
962
|
+
that automatically update all references when changed.
|
|
532
963
|
"""
|
|
533
964
|
|
|
534
965
|
def __init__(
|
|
535
966
|
self,
|
|
536
967
|
timesteps: pd.DatetimeIndex,
|
|
968
|
+
scenarios: Optional[pd.Index] = None,
|
|
537
969
|
hours_of_last_timestep: Optional[float] = None,
|
|
538
970
|
hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None,
|
|
539
971
|
):
|
|
540
|
-
"""
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
self.
|
|
551
|
-
|
|
552
|
-
timesteps, hours_of_previous_timesteps
|
|
972
|
+
"""Initialize a TimeSeriesCollection."""
|
|
973
|
+
self._full_timesteps = self._validate_timesteps(timesteps)
|
|
974
|
+
self._full_scenarios = self._validate_scenarios(scenarios)
|
|
975
|
+
|
|
976
|
+
self._full_timesteps_extra = self._create_timesteps_with_extra(
|
|
977
|
+
self._full_timesteps,
|
|
978
|
+
self._calculate_hours_of_final_timestep(
|
|
979
|
+
self._full_timesteps, hours_of_final_timestep=hours_of_last_timestep
|
|
980
|
+
),
|
|
981
|
+
)
|
|
982
|
+
self._full_hours_per_timestep = self.calculate_hours_per_timestep(
|
|
983
|
+
self._full_timesteps_extra, self._full_scenarios
|
|
553
984
|
)
|
|
554
985
|
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
self.all_hours_per_timestep = self.calculate_hours_per_timestep(self.all_timesteps_extra)
|
|
986
|
+
self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps(
|
|
987
|
+
timesteps, hours_of_previous_timesteps
|
|
988
|
+
) # TODO: Make dynamic
|
|
559
989
|
|
|
560
|
-
#
|
|
561
|
-
self.
|
|
562
|
-
self._active_timesteps_extra = None
|
|
563
|
-
self._active_hours_per_timestep = None
|
|
990
|
+
# Series that need extra timestep
|
|
991
|
+
self._has_extra_timestep: set = set()
|
|
564
992
|
|
|
565
|
-
#
|
|
566
|
-
self.
|
|
993
|
+
# Storage for TimeSeries objects
|
|
994
|
+
self._time_series: Dict[str, TimeSeries] = {}
|
|
567
995
|
|
|
568
|
-
#
|
|
569
|
-
self.
|
|
570
|
-
self.
|
|
996
|
+
# Active subset selectors
|
|
997
|
+
self._selected_timesteps: Optional[pd.DatetimeIndex] = None
|
|
998
|
+
self._selected_scenarios: Optional[pd.Index] = None
|
|
999
|
+
self._selected_timesteps_extra: Optional[pd.DatetimeIndex] = None
|
|
1000
|
+
self._selected_hours_per_timestep: Optional[xr.DataArray] = None
|
|
571
1001
|
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
self, data: Union[NumericData, TimeSeriesData], name: str, needs_extra_timestep: bool = False
|
|
1002
|
+
def add_time_series(
|
|
1003
|
+
self,
|
|
1004
|
+
name: str,
|
|
1005
|
+
data: Union[NumericDataTS, TimeSeries],
|
|
1006
|
+
has_time_dim: bool = True,
|
|
1007
|
+
has_scenario_dim: bool = True,
|
|
1008
|
+
aggregation_weight: Optional[float] = None,
|
|
1009
|
+
aggregation_group: Optional[str] = None,
|
|
1010
|
+
has_extra_timestep: bool = False,
|
|
582
1011
|
) -> TimeSeries:
|
|
583
1012
|
"""
|
|
584
|
-
|
|
1013
|
+
Add a new TimeSeries to the collection.
|
|
585
1014
|
|
|
586
1015
|
Args:
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
1016
|
+
name: Name of the time series
|
|
1017
|
+
data: Data for the time series (can be raw data or an existing TimeSeries)
|
|
1018
|
+
has_time_dim: Whether the TimeSeries has a time dimension
|
|
1019
|
+
has_scenario_dim: Whether the TimeSeries has a scenario dimension
|
|
1020
|
+
aggregation_weight: Weight used for aggregation
|
|
1021
|
+
aggregation_group: Group name for shared aggregation weighting
|
|
1022
|
+
has_extra_timestep: Whether this series needs an extra timestep
|
|
591
1023
|
|
|
592
1024
|
Returns:
|
|
593
|
-
The created TimeSeries
|
|
594
|
-
|
|
1025
|
+
The created TimeSeries object
|
|
595
1026
|
"""
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
1027
|
+
if name in self._time_series:
|
|
1028
|
+
raise KeyError(f"TimeSeries '{name}' already exists in allocator")
|
|
1029
|
+
if not has_time_dim and has_extra_timestep:
|
|
1030
|
+
raise ValueError('A not time-indexed TimeSeries cannot have an extra timestep')
|
|
1031
|
+
|
|
1032
|
+
# Choose which timesteps to use
|
|
1033
|
+
if has_time_dim:
|
|
1034
|
+
target_timesteps = self.timesteps_extra if has_extra_timestep else self.timesteps
|
|
1035
|
+
else:
|
|
1036
|
+
target_timesteps = None
|
|
599
1037
|
|
|
600
|
-
|
|
601
|
-
timesteps_to_use = self.timesteps_extra if needs_extra_timestep else self.timesteps
|
|
1038
|
+
target_scenarios = self.scenarios if has_scenario_dim else None
|
|
602
1039
|
|
|
603
|
-
# Create the
|
|
604
|
-
if isinstance(data,
|
|
605
|
-
|
|
1040
|
+
# Create or adapt the TimeSeries object
|
|
1041
|
+
if isinstance(data, TimeSeries):
|
|
1042
|
+
# Use the existing TimeSeries but update its parameters
|
|
1043
|
+
time_series = data
|
|
1044
|
+
# Update the stored data to use our timesteps and scenarios
|
|
1045
|
+
data_array = DataConverter.as_dataarray(
|
|
1046
|
+
time_series.stored_data, timesteps=target_timesteps, scenarios=target_scenarios
|
|
1047
|
+
)
|
|
1048
|
+
time_series = TimeSeries(
|
|
1049
|
+
data=data_array,
|
|
606
1050
|
name=name,
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
aggregation_group=data.agg_group,
|
|
611
|
-
needs_extra_timestep=needs_extra_timestep,
|
|
1051
|
+
aggregation_weight=aggregation_weight or time_series.aggregation_weight,
|
|
1052
|
+
aggregation_group=aggregation_group or time_series.aggregation_group,
|
|
1053
|
+
has_extra_timestep=has_extra_timestep or time_series.has_extra_timestep,
|
|
612
1054
|
)
|
|
613
|
-
# Connect the user time series to the created TimeSeries
|
|
614
|
-
data.label = name
|
|
615
1055
|
else:
|
|
1056
|
+
# Create a new TimeSeries from raw data
|
|
616
1057
|
time_series = TimeSeries.from_datasource(
|
|
617
|
-
|
|
1058
|
+
data=data,
|
|
1059
|
+
name=name,
|
|
1060
|
+
timesteps=target_timesteps,
|
|
1061
|
+
scenarios=target_scenarios,
|
|
1062
|
+
aggregation_weight=aggregation_weight,
|
|
1063
|
+
aggregation_group=aggregation_group,
|
|
1064
|
+
has_extra_timestep=has_extra_timestep,
|
|
618
1065
|
)
|
|
619
1066
|
|
|
620
|
-
# Add to
|
|
621
|
-
self.
|
|
1067
|
+
# Add to storage
|
|
1068
|
+
self._time_series[name] = time_series
|
|
622
1069
|
|
|
623
|
-
|
|
1070
|
+
# Track if it needs extra timestep
|
|
1071
|
+
if has_extra_timestep:
|
|
1072
|
+
self._has_extra_timestep.add(name)
|
|
624
1073
|
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
self.group_weights = self._calculate_group_weights()
|
|
628
|
-
self.weights = self._calculate_weights()
|
|
629
|
-
|
|
630
|
-
if np.all(np.isclose(list(self.weights.values()), 1, atol=1e-6)):
|
|
631
|
-
logger.info('All Aggregation weights were set to 1')
|
|
632
|
-
|
|
633
|
-
return self.weights
|
|
1074
|
+
# Return the TimeSeries object
|
|
1075
|
+
return time_series
|
|
634
1076
|
|
|
635
|
-
def
|
|
1077
|
+
def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None:
|
|
636
1078
|
"""
|
|
637
|
-
|
|
638
|
-
If no arguments are provided, the active timesteps are reset.
|
|
1079
|
+
Set active subset for timesteps and scenarios.
|
|
639
1080
|
|
|
640
1081
|
Args:
|
|
641
|
-
|
|
642
|
-
|
|
1082
|
+
timesteps: Timesteps to activate, or None to clear
|
|
1083
|
+
scenarios: Scenarios to activate, or None to clear
|
|
643
1084
|
"""
|
|
644
|
-
if
|
|
645
|
-
|
|
1085
|
+
if timesteps is None:
|
|
1086
|
+
self._selected_timesteps = None
|
|
1087
|
+
self._selected_timesteps_extra = None
|
|
1088
|
+
else:
|
|
1089
|
+
self._selected_timesteps = self._validate_timesteps(timesteps, self._full_timesteps)
|
|
1090
|
+
self._selected_timesteps_extra = self._create_timesteps_with_extra(
|
|
1091
|
+
timesteps, self._calculate_hours_of_final_timestep(timesteps, self._full_timesteps)
|
|
1092
|
+
)
|
|
646
1093
|
|
|
647
|
-
if
|
|
648
|
-
|
|
1094
|
+
if scenarios is None:
|
|
1095
|
+
self._selected_scenarios = None
|
|
1096
|
+
else:
|
|
1097
|
+
self._selected_scenarios = self._validate_scenarios(scenarios, self._full_scenarios)
|
|
649
1098
|
|
|
650
|
-
|
|
651
|
-
self._active_timesteps = active_timesteps
|
|
652
|
-
first_ts_index = np.where(self.all_timesteps == active_timesteps[0])[0][0]
|
|
653
|
-
last_ts_idx = np.where(self.all_timesteps == active_timesteps[-1])[0][0]
|
|
654
|
-
self._active_timesteps_extra = self.all_timesteps_extra[first_ts_index : last_ts_idx + 2]
|
|
655
|
-
self._active_hours_per_timestep = self.all_hours_per_timestep.isel(time=slice(first_ts_index, last_ts_idx + 1))
|
|
1099
|
+
self._selected_hours_per_timestep = self.calculate_hours_per_timestep(self.timesteps_extra, self.scenarios)
|
|
656
1100
|
|
|
657
|
-
#
|
|
658
|
-
self.
|
|
1101
|
+
# Apply the selection to all TimeSeries objects
|
|
1102
|
+
for ts_name, ts in self._time_series.items():
|
|
1103
|
+
if ts.has_time_dim:
|
|
1104
|
+
timesteps = self.timesteps_extra if ts_name in self._has_extra_timestep else self.timesteps
|
|
1105
|
+
else:
|
|
1106
|
+
timesteps = None
|
|
659
1107
|
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
self._active_timesteps = None
|
|
663
|
-
self._active_timesteps_extra = None
|
|
664
|
-
self._active_hours_per_timestep = None
|
|
1108
|
+
ts.set_selection(timesteps=timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None)
|
|
1109
|
+
self._propagate_selection_to_time_series()
|
|
665
1110
|
|
|
666
|
-
|
|
667
|
-
|
|
1111
|
+
def as_dataset(self, with_extra_timestep: bool = True, with_constants: bool = True) -> xr.Dataset:
|
|
1112
|
+
"""
|
|
1113
|
+
Convert the TimeSeriesCollection to a xarray Dataset, containing the data of each TimeSeries.
|
|
668
1114
|
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
1115
|
+
Args:
|
|
1116
|
+
with_extra_timestep: Whether to include the extra timestep. If False, the extra timestep is dropped.
|
|
1117
|
+
Effectively, this removes the last timestep for certain TimeSeries, but mitigates the presence of NANs in others.
|
|
1118
|
+
with_constants: Whether to include TimeSeries with a constant value in the dataset.
|
|
1119
|
+
"""
|
|
1120
|
+
if self.scenarios is None:
|
|
1121
|
+
ds = xr.Dataset(coords={'time': self.timesteps_extra})
|
|
1122
|
+
else:
|
|
1123
|
+
ds = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra})
|
|
673
1124
|
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
1125
|
+
for ts in self._time_series.values():
|
|
1126
|
+
if not with_constants and ts.all_equal:
|
|
1127
|
+
continue
|
|
1128
|
+
ds[ts.name] = ts.selected_data
|
|
678
1129
|
|
|
679
|
-
|
|
1130
|
+
if not with_extra_timestep:
|
|
1131
|
+
return ds.sel(time=self.timesteps)
|
|
680
1132
|
|
|
681
|
-
|
|
682
|
-
"""
|
|
683
|
-
Update time series with new data from a DataFrame.
|
|
1133
|
+
return ds
|
|
684
1134
|
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
1135
|
+
@property
|
|
1136
|
+
def timesteps(self) -> pd.DatetimeIndex:
|
|
1137
|
+
"""Get the current active timesteps."""
|
|
1138
|
+
if self._selected_timesteps is None:
|
|
1139
|
+
return self._full_timesteps
|
|
1140
|
+
return self._selected_timesteps
|
|
691
1141
|
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
1142
|
+
@property
|
|
1143
|
+
def timesteps_extra(self) -> pd.DatetimeIndex:
|
|
1144
|
+
"""Get the current active timesteps with extra timestep."""
|
|
1145
|
+
if self._selected_timesteps_extra is None:
|
|
1146
|
+
return self._full_timesteps_extra
|
|
1147
|
+
return self._selected_timesteps_extra
|
|
698
1148
|
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
ts.stored_data = data[name].iloc[:-1]
|
|
706
|
-
else:
|
|
707
|
-
# Use data as is
|
|
708
|
-
ts.stored_data = data[name]
|
|
709
|
-
else:
|
|
710
|
-
# For time series with extra timestep
|
|
711
|
-
if include_extra_timestep:
|
|
712
|
-
# Data already includes extra timestep
|
|
713
|
-
ts.stored_data = data[name]
|
|
714
|
-
else:
|
|
715
|
-
# Need to add extra timestep - extrapolate from the last value
|
|
716
|
-
extra_step_value = data[name].iloc[-1]
|
|
717
|
-
extra_step_index = pd.DatetimeIndex([self.timesteps_extra[-1]], name='time')
|
|
718
|
-
extra_step_series = pd.Series([extra_step_value], index=extra_step_index)
|
|
1149
|
+
@property
|
|
1150
|
+
def hours_per_timestep(self) -> xr.DataArray:
|
|
1151
|
+
"""Get the current active hours per timestep."""
|
|
1152
|
+
if self._selected_hours_per_timestep is None:
|
|
1153
|
+
return self._full_hours_per_timestep
|
|
1154
|
+
return self._selected_hours_per_timestep
|
|
719
1155
|
|
|
720
|
-
|
|
721
|
-
|
|
1156
|
+
@property
|
|
1157
|
+
def scenarios(self) -> Optional[pd.Index]:
|
|
1158
|
+
"""Get the current active scenarios."""
|
|
1159
|
+
if self._selected_scenarios is None:
|
|
1160
|
+
return self._full_scenarios
|
|
1161
|
+
return self._selected_scenarios
|
|
1162
|
+
|
|
1163
|
+
def _propagate_selection_to_time_series(self) -> None:
|
|
1164
|
+
"""Apply the current selection to all TimeSeries objects."""
|
|
1165
|
+
for ts_name, ts in self._time_series.items():
|
|
1166
|
+
if ts.has_time_dim:
|
|
1167
|
+
timesteps = self.timesteps_extra if ts_name in self._has_extra_timestep else self.timesteps
|
|
1168
|
+
else:
|
|
1169
|
+
timesteps = None
|
|
722
1170
|
|
|
723
|
-
|
|
1171
|
+
ts.set_selection(timesteps=timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None)
|
|
724
1172
|
|
|
725
|
-
def
|
|
726
|
-
self, filtered: Literal['all', 'constant', 'non_constant'] = 'non_constant', include_extra_timestep: bool = True
|
|
727
|
-
) -> pd.DataFrame:
|
|
1173
|
+
def __getitem__(self, name: str) -> TimeSeries:
|
|
728
1174
|
"""
|
|
729
|
-
|
|
1175
|
+
Get a reference to a time series by name.
|
|
730
1176
|
|
|
731
1177
|
Args:
|
|
732
|
-
|
|
733
|
-
include_extra_timestep: Whether to include the extra timestep in the result, by default True
|
|
1178
|
+
name: Name of the time series
|
|
734
1179
|
|
|
735
1180
|
Returns:
|
|
736
|
-
|
|
1181
|
+
The TimeSeries object with the given name. Raises ValueError if no TimeSeries with that name exists.
|
|
737
1182
|
"""
|
|
738
|
-
|
|
739
|
-
|
|
1183
|
+
# First check if this is a TimeSeries
|
|
1184
|
+
if name in self._time_series:
|
|
1185
|
+
# Return the TimeSeries object (it will handle selection internally)
|
|
1186
|
+
return self._time_series[name]
|
|
1187
|
+
raise ValueError(f'No TimeSeries named "{name}" found')
|
|
1188
|
+
|
|
1189
|
+
def __contains__(self, value) -> bool:
|
|
1190
|
+
if isinstance(value, str):
|
|
1191
|
+
return value in self._time_series
|
|
1192
|
+
elif isinstance(value, TimeSeries):
|
|
1193
|
+
return value.name in self._time_series
|
|
1194
|
+
raise TypeError(f'Invalid type for __contains__ of {self.__class__.__name__}: {type(value)}')
|
|
740
1195
|
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
df = ds.to_dataframe()
|
|
745
|
-
|
|
746
|
-
# Apply filtering
|
|
747
|
-
if filtered == 'all':
|
|
748
|
-
return df
|
|
749
|
-
elif filtered == 'constant':
|
|
750
|
-
return df.loc[:, df.nunique() == 1]
|
|
751
|
-
elif filtered == 'non_constant':
|
|
752
|
-
return df.loc[:, df.nunique() > 1]
|
|
753
|
-
else:
|
|
754
|
-
raise ValueError("filtered must be one of: 'all', 'constant', 'non_constant'")
|
|
1196
|
+
def __iter__(self) -> Iterator[TimeSeries]:
|
|
1197
|
+
"""Iterate over TimeSeries objects."""
|
|
1198
|
+
return iter(self._time_series.values())
|
|
755
1199
|
|
|
756
|
-
def
|
|
1200
|
+
def update_time_series(self, name: str, data: TimestepData) -> TimeSeries:
|
|
757
1201
|
"""
|
|
758
|
-
|
|
1202
|
+
Update an existing TimeSeries with new data.
|
|
759
1203
|
|
|
760
1204
|
Args:
|
|
761
|
-
|
|
1205
|
+
name: Name of the TimeSeries to update
|
|
1206
|
+
data: New data to assign
|
|
762
1207
|
|
|
763
1208
|
Returns:
|
|
764
|
-
|
|
1209
|
+
The updated TimeSeries
|
|
1210
|
+
|
|
1211
|
+
Raises:
|
|
1212
|
+
KeyError: If no TimeSeries with the given name exists
|
|
765
1213
|
"""
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
series_to_include = self.time_series_data.values()
|
|
769
|
-
else:
|
|
770
|
-
series_to_include = self.non_constants
|
|
1214
|
+
if name not in self._time_series:
|
|
1215
|
+
raise KeyError(f"No TimeSeries named '{name}' found")
|
|
771
1216
|
|
|
772
|
-
#
|
|
773
|
-
|
|
1217
|
+
# Get the TimeSeries
|
|
1218
|
+
ts = self._time_series[name]
|
|
774
1219
|
|
|
775
|
-
#
|
|
776
|
-
|
|
1220
|
+
# Determine which timesteps to use if the series has a time dimension
|
|
1221
|
+
if ts.has_time_dim:
|
|
1222
|
+
target_timesteps = self.timesteps_extra if name in self._has_extra_timestep else self.timesteps
|
|
1223
|
+
else:
|
|
1224
|
+
target_timesteps = None
|
|
777
1225
|
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
'hours_per_timestep': self._format_stats(self.hours_per_timestep),
|
|
782
|
-
}
|
|
1226
|
+
# Convert data to proper format
|
|
1227
|
+
data_array = DataConverter.as_dataarray(
|
|
1228
|
+
data, timesteps=target_timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None
|
|
783
1229
|
)
|
|
784
1230
|
|
|
785
|
-
|
|
1231
|
+
# Update the TimeSeries
|
|
1232
|
+
ts.update_stored_data(data_array)
|
|
1233
|
+
|
|
1234
|
+
return ts
|
|
1235
|
+
|
|
1236
|
+
def calculate_aggregation_weights(self) -> Dict[str, float]:
|
|
1237
|
+
"""Calculate and return aggregation weights for all time series."""
|
|
1238
|
+
group_weights = self._calculate_group_weights()
|
|
786
1239
|
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
1240
|
+
weights = {}
|
|
1241
|
+
for name, ts in self._time_series.items():
|
|
1242
|
+
if ts.aggregation_group is not None:
|
|
1243
|
+
# Use group weight
|
|
1244
|
+
weights[name] = group_weights.get(ts.aggregation_group, 1)
|
|
792
1245
|
else:
|
|
793
|
-
|
|
1246
|
+
# Use individual weight or default to 1
|
|
1247
|
+
weights[name] = ts.aggregation_weight or 1
|
|
1248
|
+
|
|
1249
|
+
if np.all(np.isclose(list(weights.values()), 1, atol=1e-6)):
|
|
1250
|
+
logger.info('All Aggregation weights were set to 1')
|
|
1251
|
+
|
|
1252
|
+
return weights
|
|
1253
|
+
|
|
1254
|
+
def _calculate_group_weights(self) -> Dict[str, float]:
|
|
1255
|
+
"""Calculate weights for aggregation groups."""
|
|
1256
|
+
# Count series in each group
|
|
1257
|
+
groups = [ts.aggregation_group for ts in self._time_series.values() if ts.aggregation_group is not None]
|
|
1258
|
+
group_counts = Counter(groups)
|
|
1259
|
+
|
|
1260
|
+
# Calculate weight for each group (1/count)
|
|
1261
|
+
return {group: 1 / count for group, count in group_counts.items()}
|
|
794
1262
|
|
|
795
1263
|
@staticmethod
|
|
796
|
-
def _validate_timesteps(
|
|
797
|
-
|
|
1264
|
+
def _validate_timesteps(
|
|
1265
|
+
timesteps: pd.DatetimeIndex, present_timesteps: Optional[pd.DatetimeIndex] = None
|
|
1266
|
+
) -> pd.DatetimeIndex:
|
|
1267
|
+
"""
|
|
1268
|
+
Validate timesteps format and rename if needed.
|
|
1269
|
+
Args:
|
|
1270
|
+
timesteps: The timesteps to validate
|
|
1271
|
+
present_timesteps: The timesteps that are present in the dataset
|
|
1272
|
+
|
|
1273
|
+
Raises:
|
|
1274
|
+
TypeError: If timesteps is not a pandas DatetimeIndex
|
|
1275
|
+
ValueError: If timesteps is not at least 2 timestamps
|
|
1276
|
+
ValueError: If timesteps has a different name than 'time'
|
|
1277
|
+
ValueError: If timesteps is not sorted
|
|
1278
|
+
ValueError: If timesteps contains duplicates
|
|
1279
|
+
ValueError: If timesteps is not a subset of present_timesteps
|
|
1280
|
+
"""
|
|
798
1281
|
if not isinstance(timesteps, pd.DatetimeIndex):
|
|
799
1282
|
raise TypeError('timesteps must be a pandas DatetimeIndex')
|
|
800
1283
|
|
|
@@ -803,22 +1286,61 @@ class TimeSeriesCollection:
|
|
|
803
1286
|
|
|
804
1287
|
# Ensure timesteps has the required name
|
|
805
1288
|
if timesteps.name != 'time':
|
|
806
|
-
logger.
|
|
1289
|
+
logger.debug('Renamed timesteps to "time" (was "%s")', timesteps.name)
|
|
807
1290
|
timesteps.name = 'time'
|
|
808
1291
|
|
|
1292
|
+
# Ensure timesteps is sorted
|
|
1293
|
+
if not timesteps.is_monotonic_increasing:
|
|
1294
|
+
raise ValueError('timesteps must be sorted')
|
|
1295
|
+
|
|
1296
|
+
# Ensure timesteps has no duplicates
|
|
1297
|
+
if len(timesteps) != len(timesteps.drop_duplicates()):
|
|
1298
|
+
raise ValueError('timesteps must not contain duplicates')
|
|
1299
|
+
|
|
1300
|
+
# Ensure timesteps is a subset of present_timesteps
|
|
1301
|
+
if present_timesteps is not None and not set(timesteps).issubset(set(present_timesteps)):
|
|
1302
|
+
raise ValueError('timesteps must be a subset of present_timesteps')
|
|
1303
|
+
|
|
1304
|
+
return timesteps
|
|
1305
|
+
|
|
809
1306
|
@staticmethod
|
|
810
|
-
def
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
1307
|
+
def _validate_scenarios(scenarios: pd.Index, present_scenarios: Optional[pd.Index] = None) -> Optional[pd.Index]:
|
|
1308
|
+
"""
|
|
1309
|
+
Validate scenario format and rename if needed.
|
|
1310
|
+
Args:
|
|
1311
|
+
scenarios: The scenarios to validate
|
|
1312
|
+
present_scenarios: The scenarios that are present in the dataset
|
|
1313
|
+
|
|
1314
|
+
Raises:
|
|
1315
|
+
ValueError: If timesteps is not a pandas DatetimeIndex
|
|
1316
|
+
ValueError: If timesteps is not at least 2 timestamps
|
|
1317
|
+
ValueError: If timesteps has a different name than 'time'
|
|
1318
|
+
ValueError: If timesteps is not sorted
|
|
1319
|
+
ValueError: If timesteps contains duplicates
|
|
1320
|
+
ValueError: If scenarios is not a subset of present_scenarios
|
|
1321
|
+
"""
|
|
1322
|
+
if scenarios is None:
|
|
1323
|
+
return None
|
|
1324
|
+
|
|
1325
|
+
if not isinstance(scenarios, pd.Index):
|
|
1326
|
+
logger.warning('Converting scenarios to pandas.Index')
|
|
1327
|
+
scenarios = pd.Index(scenarios, name='scenario')
|
|
1328
|
+
|
|
1329
|
+
# Ensure scenarios has the required name
|
|
1330
|
+
if scenarios.name != 'scenario':
|
|
1331
|
+
logger.debug('Renamed scenarios to "scneario" (was "%s")', scenarios.name)
|
|
1332
|
+
scenarios.name = 'scenario'
|
|
820
1333
|
|
|
821
|
-
#
|
|
1334
|
+
# Ensure scenarios is a subset of present_scenarios
|
|
1335
|
+
if present_scenarios is not None and not set(scenarios).issubset(set(present_scenarios)):
|
|
1336
|
+
raise ValueError('scenarios must be a subset of present_scenarios')
|
|
1337
|
+
|
|
1338
|
+
return scenarios
|
|
1339
|
+
|
|
1340
|
+
@staticmethod
|
|
1341
|
+
def _create_timesteps_with_extra(timesteps: pd.DatetimeIndex, hours_of_last_timestep: float) -> pd.DatetimeIndex:
|
|
1342
|
+
"""Create timesteps with an extra step at the end."""
|
|
1343
|
+
last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
|
|
822
1344
|
return pd.DatetimeIndex(timesteps.append(last_date), name='time')
|
|
823
1345
|
|
|
824
1346
|
@staticmethod
|
|
@@ -834,137 +1356,130 @@ class TimeSeriesCollection:
|
|
|
834
1356
|
return first_interval.total_seconds() / 3600 # Convert to hours
|
|
835
1357
|
|
|
836
1358
|
@staticmethod
|
|
837
|
-
def
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
1359
|
+
def _calculate_hours_of_final_timestep(
|
|
1360
|
+
timesteps: pd.DatetimeIndex,
|
|
1361
|
+
timesteps_superset: Optional[pd.DatetimeIndex] = None,
|
|
1362
|
+
hours_of_final_timestep: Optional[float] = None,
|
|
1363
|
+
) -> float:
|
|
1364
|
+
"""
|
|
1365
|
+
Calculate duration of the final timestep.
|
|
1366
|
+
If timesteps_superset is provided, timesteps is treated as a subset of it and the final timestep is calculated relative to that superset.
|
|
1367
|
+
The hours_of_final_timestep is only used if the final timestep can't be determined from the timesteps.
|
|
841
1368
|
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
1369
|
+
Args:
|
|
1370
|
+
timesteps: The timesteps whose final timestep duration is calculated (a subset of timesteps_superset, if that is given)
|
|
1371
|
+
timesteps_superset: Optional superset of timesteps that the given timesteps were selected from
|
|
1372
|
+
hours_of_final_timestep: The duration of the final timestep, if already known
|
|
845
1373
|
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
# Count series in each group
|
|
849
|
-
groups = [ts.aggregation_group for ts in self.time_series_data.values() if ts.aggregation_group is not None]
|
|
850
|
-
group_counts = Counter(groups)
|
|
1374
|
+
Returns:
|
|
1375
|
+
The duration of the final timestep in hours
|
|
851
1376
|
|
|
852
|
-
|
|
853
|
-
|
|
1377
|
+
Raises:
|
|
1378
|
+
ValueError: If the provided timesteps end after the provided timesteps_superset
|
|
1379
|
+
"""
|
|
1380
|
+
if timesteps_superset is None:
|
|
1381
|
+
if hours_of_final_timestep is not None:
|
|
1382
|
+
return hours_of_final_timestep
|
|
1383
|
+
return (timesteps[-1] - timesteps[-2]) / pd.Timedelta(hours=1)
|
|
854
1384
|
|
|
855
|
-
|
|
856
|
-
"""Calculate weights for all time series."""
|
|
857
|
-
# Calculate weight for each time series
|
|
858
|
-
weights = {}
|
|
859
|
-
for name, ts in self.time_series_data.items():
|
|
860
|
-
if ts.aggregation_group is not None:
|
|
861
|
-
# Use group weight
|
|
862
|
-
weights[name] = self.group_weights.get(ts.aggregation_group, 1)
|
|
863
|
-
else:
|
|
864
|
-
# Use individual weight or default to 1
|
|
865
|
-
weights[name] = ts.aggregation_weight or 1
|
|
1385
|
+
final_timestep = timesteps[-1]
|
|
866
1386
|
|
|
867
|
-
|
|
1387
|
+
if timesteps_superset[-1] == final_timestep:
|
|
1388
|
+
if hours_of_final_timestep is not None:
|
|
1389
|
+
return hours_of_final_timestep
|
|
1390
|
+
return (timesteps_superset[-1] - timesteps_superset[-2]) / pd.Timedelta(hours=1)
|
|
868
1391
|
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
1392
|
+
elif timesteps_superset[-1] <= final_timestep:
|
|
1393
|
+
raise ValueError(
|
|
1394
|
+
f'The provided timesteps ({timesteps}) end after the provided timesteps_superset ({timesteps_superset})'
|
|
1395
|
+
)
|
|
873
1396
|
else:
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
min_val = np.min(values)
|
|
878
|
-
max_val = np.max(values)
|
|
879
|
-
|
|
880
|
-
return f'mean: {mean_val:.2f}, min: {min_val:.2f}, max: {max_val:.2f}'
|
|
881
|
-
|
|
882
|
-
def __getitem__(self, name: str) -> TimeSeries:
|
|
883
|
-
"""Get a TimeSeries by name."""
|
|
884
|
-
try:
|
|
885
|
-
return self.time_series_data[name]
|
|
886
|
-
except KeyError as e:
|
|
887
|
-
raise KeyError(f'TimeSeries "{name}" not found in the TimeSeriesCollection') from e
|
|
888
|
-
|
|
889
|
-
def __iter__(self) -> Iterator[TimeSeries]:
|
|
890
|
-
"""Iterate through all TimeSeries in the collection."""
|
|
891
|
-
return iter(self.time_series_data.values())
|
|
892
|
-
|
|
893
|
-
def __len__(self) -> int:
|
|
894
|
-
"""Get the number of TimeSeries in the collection."""
|
|
895
|
-
return len(self.time_series_data)
|
|
896
|
-
|
|
897
|
-
def __contains__(self, item: Union[str, TimeSeries]) -> bool:
|
|
898
|
-
"""Check if a TimeSeries exists in the collection."""
|
|
899
|
-
if isinstance(item, str):
|
|
900
|
-
return item in self.time_series_data
|
|
901
|
-
elif isinstance(item, TimeSeries):
|
|
902
|
-
return any([item is ts for ts in self.time_series_data.values()])
|
|
903
|
-
return False
|
|
904
|
-
|
|
905
|
-
@property
|
|
906
|
-
def non_constants(self) -> List[TimeSeries]:
|
|
907
|
-
"""Get time series with varying values."""
|
|
908
|
-
return [ts for ts in self.time_series_data.values() if not ts.all_equal]
|
|
909
|
-
|
|
910
|
-
@property
|
|
911
|
-
def constants(self) -> List[TimeSeries]:
|
|
912
|
-
"""Get time series with constant values."""
|
|
913
|
-
return [ts for ts in self.time_series_data.values() if ts.all_equal]
|
|
914
|
-
|
|
915
|
-
@property
|
|
916
|
-
def timesteps(self) -> pd.DatetimeIndex:
|
|
917
|
-
"""Get the active timesteps."""
|
|
918
|
-
return self.all_timesteps if self._active_timesteps is None else self._active_timesteps
|
|
919
|
-
|
|
920
|
-
@property
|
|
921
|
-
def timesteps_extra(self) -> pd.DatetimeIndex:
|
|
922
|
-
"""Get the active timesteps with extra step."""
|
|
923
|
-
return self.all_timesteps_extra if self._active_timesteps_extra is None else self._active_timesteps_extra
|
|
1397
|
+
# Get the first timestep in the superset that is after the final timestep of the subset
|
|
1398
|
+
extra_timestep = timesteps_superset[timesteps_superset > final_timestep].min()
|
|
1399
|
+
return (extra_timestep - final_timestep) / pd.Timedelta(hours=1)
|
|
924
1400
|
|
|
925
|
-
@
|
|
926
|
-
def
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
@property
|
|
933
|
-
def hours_of_last_timestep(self) -> float:
|
|
934
|
-
"""Get the duration of the last timestep."""
|
|
935
|
-
return float(self.hours_per_timestep[-1].item())
|
|
936
|
-
|
|
937
|
-
def __repr__(self):
|
|
938
|
-
return f'TimeSeriesCollection:\n{self.to_dataset()}'
|
|
1401
|
+
@staticmethod
def calculate_hours_per_timestep(
    timesteps_extra: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None
) -> xr.DataArray:
    """
    Compute the length of every timestep in hours.

    Args:
        timesteps_extra: Time index that contains one more entry than there are
            timesteps; the gap between each pair of neighbours is one duration.
        scenarios: Optional scenario index, forwarded unchanged to
            ``DataConverter.as_dataarray``.

    Returns:
        DataArray named ``'hours_per_step'``, built on ``timesteps_extra[:-1]``
        (every entry except the last one).
    """
    one_hour = pd.Timedelta(hours=1)
    # Dividing the timedelta gaps by one hour turns them into plain hour counts.
    step_lengths = np.diff(timesteps_extra) / one_hour

    durations = DataConverter.as_dataarray(
        step_lengths,
        timesteps=timesteps_extra[:-1],
        scenarios=scenarios,
    )
    return durations.rename('hours_per_step')
|
|
942
1414
|
|
|
943
|
-
stats_summary = '\n'.join(
|
|
944
|
-
[
|
|
945
|
-
f' - {time_series.name:<{len(longest_name)}}: {get_numeric_stats(time_series.active_data)}'
|
|
946
|
-
for time_series in self.time_series_data
|
|
947
|
-
]
|
|
948
|
-
)
|
|
949
1415
|
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
f' No. of timesteps: {len(self.timesteps)} + 1 extra\n'
|
|
954
|
-
f' Hours per timestep: {get_numeric_stats(self.hours_per_timestep)}\n'
|
|
955
|
-
f' Time Series Data:\n'
|
|
956
|
-
f'{stats_summary}'
|
|
957
|
-
)
|
|
1416
|
+
def _format_numeric_stats(data: 'xr.DataArray', format_spec: str) -> str:
    """
    Render the summary statistics of ``data`` as a single line.

    Args:
        data: Array-like object offering ``max``, ``mean``, ``median``, ``min`` and ``std``
            reductions whose results expose ``.item()``.
        format_spec: Format specification applied to every reported number.

    Returns:
        ``'<value> (constant)'`` if ``data`` holds exactly one distinct value, otherwise the
        mean, median, min, max and std, each followed by its label and separated by ``', '``.
    """
    # A single distinct value makes the other statistics redundant.
    if np.unique(data).size == 1:
        return f'{data.max().item():{format_spec}} (constant)'

    labelled_values = (
        (data.mean().item(), 'mean'),
        (data.median().item(), 'median'),
        (data.min().item(), 'min'),
        (data.max().item(), 'max'),
        (data.std().item(), 'std'),
    )
    return ', '.join(f'{value:{format_spec}} ({label})' for value, label in labelled_values)


def get_numeric_stats(data: 'xr.DataArray', decimals: int = 2, padd: int = 10, by_scenario: bool = False) -> str:
    """
    Calculates the mean, median, min, max, and standard deviation of a numeric DataArray.

    Data with only one distinct value is reported as a single ``(constant)`` value instead.

    Args:
        data: The DataArray to analyze
        decimals: Number of decimal places to show
        padd: Minimum width each number is right-aligned to; a falsy value disables padding
        by_scenario: Whether to break down stats by scenario. Only takes effect if ``data``
            has a ``'scenario'`` dimension with more than one entry; otherwise the stats are
            computed over all values at once.

    Returns:
        String representation of data statistics. With an effective ``by_scenario``, the
        result is a ``'By scenario:'`` header followed by one line per scenario.
    """
    format_spec = f'>{padd}.{decimals}f' if padd else f'.{decimals}f'

    if by_scenario and 'scenario' in data.dims and data.sizes['scenario'] > 1:
        lines = ['By scenario:']
        lines.extend(
            f' {scenario}: {_format_numeric_stats(data.sel(scenario=scenario), format_spec)}'
            for scenario in data.coords['scenario'].values
        )
        return '\n'.join(lines)

    # No scenario breakdown requested (or possible): summarize all values together.
    return _format_numeric_stats(data, format_spec)
|
|
1461
|
+
|
|
1462
|
+
|
|
1463
|
+
def extract_data(
    data: Optional[Union[int, float, xr.DataArray, TimeSeries]],
    if_none: Any = None
) -> Any:
    """
    Unwrap the plain data behind a supported input.

    Args:
        data: A scalar, a DataArray, a TimeSeries, or None
        if_none: The value to return if data is None

    Returns:
        ``if_none`` for None, the ``selected_data`` of a TimeSeries, and the input itself
        for a DataArray or a scalar (int, float, numpy integer, numpy floating).

    Raises:
        TypeError: If data is of any other type
    """
    if data is None:
        return if_none

    if isinstance(data, TimeSeries):
        return data.selected_data

    # DataArrays and scalars need no unwrapping and are handed back as they are.
    passthrough_types = (xr.DataArray, int, float, np.integer, np.floating)
    if isinstance(data, passthrough_types):
        return data

    raise TypeError(f'Unsupported data type: {type(data).__name__}')
|