flixopt 2.1.0__py3-none-any.whl → 2.2.0b0__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

flixopt/core.py CHANGED
@@ -7,6 +7,7 @@ import inspect
 import json
 import logging
 import pathlib
+import textwrap
 from collections import Counter
 from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
 
@@ -25,6 +26,12 @@ NumericData = Union[int, float, np.integer, np.floating, np.ndarray, pd.Series,
 NumericDataTS = Union[NumericData, 'TimeSeriesData']
 """Represents either standard numeric data or TimeSeriesData."""
 
+TimestepData = NumericData
+"""Represents any form of numeric data that corresponds to timesteps."""
+
+ScenarioData = NumericData
+"""Represents any form of numeric data that corresponds to scenarios."""
+
 
 class PlausibilityError(Exception):
     """Error for a failing Plausibility check."""
@@ -40,61 +47,446 @@ class ConversionError(Exception):
 
 class DataConverter:
     """
-    Converts various data types into xarray.DataArray with a timesteps index.
+    Converts various data types into xarray.DataArray with optional time and scenario dimension.
 
-    Supports: scalars, arrays, Series, DataFrames, and DataArrays.
+    Current implementation handles:
+    - Scalar values
+    - NumPy arrays
+    - pandas Series and DataFrames
+    - xarray.DataArray
     """
 
     @staticmethod
-    def as_dataarray(data: NumericData, timesteps: pd.DatetimeIndex) -> xr.DataArray:
-        """Convert data to xarray.DataArray with specified timesteps index."""
+    def as_dataarray(
+        data: TimestepData, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None
+    ) -> xr.DataArray:
+        """
+        Convert data to xarray.DataArray with specified dimensions.
+
+        Args:
+            data: The data to convert (scalar, array, or DataArray)
+            timesteps: Optional DatetimeIndex for time dimension
+            scenarios: Optional Index for scenario dimension
+
+        Returns:
+            DataArray with the converted data
+        """
+        # Prepare dimensions and coordinates
+        coords, dims = DataConverter._prepare_dimensions(timesteps, scenarios)
+
+        # Select appropriate converter based on data type
+        if isinstance(data, (int, float, np.integer, np.floating)):
+            return DataConverter._convert_scalar(data, coords, dims)
+
+        elif isinstance(data, xr.DataArray):
+            return DataConverter._convert_dataarray(data, coords, dims)
+
+        elif isinstance(data, np.ndarray):
+            return DataConverter._convert_ndarray(data, coords, dims)
+
+        elif isinstance(data, pd.Series):
+            return DataConverter._convert_series(data, coords, dims)
+
+        elif isinstance(data, pd.DataFrame):
+            return DataConverter._convert_dataframe(data, coords, dims)
+
+        else:
+            raise ConversionError(f'Unsupported data type: {type(data).__name__}')
+
+    @staticmethod
+    def _validate_timesteps(timesteps: pd.DatetimeIndex) -> pd.DatetimeIndex:
+        """
+        Validate and prepare time index.
+
+        Args:
+            timesteps: The time index to validate
+
+        Returns:
+            Validated time index
+        """
         if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0:
-            raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}')
+            raise ConversionError('Timesteps must be a non-empty DatetimeIndex')
+
         if not timesteps.name == 'time':
-            raise ConversionError(f'DatetimeIndex is not named correctly. Must be named "time", got {timesteps.name=}')
-
-        coords = [timesteps]
-        dims = ['time']
-        expected_shape = (len(timesteps),)
-
-        try:
-            if isinstance(data, (int, float, np.integer, np.floating)):
-                return xr.DataArray(data, coords=coords, dims=dims)
-            elif isinstance(data, pd.DataFrame):
-                if not data.index.equals(timesteps):
-                    raise ConversionError("DataFrame index doesn't match timesteps index")
-                if not len(data.columns) == 1:
-                    raise ConversionError('DataFrame must have exactly one column')
-                return xr.DataArray(data.values.flatten(), coords=coords, dims=dims)
-            elif isinstance(data, pd.Series):
-                if not data.index.equals(timesteps):
-                    raise ConversionError("Series index doesn't match timesteps index")
-                return xr.DataArray(data.values, coords=coords, dims=dims)
-            elif isinstance(data, np.ndarray):
-                if data.ndim != 1:
-                    raise ConversionError(f'Array must be 1-dimensional, got {data.ndim}')
-                elif data.shape[0] != expected_shape[0]:
-                    raise ConversionError(f"Array shape {data.shape} doesn't match expected {expected_shape}")
-                return xr.DataArray(data, coords=coords, dims=dims)
-            elif isinstance(data, xr.DataArray):
-                if data.dims != tuple(dims):
-                    raise ConversionError(f"DataArray dimensions {data.dims} don't match expected {dims}")
-                if data.sizes[dims[0]] != len(coords[0]):
-                    raise ConversionError(
-                        f"DataArray length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}"
-                    )
-                return data.copy(deep=True)
+            raise ConversionError(f'Timesteps must be named "time", got "{timesteps.name}"')
+
+        return timesteps
+
+    @staticmethod
+    def _validate_scenarios(scenarios: pd.Index) -> pd.Index:
+        """
+        Validate and prepare scenario index.
+
+        Args:
+            scenarios: The scenario index to validate
+        """
+        if not isinstance(scenarios, pd.Index) or len(scenarios) == 0:
+            raise ConversionError('Scenarios must be a non-empty Index')
+
+        if not scenarios.name == 'scenario':
+            raise ConversionError(f'Scenarios must be named "scenario", got "{scenarios.name}"')
+
+        return scenarios
+
+    @staticmethod
+    def _prepare_dimensions(
+        timesteps: Optional[pd.DatetimeIndex], scenarios: Optional[pd.Index]
+    ) -> Tuple[Dict[str, pd.Index], Tuple[str, ...]]:
+        """
+        Prepare coordinates and dimensions for the DataArray.
+
+        Args:
+            timesteps: Optional time index
+            scenarios: Optional scenario index
+
+        Returns:
+            Tuple of (coordinates dict, dimensions tuple)
+        """
+        # Validate inputs if provided
+        if timesteps is not None:
+            timesteps = DataConverter._validate_timesteps(timesteps)
+
+        if scenarios is not None:
+            scenarios = DataConverter._validate_scenarios(scenarios)
+
+        # Build coordinates and dimensions
+        coords = {}
+        dims = []
+
+        if timesteps is not None:
+            coords['time'] = timesteps
+            dims.append('time')
+
+        if scenarios is not None:
+            coords['scenario'] = scenarios
+            dims.append('scenario')
+
+        return coords, tuple(dims)
+
+    @staticmethod
+    def _convert_scalar(
+        data: Union[int, float, np.integer, np.floating], coords: Dict[str, pd.Index], dims: Tuple[str, ...]
+    ) -> xr.DataArray:
+        """
+        Convert a scalar value to a DataArray.
+
+        Args:
+            data: The scalar value
+            coords: Coordinate dictionary
+            dims: Dimension names
+
+        Returns:
+            DataArray with the scalar value
+        """
+        if isinstance(data, (np.integer, np.floating)):
+            data = data.item()
+        return xr.DataArray(data, coords=coords, dims=dims)
+
+    @staticmethod
+    def _convert_dataarray(data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
+        """
+        Convert an existing DataArray to desired dimensions.
+
+        Args:
+            data: The source DataArray
+            coords: Target coordinates
+            dims: Target dimensions
+
+        Returns:
+            DataArray with the target dimensions
+        """
+        # No dimensions case
+        if len(dims) == 0:
+            if data.size != 1:
+                raise ConversionError('When converting to dimensionless DataArray, source must be scalar')
+            return xr.DataArray(data.values.item())
+
+        # Check if data already has matching dimensions and coordinates
+        if set(data.dims) == set(dims):
+            # Check if coordinates match
+            is_compatible = True
+            for dim in dims:
+                if dim in data.dims and not np.array_equal(data.coords[dim].values, coords[dim].values):
+                    is_compatible = False
+                    break
+
+            if is_compatible:
+                # Ensure dimensions are in the correct order
+                if data.dims != dims:
+                    # Transpose to get dimensions in the right order
+                    return data.transpose(*dims).copy(deep=True)
+                else:
+                    # Return existing DataArray if compatible and order is correct
+                    return data.copy(deep=True)
+
+        # Handle dimension broadcasting
+        if len(data.dims) == 1 and len(dims) == 2:
+            # Single dimension to two dimensions
+            if data.dims[0] == 'time' and 'scenario' in dims:
+                # Broadcast time dimension to include scenarios
+                return DataConverter._broadcast_time_to_scenarios(data, coords, dims)
+
+            elif data.dims[0] == 'scenario' and 'time' in dims:
+                # Broadcast scenario dimension to include time
+                return DataConverter._broadcast_scenario_to_time(data, coords, dims)
+
+        raise ConversionError(
+            f'Cannot convert {data.dims} to {dims}. Source coordinates: {data.coords}, Target coordinates: {coords}'
+        )
+
+    @staticmethod
+    def _broadcast_time_to_scenarios(
+        data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
+    ) -> xr.DataArray:
+        """
+        Broadcast a time-only DataArray to include scenarios.
+
+        Args:
+            data: The time-indexed DataArray
+            coords: Target coordinates
+            dims: Target dimensions
+
+        Returns:
+            DataArray with time and scenario dimensions
+        """
+        # Check compatibility
+        if not np.array_equal(data.coords['time'].values, coords['time'].values):
+            raise ConversionError("Source time coordinates don't match target time coordinates")
+
+        if len(coords['scenario']) <= 1:
+            return data.copy(deep=True)
+
+        # Broadcast values
+        values = np.repeat(data.values[:, np.newaxis], len(coords['scenario']), axis=1)
+        return xr.DataArray(values.copy(), coords=coords, dims=dims)
+
+    @staticmethod
+    def _broadcast_scenario_to_time(
+        data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
+    ) -> xr.DataArray:
+        """
+        Broadcast a scenario-only DataArray to include time.
+
+        Args:
+            data: The scenario-indexed DataArray
+            coords: Target coordinates
+            dims: Target dimensions
+
+        Returns:
+            DataArray with time and scenario dimensions
+        """
+        # Check compatibility
+        if not np.array_equal(data.coords['scenario'].values, coords['scenario'].values):
+            raise ConversionError("Source scenario coordinates don't match target scenario coordinates")
+
+        # Broadcast values
+        values = np.repeat(data.values[:, np.newaxis], len(coords['time']), axis=1).T
+        return xr.DataArray(values.copy(), coords=coords, dims=dims)
+
+    @staticmethod
+    def _convert_ndarray(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
+        """
+        Convert a NumPy array to a DataArray.
+
+        Args:
+            data: The NumPy array
+            coords: Target coordinates
+            dims: Target dimensions
+
+        Returns:
+            DataArray from the NumPy array
+        """
+        # Handle dimensionless case
+        if len(dims) == 0:
+            if data.size != 1:
+                raise ConversionError('Without dimensions, can only convert scalar arrays')
+            return xr.DataArray(data.item())
+
+        # Handle single dimension
+        elif len(dims) == 1:
+            return DataConverter._convert_ndarray_single_dim(data, coords, dims)
+
+        # Handle two dimensions
+        elif len(dims) == 2:
+            return DataConverter._convert_ndarray_two_dims(data, coords, dims)
+
+        else:
+            raise ConversionError('Maximum 2 dimensions supported')
+
+    @staticmethod
+    def _convert_ndarray_single_dim(
+        data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
+    ) -> xr.DataArray:
+        """
+        Convert a NumPy array to a single-dimension DataArray.
+
+        Args:
+            data: The NumPy array
+            coords: Target coordinates
+            dims: Target dimensions (length 1)
+
+        Returns:
+            DataArray with single dimension
+        """
+        dim_name = dims[0]
+        dim_length = len(coords[dim_name])
+
+        if data.ndim == 1:
+            # 1D array must match dimension length
+            if data.shape[0] != dim_length:
+                raise ConversionError(f"Array length {data.shape[0]} doesn't match {dim_name} length {dim_length}")
+            return xr.DataArray(data, coords=coords, dims=dims)
+        else:
+            raise ConversionError(f'Expected 1D array for single dimension, got {data.ndim}D')
+
+    @staticmethod
+    def _convert_ndarray_two_dims(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
+        """
+        Convert a NumPy array to a two-dimension DataArray.
+
+        Args:
+            data: The NumPy array
+            coords: Target coordinates
+            dims: Target dimensions (length 2)
+
+        Returns:
+            DataArray with two dimensions
+        """
+        scenario_length = len(coords['scenario'])
+        time_length = len(coords['time'])
+
+        if data.ndim == 1:
+            # For 1D array, create 2D array based on which dimension it matches
+            if data.shape[0] == time_length:
+                # Broadcast across scenarios
+                values = np.repeat(data[:, np.newaxis], scenario_length, axis=1)
+                return xr.DataArray(values, coords=coords, dims=dims)
+            elif data.shape[0] == scenario_length:
+                # Broadcast across time
+                values = np.repeat(data[np.newaxis, :], time_length, axis=0)
+                return xr.DataArray(values, coords=coords, dims=dims)
+            else:
+                raise ConversionError(f"1D array length {data.shape[0]} doesn't match either dimension")
+
+        elif data.ndim == 2:
+            # For 2D array, shape must match dimensions
+            expected_shape = (time_length, scenario_length)
+            if data.shape != expected_shape:
+                raise ConversionError(f"2D array shape {data.shape} doesn't match expected shape {expected_shape}")
+            return xr.DataArray(data, coords=coords, dims=dims)
+
+        else:
+            raise ConversionError(f'Expected 1D or 2D array for two dimensions, got {data.ndim}D')
+
+    @staticmethod
+    def _convert_series(data: pd.Series, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
+        """
+        Convert pandas Series to xarray DataArray.
+
+        Args:
+            data: pandas Series to convert
+            coords: Target coordinates
+            dims: Target dimensions
+
+        Returns:
+            DataArray from the pandas Series
+        """
+        # Handle single dimension case
+        if len(dims) == 1:
+            dim_name = dims[0]
+
+            # Check if series index matches the dimension
+            if data.index.equals(coords[dim_name]):
+                return xr.DataArray(data.values.copy(), coords=coords, dims=dims)
             else:
-                raise ConversionError(f'Unsupported type: {type(data).__name__}')
-        except Exception as e:
-            if isinstance(e, ConversionError):
-                raise
-            raise ConversionError(f'Converting data {type(data)} to xarray.Dataset raised an error: {str(e)}') from e
+                raise ConversionError(
+                    f"Series index doesn't match {dim_name} coordinates.\n"
+                    f'Series index: {data.index}\n'
+                    f'Target {dim_name} coordinates: {coords[dim_name]}'
+                )
+
+        # Handle two dimensions case
+        elif len(dims) == 2:
+            # Check if dimensions are time and scenario
+            if dims != ('time', 'scenario'):
+                raise ConversionError(
+                    f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}'
+                )
+
+            # Case 1: Series is indexed by time
+            if data.index.equals(coords['time']):
+                # Broadcast across scenarios
+                values = np.repeat(data.values[:, np.newaxis], len(coords['scenario']), axis=1)
+                return xr.DataArray(values.copy(), coords=coords, dims=dims)
+
+            # Case 2: Series is indexed by scenario
+            elif data.index.equals(coords['scenario']):
+                # Broadcast across time
+                values = np.repeat(data.values[np.newaxis, :], len(coords['time']), axis=0)
+                return xr.DataArray(values.copy(), coords=coords, dims=dims)
+
+            else:
+                raise ConversionError(
+                    "Series index must match either 'time' or 'scenario' coordinates.\n"
+                    f'Series index: {data.index}\n'
+                    f'Target time coordinates: {coords["time"]}\n'
+                    f'Target scenario coordinates: {coords["scenario"]}'
+                )
+
+        else:
+            raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}')
+
+    @staticmethod
+    def _convert_dataframe(data: pd.DataFrame, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
+        """
+        Convert pandas DataFrame to xarray DataArray.
+        Only allows time as index and scenarios as columns.
+
+        Args:
+            data: pandas DataFrame to convert
+            coords: Target coordinates
+            dims: Target dimensions
+
+        Returns:
+            DataArray from the pandas DataFrame
+        """
+        # Single dimension case
+        if len(dims) == 1:
+            # If DataFrame has one column, treat it like a Series
+            if len(data.columns) == 1:
+                series = data.iloc[:, 0]
+                return DataConverter._convert_series(series, coords, dims)
+
+            raise ConversionError(
+                f'When converting DataFrame to single-dimension DataArray, DataFrame must have exactly one column, got {len(data.columns)}'
+            )
+
+        # Two dimensions case
+        elif len(dims) == 2:
+            # Check if dimensions are time and scenario
+            if dims != ('time', 'scenario'):
+                raise ConversionError(
+                    f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}'
+                )
+
+            # DataFrame must have time as index and scenarios as columns
+            if data.index.equals(coords['time']) and data.columns.equals(coords['scenario']):
+                # Create DataArray with proper dimension order
+                return xr.DataArray(data.values.copy(), coords=coords, dims=dims)
+            else:
+                raise ConversionError(
+                    'DataFrame must have time as index and scenarios as columns.\n'
+                    f'DataFrame index: {data.index}\n'
+                    f'DataFrame columns: {data.columns}\n'
+                    f'Target time coordinates: {coords["time"]}\n'
+                    f'Target scenario coordinates: {coords["scenario"]}'
+                )
+
+        else:
+            raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}')
 
 
 class TimeSeriesData:
     # TODO: Move to Interface.py
-    def __init__(self, data: NumericData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None):
+    def __init__(self, data: TimestepData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None):
         """
         timeseries class for transmit timeseries AND special characteristics of timeseries,
         e.g. to define weights needed in calculation_type 'aggregated'
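
A minimal sketch of how the rewritten converter behaves (illustrative only, not part of the diff; the index names 'time' and 'scenario' are mandatory per the validators above):

    import numpy as np
    import pandas as pd

    from flixopt.core import DataConverter

    timesteps = pd.date_range('2024-01-01', periods=4, freq='h', name='time')
    scenarios = pd.Index(['low', 'high'], name='scenario')

    # Scalar -> broadcast over both dimensions, shape (4, 2)
    DataConverter.as_dataarray(42, timesteps, scenarios)

    # Time-indexed Series -> values repeated for each scenario, shape (4, 2)
    DataConverter.as_dataarray(pd.Series([1.0, 2.0, 3.0, 4.0], index=timesteps), timesteps, scenarios)

    # Scenario-indexed Series -> values repeated along time, shape (4, 2)
    DataConverter.as_dataarray(pd.Series([20.0, 80.0], index=scenarios), timesteps, scenarios)

    # 2D array must already have shape (len(time), len(scenario))
    DataConverter.as_dataarray(np.ones((4, 2)), timesteps, scenarios)
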
@@ -146,18 +538,19 @@ class TimeSeries:
         name (str): The name of the time series
         aggregation_weight (Optional[float]): Weight used for aggregation
         aggregation_group (Optional[str]): Group name for shared aggregation weighting
-        needs_extra_timestep (bool): Whether this series needs an extra timestep
+        has_extra_timestep (bool): Whether this series needs an extra timestep
     """
 
     @classmethod
     def from_datasource(
         cls,
-        data: NumericData,
+        data: NumericDataTS,
         name: str,
         timesteps: pd.DatetimeIndex,
+        scenarios: Optional[pd.Index] = None,
         aggregation_weight: Optional[float] = None,
         aggregation_group: Optional[str] = None,
-        needs_extra_timestep: bool = False,
+        has_extra_timestep: bool = False,
     ) -> 'TimeSeries':
         """
         Initialize the TimeSeries from multiple data sources.
@@ -166,19 +559,20 @@ class TimeSeries:
             data: The time series data
             name: The name of the TimeSeries
             timesteps: The timesteps of the TimeSeries
+            scenarios: The scenarios of the TimeSeries
             aggregation_weight: The weight in aggregation calculations
             aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing
-            needs_extra_timestep: Whether this series requires an extra timestep
+            has_extra_timestep: Whether this series requires an extra timestep
 
         Returns:
             A new TimeSeries instance
         """
         return cls(
-            DataConverter.as_dataarray(data, timesteps),
+            DataConverter.as_dataarray(data, timesteps, scenarios),
             name,
             aggregation_weight,
             aggregation_group,
-            needs_extra_timestep,
+            has_extra_timestep,
         )
 
     @classmethod
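
With the new `scenarios` argument, `from_datasource` delegates all broadcasting to `DataConverter`. A short usage sketch (illustrative only, not part of the diff):

    import pandas as pd

    from flixopt.core import TimeSeries

    timesteps = pd.date_range('2024-01-01', periods=3, freq='h', name='time')
    scenarios = pd.Index(['base', 'peak'], name='scenario')

    # A scalar is broadcast to every (time, scenario) pair.
    ts = TimeSeries.from_datasource(5.0, 'demand', timesteps, scenarios=scenarios)
    print(ts.selected_data.dims)  # ('time', 'scenario')
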
@@ -212,7 +606,7 @@ class TimeSeries:
             name=data['name'],
             aggregation_weight=data['aggregation_weight'],
             aggregation_group=data['aggregation_group'],
-            needs_extra_timestep=data['needs_extra_timestep'],
+            has_extra_timestep=data['has_extra_timestep'],
         )
 
     def __init__(
@@ -221,7 +615,7 @@ class TimeSeries:
         name: str,
         aggregation_weight: Optional[float] = None,
         aggregation_group: Optional[str] = None,
-        needs_extra_timestep: bool = False,
+        has_extra_timestep: bool = False,
     ):
         """
         Initialize a TimeSeries with a DataArray.
@@ -231,35 +625,40 @@ class TimeSeries:
             name: The name of the TimeSeries
             aggregation_weight: The weight in aggregation calculations
             aggregation_group: Group this TimeSeries belongs to for weight sharing
-            needs_extra_timestep: Whether this series requires an extra timestep
+            has_extra_timestep: Whether this series requires an extra timestep
 
         Raises:
-            ValueError: If data doesn't have a 'time' index or has more than 1 dimension
+            ValueError: If data has unsupported dimensions
         """
-        if 'time' not in data.indexes:
-            raise ValueError(f'DataArray must have a "time" index. Got {data.indexes}')
-        if data.ndim > 1:
-            raise ValueError(f'Number of dimensions of DataArray must be 1. Got {data.ndim}')
+        allowed_dims = {'time', 'scenario'}
+        if not set(data.dims).issubset(allowed_dims):
+            raise ValueError(f'DataArray dimensions must be subset of {allowed_dims}. Got {data.dims}')
 
         self.name = name
         self.aggregation_weight = aggregation_weight
         self.aggregation_group = aggregation_group
-        self.needs_extra_timestep = needs_extra_timestep
+        self.has_extra_timestep = has_extra_timestep
 
         # Data management
         self._stored_data = data.copy(deep=True)
         self._backup = self._stored_data.copy(deep=True)
-        self._active_timesteps = self._stored_data.indexes['time']
-        self._active_data = None
-        self._update_active_data()
 
-    def reset(self):
+        # Selection state
+        self._selected_timesteps: Optional[pd.DatetimeIndex] = None
+        self._selected_scenarios: Optional[pd.Index] = None
+
+        # Flags for which dimensions this series has
+        self.has_time_dim = 'time' in data.dims
+        self.has_scenario_dim = 'scenario' in data.dims
+
+    def reset(self) -> None:
         """
-        Reset active timesteps to the full set of stored timesteps.
+        Reset selections to include all timesteps and scenarios.
+        This is equivalent to clearing all selections.
         """
-        self.active_timesteps = None
+        self.set_selection(None, None)
 
-    def restore_data(self):
+    def restore_data(self) -> None:
         """
         Restore stored_data from the backup and reset active timesteps.
         """
@@ -280,8 +679,8 @@ class TimeSeries:
             'name': self.name,
             'aggregation_weight': self.aggregation_weight,
             'aggregation_group': self.aggregation_group,
-            'needs_extra_timestep': self.needs_extra_timestep,
-            'data': self.active_data.to_dict(),
+            'has_extra_timestep': self.has_extra_timestep,
+            'data': self.selected_data.to_dict(),
         }
 
         # Convert datetime objects to ISO strings
@@ -289,7 +688,7 @@ class TimeSeries:
 
         # Save to file if path is provided
         if path is not None:
-            indent = 4 if len(self.active_timesteps) <= 480 else None
+            indent = 4 if len(self.selected_timesteps) <= 480 else None
             with open(path, 'w', encoding='utf-8') as f:
                 json.dump(data, f, indent=indent, ensure_ascii=False)
 
@@ -303,84 +702,116 @@ class TimeSeries:
         Returns:
             String representation of data statistics
         """
-        return get_numeric_stats(self.active_data, padd=0)
-
-    def _update_active_data(self):
-        """
-        Update the active data based on active_timesteps.
-        """
-        self._active_data = self._stored_data.sel(time=self.active_timesteps)
+        return get_numeric_stats(self.selected_data, padd=0, by_scenario=True)
 
     @property
     def all_equal(self) -> bool:
         """Check if all values in the series are equal."""
-        return np.unique(self.active_data.values).size == 1
+        return np.unique(self.selected_data.values).size == 1
 
     @property
-    def active_timesteps(self) -> pd.DatetimeIndex:
-        """Get the current active timesteps."""
-        return self._active_timesteps
-
-    @active_timesteps.setter
-    def active_timesteps(self, timesteps: Optional[pd.DatetimeIndex]):
+    def selected_data(self) -> xr.DataArray:
         """
-        Set active_timesteps and refresh active_data.
-
-        Args:
-            timesteps: New timesteps to activate, or None to use all stored timesteps
-
-        Raises:
-            TypeError: If timesteps is not a pandas DatetimeIndex or None
+        Get a view of stored_data based on current selections.
+        This computes the view dynamically based on the current selection state.
         """
-        if timesteps is None:
-            self._active_timesteps = self.stored_data.indexes['time']
-        elif isinstance(timesteps, pd.DatetimeIndex):
-            self._active_timesteps = timesteps
-        else:
-            raise TypeError('active_timesteps must be a pandas DatetimeIndex or None')
+        return self._stored_data.sel(**self._valid_selector)
 
-        self._update_active_data()
+    @property
+    def selected_timesteps(self) -> Optional[pd.DatetimeIndex]:
+        """Get the current active timesteps, or None if no time dimension."""
+        if not self.has_time_dim:
+            return None
+        if self._selected_timesteps is None:
+            return self._stored_data.indexes['time']
+        return self._selected_timesteps
 
     @property
-    def active_data(self) -> xr.DataArray:
-        """Get a view of stored_data based on active_timesteps."""
-        return self._active_data
+    def active_scenarios(self) -> Optional[pd.Index]:
+        """Get the current active scenarios, or None if no scenario dimension."""
+        if not self.has_scenario_dim:
+            return None
+        if self._selected_scenarios is None:
+            return self._stored_data.indexes['scenario']
+        return self._selected_scenarios
 
     @property
     def stored_data(self) -> xr.DataArray:
         """Get a copy of the full stored data."""
         return self._stored_data.copy()
 
-    @stored_data.setter
-    def stored_data(self, value: NumericData):
+    def update_stored_data(self, value: xr.DataArray) -> None:
         """
-        Update stored_data and refresh active_data.
+        Update stored_data and refresh selected_data.
 
         Args:
             value: New data to store
         """
-        new_data = DataConverter.as_dataarray(value, timesteps=self.active_timesteps)
+        new_data = DataConverter.as_dataarray(
+            value,
+            timesteps=self.selected_timesteps if self.has_time_dim else None,
+            scenarios=self.active_scenarios if self.has_scenario_dim else None,
+        )
 
         # Skip if data is unchanged to avoid overwriting backup
         if new_data.equals(self._stored_data):
             return
 
         self._stored_data = new_data
-        self.active_timesteps = None  # Reset to full timeline
+        self.set_selection(None, None)  # Reset selections to full dataset
+
+    def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None:
+        """
+        Set active subset for timesteps and scenarios.
+
+        Args:
+            timesteps: Timesteps to activate, or None to clear. Ignored if series has no time dimension.
+            scenarios: Scenarios to activate, or None to clear. Ignored if series has no scenario dimension.
+        """
+        # Only update timesteps if the series has a time dimension
+        if self.has_time_dim:
+            if timesteps is None or timesteps.equals(self._stored_data.indexes['time']):
+                self._selected_timesteps = None
+            else:
+                self._selected_timesteps = timesteps
+
+        # Only update scenarios if the series has a scenario dimension
+        if self.has_scenario_dim:
+            if scenarios is None or scenarios.equals(self._stored_data.indexes['scenario']):
+                self._selected_scenarios = None
+            else:
+                self._selected_scenarios = scenarios
 
     @property
     def sel(self):
-        return self.active_data.sel
+        """Direct access to the selected_data's sel method for convenience."""
+        return self.selected_data.sel
 
     @property
     def isel(self):
-        return self.active_data.isel
+        """Direct access to the selected_data's isel method for convenience."""
+        return self.selected_data.isel
+
+    @property
+    def _valid_selector(self) -> Dict[str, pd.Index]:
+        """Get the current selection as a dictionary."""
+        selector = {}
+
+        # Only include time in selector if series has time dimension
+        if self.has_time_dim and self._selected_timesteps is not None:
+            selector['time'] = self._selected_timesteps
+
+        # Only include scenario in selector if series has scenario dimension
+        if self.has_scenario_dim and self._selected_scenarios is not None:
+            selector['scenario'] = self._selected_scenarios
+
+        return selector
 
     def _apply_operation(self, other, op):
         """Apply an operation between this TimeSeries and another object."""
         if isinstance(other, TimeSeries):
-            other = other.active_data
-        return op(self.active_data, other)
+            other = other.selected_data
+        return op(self.selected_data, other)
 
     def __add__(self, other):
         return self._apply_operation(other, lambda x, y: x + y)
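
`stored_data` is no longer a setter; updates go through `update_stored_data`, which re-converts the value against the current selection and then clears it. A sketch (illustrative only, not part of the diff):

    import numpy as np
    import pandas as pd

    from flixopt.core import TimeSeries

    timesteps = pd.date_range('2024-01-01', periods=3, freq='h', name='time')
    ts = TimeSeries.from_datasource(np.zeros(3), 'price', timesteps)

    # Identical data is ignored so the backup is not touched; changed data
    # replaces the stored array and resets any selection.
    ts.update_stored_data(ts.selected_data + 1.0)
    assert float(ts.selected_data.sum()) == 3.0

    # .sel/.isel proxy straight through to selected_data.
    first_hour = ts.sel(time=timesteps[0])
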
@@ -395,25 +826,25 @@ class TimeSeries:
         return self._apply_operation(other, lambda x, y: x / y)
 
     def __radd__(self, other):
-        return other + self.active_data
+        return other + self.selected_data
 
     def __rsub__(self, other):
-        return other - self.active_data
+        return other - self.selected_data
 
     def __rmul__(self, other):
-        return other * self.active_data
+        return other * self.selected_data
 
     def __rtruediv__(self, other):
-        return other / self.active_data
+        return other / self.selected_data
 
     def __neg__(self) -> xr.DataArray:
-        return -self.active_data
+        return -self.selected_data
 
     def __pos__(self) -> xr.DataArray:
-        return +self.active_data
+        return +self.selected_data
 
     def __abs__(self) -> xr.DataArray:
-        return abs(self.active_data)
+        return abs(self.selected_data)
 
     def __gt__(self, other):
         """
@@ -426,8 +857,8 @@ class TimeSeries:
             True if all values in this TimeSeries are greater than other
         """
         if isinstance(other, TimeSeries):
-            return self.active_data > other.active_data
-        return self.active_data > other
+            return self.selected_data > other.selected_data
+        return self.selected_data > other
 
     def __ge__(self, other):
         """
@@ -440,8 +871,8 @@ class TimeSeries:
             True if all values in this TimeSeries are greater than or equal to other
         """
         if isinstance(other, TimeSeries):
-            return self.active_data >= other.active_data
-        return self.active_data >= other
+            return self.selected_data >= other.selected_data
+        return self.selected_data >= other
 
     def __lt__(self, other):
         """
@@ -454,8 +885,8 @@ class TimeSeries:
             True if all values in this TimeSeries are less than other
         """
         if isinstance(other, TimeSeries):
-            return self.active_data < other.active_data
-        return self.active_data < other
+            return self.selected_data < other.selected_data
+        return self.selected_data < other
 
     def __le__(self, other):
         """
@@ -468,8 +899,8 @@ class TimeSeries:
             True if all values in this TimeSeries are less than or equal to other
         """
         if isinstance(other, TimeSeries):
-            return self.active_data <= other.active_data
-        return self.active_data <= other
+            return self.selected_data <= other.selected_data
+        return self.selected_data <= other
 
     def __eq__(self, other):
         """
@@ -482,8 +913,8 @@ class TimeSeries:
             True if all values in this TimeSeries are equal to other
         """
         if isinstance(other, TimeSeries):
-            return self.active_data == other.active_data
-        return self.active_data == other
+            return self.selected_data == other.selected_data
+        return self.selected_data == other
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         """
@@ -491,8 +922,8 @@ class TimeSeries:
 
         This allows NumPy functions to work with TimeSeries objects.
         """
-        # Convert any TimeSeries inputs to their active_data
-        inputs = [x.active_data if isinstance(x, TimeSeries) else x for x in inputs]
+        # Convert any TimeSeries inputs to their selected_data
+        inputs = [x.selected_data if isinstance(x, TimeSeries) else x for x in inputs]
         return getattr(ufunc, method)(*inputs, **kwargs)
 
     def __repr__(self):
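
Because `__array_ufunc__` unwraps `TimeSeries` inputs to their `selected_data`, NumPy ufuncs and the comparison operators both return plain `xarray.DataArray` objects. A sketch (illustrative only, not part of the diff):

    import numpy as np
    import pandas as pd

    from flixopt.core import TimeSeries

    timesteps = pd.date_range('2024-01-01', periods=3, freq='h', name='time')
    ts = TimeSeries.from_datasource(np.array([1.0, 4.0, 9.0]), 'gen', timesteps)

    roots = np.sqrt(ts)   # xarray.DataArray([1., 2., 3.])
    mask = ts > 2.0       # element-wise boolean DataArray
    assert mask.values.tolist() == [False, True, True]
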
@@ -506,10 +937,10 @@ class TimeSeries:
             'name': self.name,
             'aggregation_weight': self.aggregation_weight,
             'aggregation_group': self.aggregation_group,
-            'needs_extra_timestep': self.needs_extra_timestep,
-            'shape': self.active_data.shape,
-            'time_range': f'{self.active_timesteps[0]} to {self.active_timesteps[-1]}',
+            'has_extra_timestep': self.has_extra_timestep,
+            'shape': self.selected_data.shape,
         }
+
         attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items())
         return f'TimeSeries({attr_str})'
 
@@ -520,281 +951,333 @@ class TimeSeries:
         Returns:
             Descriptive string with statistics
         """
-        return f"TimeSeries '{self.name}': {self.stats}"
+        return f'TimeSeries "{self.name}":\n{textwrap.indent(self.stats, " ")}'
 
 
 class TimeSeriesCollection:
     """
-    Collection of TimeSeries objects with shared timestep management.
+    Simplified central manager for time series data with reference tracking.
 
-    TimeSeriesCollection handles multiple TimeSeries objects with synchronized
-    timesteps, provides operations on collections, and manages extra timesteps.
+    Provides a way to store time series data and work with subsets of dimensions
+    that automatically update all references when changed.
     """
 
     def __init__(
         self,
         timesteps: pd.DatetimeIndex,
+        scenarios: Optional[pd.Index] = None,
         hours_of_last_timestep: Optional[float] = None,
         hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None,
     ):
-        """
-        Args:
-            timesteps: The timesteps of the Collection.
-            hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified
-            hours_of_previous_timesteps: The duration of previous timesteps.
-                If None, the first time increment of time_series is used.
-                This is needed to calculate previous durations (for example consecutive_on_hours).
-                If you use an array, take care that its long enough to cover all previous values!
-        """
-        # Prepare and validate timesteps
-        self._validate_timesteps(timesteps)
-        self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps(
-            timesteps, hours_of_previous_timesteps
+        """Initialize a TimeSeriesCollection."""
+        self._full_timesteps = self._validate_timesteps(timesteps)
+        self._full_scenarios = self._validate_scenarios(scenarios)
+
+        self._full_timesteps_extra = self._create_timesteps_with_extra(
+            self._full_timesteps,
+            self._calculate_hours_of_final_timestep(
+                self._full_timesteps, hours_of_final_timestep=hours_of_last_timestep
+            ),
+        )
+        self._full_hours_per_timestep = self.calculate_hours_per_timestep(
+            self._full_timesteps_extra, self._full_scenarios
         )
 
-        # Set up timesteps and hours
-        self.all_timesteps = timesteps
-        self.all_timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep)
-        self.all_hours_per_timestep = self.calculate_hours_per_timestep(self.all_timesteps_extra)
+        self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps(
+            timesteps, hours_of_previous_timesteps
+        )  # TODO: Make dynamic
 
-        # Active timestep tracking
-        self._active_timesteps = None
-        self._active_timesteps_extra = None
-        self._active_hours_per_timestep = None
+        # Series that need extra timestep
+        self._has_extra_timestep: set = set()
 
-        # Dictionary of time series by name
-        self.time_series_data: Dict[str, TimeSeries] = {}
+        # Storage for TimeSeries objects
+        self._time_series: Dict[str, TimeSeries] = {}
 
-        # Aggregation
-        self.group_weights: Dict[str, float] = {}
-        self.weights: Dict[str, float] = {}
+        # Active subset selectors
+        self._selected_timesteps: Optional[pd.DatetimeIndex] = None
+        self._selected_scenarios: Optional[pd.Index] = None
+        self._selected_timesteps_extra: Optional[pd.DatetimeIndex] = None
+        self._selected_hours_per_timestep: Optional[xr.DataArray] = None
 
-    @classmethod
-    def with_uniform_timesteps(
-        cls, start_time: pd.Timestamp, periods: int, freq: str, hours_per_step: Optional[float] = None
-    ) -> 'TimeSeriesCollection':
-        """Create a collection with uniform timesteps."""
-        timesteps = pd.date_range(start_time, periods=periods, freq=freq, name='time')
-        return cls(timesteps, hours_of_previous_timesteps=hours_per_step)
-
-    def create_time_series(
-        self, data: Union[NumericData, TimeSeriesData], name: str, needs_extra_timestep: bool = False
+    def add_time_series(
+        self,
+        name: str,
+        data: Union[NumericDataTS, TimeSeries],
+        has_time_dim: bool = True,
+        has_scenario_dim: bool = True,
+        aggregation_weight: Optional[float] = None,
+        aggregation_group: Optional[str] = None,
+        has_extra_timestep: bool = False,
     ) -> TimeSeries:
         """
-        Creates a TimeSeries from the given data and adds it to the collection.
+        Add a new TimeSeries to the collection.
 
         Args:
-            data: The data to create the TimeSeries from.
-            name: The name of the TimeSeries.
-            needs_extra_timestep: Whether to create an additional timestep at the end of the timesteps.
-                The data to create the TimeSeries from.
+            name: Name of the time series
+            data: Data for the time series (can be raw data or an existing TimeSeries)
+            has_time_dim: Whether the TimeSeries has a time dimension
+            has_scenario_dim: Whether the TimeSeries has a scenario dimension
+            aggregation_weight: Weight used for aggregation
+            aggregation_group: Group name for shared aggregation weighting
+            has_extra_timestep: Whether this series needs an extra timestep
 
         Returns:
-            The created TimeSeries.
-
+            The created TimeSeries object
         """
-        # Check for duplicate name
-        if name in self.time_series_data:
-            raise ValueError(f"TimeSeries '{name}' already exists in this collection")
+        if name in self._time_series:
+            raise KeyError(f"TimeSeries '{name}' already exists in collection")
+        if not has_time_dim and has_extra_timestep:
+            raise ValueError('A TimeSeries without a time dimension cannot have an extra timestep')
+
+        # Choose which timesteps to use
+        if has_time_dim:
+            target_timesteps = self.timesteps_extra if has_extra_timestep else self.timesteps
+        else:
+            target_timesteps = None
 
-        # Determine which timesteps to use
-        timesteps_to_use = self.timesteps_extra if needs_extra_timestep else self.timesteps
+        target_scenarios = self.scenarios if has_scenario_dim else None
 
-        # Create the time series
-        if isinstance(data, TimeSeriesData):
-            time_series = TimeSeries.from_datasource(
+        # Create or adapt the TimeSeries object
+        if isinstance(data, TimeSeries):
+            # Use the existing TimeSeries but update its parameters
+            time_series = data
+            # Update the stored data to use our timesteps and scenarios
+            data_array = DataConverter.as_dataarray(
+                time_series.stored_data, timesteps=target_timesteps, scenarios=target_scenarios
+            )
+            time_series = TimeSeries(
+                data=data_array,
                 name=name,
-                data=data.data,
-                timesteps=timesteps_to_use,
-                aggregation_weight=data.agg_weight,
-                aggregation_group=data.agg_group,
-                needs_extra_timestep=needs_extra_timestep,
+                aggregation_weight=aggregation_weight or time_series.aggregation_weight,
+                aggregation_group=aggregation_group or time_series.aggregation_group,
+                has_extra_timestep=has_extra_timestep or time_series.has_extra_timestep,
             )
-            # Connect the user time series to the created TimeSeries
-            data.label = name
         else:
+            # Create a new TimeSeries from raw data
             time_series = TimeSeries.from_datasource(
-                name=name, data=data, timesteps=timesteps_to_use, needs_extra_timestep=needs_extra_timestep
+                data=data,
+                name=name,
+                timesteps=target_timesteps,
+                scenarios=target_scenarios,
+                aggregation_weight=aggregation_weight,
+                aggregation_group=aggregation_group,
+                has_extra_timestep=has_extra_timestep,
             )
 
-        # Add to the collection
-        self.add_time_series(time_series)
+        # Add to storage
+        self._time_series[name] = time_series
 
-        return time_series
+        # Track if it needs extra timestep
+        if has_extra_timestep:
+            self._has_extra_timestep.add(name)
 
-    def calculate_aggregation_weights(self) -> Dict[str, float]:
-        """Calculate and return aggregation weights for all time series."""
-        self.group_weights = self._calculate_group_weights()
-        self.weights = self._calculate_weights()
-
-        if np.all(np.isclose(list(self.weights.values()), 1, atol=1e-6)):
-            logger.info('All Aggregation weights were set to 1')
-
-        return self.weights
+        # Return the TimeSeries object
+        return time_series
 
-    def activate_timesteps(self, active_timesteps: Optional[pd.DatetimeIndex] = None):
+    def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None:
         """
-        Update active timesteps for the collection and all time series.
-        If no arguments are provided, the active timesteps are reset.
+        Set active subset for timesteps and scenarios.
 
         Args:
-            active_timesteps: The active timesteps of the model.
-                If None, the all timesteps of the TimeSeriesCollection are taken.
+            timesteps: Timesteps to activate, or None to clear
+            scenarios: Scenarios to activate, or None to clear
         """
-        if active_timesteps is None:
-            return self.reset()
+        if timesteps is None:
+            self._selected_timesteps = None
+            self._selected_timesteps_extra = None
+        else:
+            self._selected_timesteps = self._validate_timesteps(timesteps, self._full_timesteps)
+            self._selected_timesteps_extra = self._create_timesteps_with_extra(
+                timesteps, self._calculate_hours_of_final_timestep(timesteps, self._full_timesteps)
+            )
 
-        if not np.all(np.isin(active_timesteps, self.all_timesteps)):
-            raise ValueError('active_timesteps must be a subset of the timesteps of the TimeSeriesCollection')
+        if scenarios is None:
+            self._selected_scenarios = None
+        else:
+            self._selected_scenarios = self._validate_scenarios(scenarios, self._full_scenarios)
 
-        # Calculate derived timesteps
-        self._active_timesteps = active_timesteps
-        first_ts_index = np.where(self.all_timesteps == active_timesteps[0])[0][0]
-        last_ts_idx = np.where(self.all_timesteps == active_timesteps[-1])[0][0]
-        self._active_timesteps_extra = self.all_timesteps_extra[first_ts_index : last_ts_idx + 2]
-        self._active_hours_per_timestep = self.all_hours_per_timestep.isel(time=slice(first_ts_index, last_ts_idx + 1))
+        self._selected_hours_per_timestep = self.calculate_hours_per_timestep(self.timesteps_extra, self.scenarios)
 
-        # Update all time series
-        self._update_time_series_timesteps()
+        # Apply the selection to all TimeSeries objects
+        self._propagate_selection_to_time_series()
 
-    def reset(self):
-        """Reset active timesteps to defaults for all time series."""
-        self._active_timesteps = None
-        self._active_timesteps_extra = None
-        self._active_hours_per_timestep = None
 
-        for time_series in self.time_series_data.values():
-            time_series.reset()
+    def as_dataset(self, with_extra_timestep: bool = True, with_constants: bool = True) -> xr.Dataset:
+        """
+        Convert the TimeSeriesCollection to a xarray Dataset, containing the data of each TimeSeries.
 
-    def restore_data(self):
-        """Restore original data for all time series."""
-        for time_series in self.time_series_data.values():
-            time_series.restore_data()
+        Args:
+            with_extra_timestep: Whether to include the extra timestep.
+                Excluding it removes the last timestep for certain TimeSeries, but mitigates the presence of NaNs in others.
+            with_constants: Whether to include TimeSeries with constant values in the dataset.
+        """
+        if self.scenarios is None:
+            ds = xr.Dataset(coords={'time': self.timesteps_extra})
+        else:
+            ds = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra})
 
-    def add_time_series(self, time_series: TimeSeries):
-        """Add an existing TimeSeries to the collection."""
-        if time_series.name in self.time_series_data:
-            raise ValueError(f"TimeSeries '{time_series.name}' already exists in this collection")
+        for ts in self._time_series.values():
+            if not with_constants and ts.all_equal:
+                continue
+            ds[ts.name] = ts.selected_data
 
-        self.time_series_data[time_series.name] = time_series
+        if not with_extra_timestep:
+            return ds.sel(time=self.timesteps)
 
-    def insert_new_data(self, data: pd.DataFrame, include_extra_timestep: bool = False):
-        """
-        Update time series with new data from a DataFrame.
+        return ds
 
-        Args:
-            data: DataFrame containing new data with timestamps as index
-            include_extra_timestep: Whether the provided data already includes the extra timestep, by default False
-        """
-        if not isinstance(data, pd.DataFrame):
-            raise TypeError(f'data must be a pandas DataFrame, got {type(data).__name__}')
+    @property
+    def timesteps(self) -> pd.DatetimeIndex:
+        """Get the current active timesteps."""
+        if self._selected_timesteps is None:
+            return self._full_timesteps
+        return self._selected_timesteps
 
-        # Check if the DataFrame index matches the expected timesteps
-        expected_timesteps = self.timesteps_extra if include_extra_timestep else self.timesteps
-        if not data.index.equals(expected_timesteps):
-            raise ValueError(
-                f'DataFrame index must match {"collection timesteps with extra timestep" if include_extra_timestep else "collection timesteps"}'
-            )
+    @property
+    def timesteps_extra(self) -> pd.DatetimeIndex:
+        """Get the current active timesteps with extra timestep."""
+        if self._selected_timesteps_extra is None:
+            return self._full_timesteps_extra
+        return self._selected_timesteps_extra
 
-        for name, ts in self.time_series_data.items():
-            if name in data.columns:
-                if not ts.needs_extra_timestep:
-                    # For time series without extra timestep
-                    if include_extra_timestep:
-                        # If data includes extra timestep but series doesn't need it, exclude the last point
-                        ts.stored_data = data[name].iloc[:-1]
-                    else:
-                        # Use data as is
-                        ts.stored_data = data[name]
-                else:
-                    # For time series with extra timestep
-                    if include_extra_timestep:
-                        # Data already includes extra timestep
-                        ts.stored_data = data[name]
-                    else:
-                        # Need to add extra timestep - extrapolate from the last value
-                        extra_step_value = data[name].iloc[-1]
-                        extra_step_index = pd.DatetimeIndex([self.timesteps_extra[-1]], name='time')
-                        extra_step_series = pd.Series([extra_step_value], index=extra_step_index)
+    @property
+    def hours_per_timestep(self) -> xr.DataArray:
+        """Get the current active hours per timestep."""
+        if self._selected_hours_per_timestep is None:
+            return self._full_hours_per_timestep
+        return self._selected_hours_per_timestep
 
-                        # Combine the regular data with the extra timestep
-                        ts.stored_data = pd.concat([data[name], extra_step_series])
+    @property
+    def scenarios(self) -> Optional[pd.Index]:
+        """Get the current active scenarios."""
+        if self._selected_scenarios is None:
+            return self._full_scenarios
+        return self._selected_scenarios
+
+    def _propagate_selection_to_time_series(self) -> None:
+        """Apply the current selection to all TimeSeries objects."""
+        for ts_name, ts in self._time_series.items():
+            if ts.has_time_dim:
+                timesteps = self.timesteps_extra if ts_name in self._has_extra_timestep else self.timesteps
+            else:
+                timesteps = None
 
-            logger.debug(f'Updated data for {name}')
+            ts.set_selection(timesteps=timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None)
 
-    def to_dataframe(
-        self, filtered: Literal['all', 'constant', 'non_constant'] = 'non_constant', include_extra_timestep: bool = True
-    ) -> pd.DataFrame:
+    def __getitem__(self, name: str) -> TimeSeries:
         """
-        Convert collection to DataFrame with optional filtering and timestep control.
+        Get a TimeSeries by name.
 
         Args:
-            filtered: Filter time series by variability, by default 'non_constant'
-            include_extra_timestep: Whether to include the extra timestep in the result, by default True
+            name: Name of the time series
 
         Returns:
-            DataFrame representation of the collection
+            The TimeSeries object, with the current selection applied internally
         """
-        include_constants = filtered != 'non_constant'
-        ds = self.to_dataset(include_constants=include_constants)
+        if name in self._time_series:
+            # Return the TimeSeries object (it will handle selection internally)
+            return self._time_series[name]
+        raise ValueError(f'No TimeSeries named "{name}" found')
1188
+
1189
+ def __contains__(self, value) -> bool:
1190
+ if isinstance(value, str):
1191
+ return value in self._time_series
1192
+ elif isinstance(value, TimeSeries):
1193
+ return value.name in self._time_series
1194
+ raise TypeError(f'Invalid type for __contains__ of {self.__class__.__name__}: {type(value)}')
740
1195
 
741
- if not include_extra_timestep:
742
- ds = ds.isel(time=slice(None, -1))
743
-
744
- df = ds.to_dataframe()
745
-
746
- # Apply filtering
747
- if filtered == 'all':
748
- return df
749
- elif filtered == 'constant':
750
- return df.loc[:, df.nunique() == 1]
751
- elif filtered == 'non_constant':
752
- return df.loc[:, df.nunique() > 1]
753
- else:
754
- raise ValueError("filtered must be one of: 'all', 'constant', 'non_constant'")
1196
+ def __iter__(self) -> Iterator[TimeSeries]:
1197
+ """Iterate over TimeSeries objects."""
1198
+ return iter(self._time_series.values())
755
1199
 
756
- def to_dataset(self, include_constants: bool = True) -> xr.Dataset:
1200
+ def update_time_series(self, name: str, data: TimestepData) -> TimeSeries:
757
1201
  """
758
- Combine all time series into a single Dataset with all timesteps.
1202
+ Update an existing TimeSeries with new data.
759
1203
 
760
1204
  Args:
761
- include_constants: Whether to include time series with constant values, by default True
1205
+ name: Name of the TimeSeries to update
1206
+ data: New data to assign
762
1207
 
763
1208
  Returns:
764
- Dataset containing all selected time series with all timesteps
1209
+ The updated TimeSeries
1210
+
1211
+ Raises:
1212
+ KeyError: If no TimeSeries with the given name exists
765
1213
  """
766
- # Determine which series to include
767
- if include_constants:
768
- series_to_include = self.time_series_data.values()
769
- else:
770
- series_to_include = self.non_constants
1214
+ if name not in self._time_series:
1215
+ raise KeyError(f"No TimeSeries named '{name}' found")
771
1216
 
772
- # Create individual datasets and merge them
773
- ds = xr.merge([ts.active_data.to_dataset(name=ts.name) for ts in series_to_include])
1217
+ # Get the TimeSeries
1218
+ ts = self._time_series[name]
774
1219
 
775
- # Ensure the correct time coordinates
776
- ds = ds.reindex(time=self.timesteps_extra)
1220
+ # Determine which timesteps to use if the series has a time dimension
1221
+ if ts.has_time_dim:
1222
+ target_timesteps = self.timesteps_extra if name in self._has_extra_timestep else self.timesteps
1223
+ else:
1224
+ target_timesteps = None
777
1225
 
778
- ds.attrs.update(
779
- {
780
- 'timesteps_extra': f'{self.timesteps_extra[0]} ... {self.timesteps_extra[-1]} | len={len(self.timesteps_extra)}',
781
- 'hours_per_timestep': self._format_stats(self.hours_per_timestep),
782
- }
1226
+ # Convert data to proper format
1227
+ data_array = DataConverter.as_dataarray(
1228
+ data, timesteps=target_timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None
783
1229
  )
784
1230
 
785
- return ds
1231
+ # Update the TimeSeries
1232
+ ts.update_stored_data(data_array)
1233
+
1234
+ return ts
1235
+
+    def calculate_aggregation_weights(self) -> Dict[str, float]:
+        """Calculate and return aggregation weights for all time series."""
+        group_weights = self._calculate_group_weights()
 
-    def _update_time_series_timesteps(self):
-        """Update active timesteps for all time series."""
-        for ts in self.time_series_data.values():
-            if ts.needs_extra_timestep:
-                ts.active_timesteps = self.timesteps_extra
+        weights = {}
+        for name, ts in self._time_series.items():
+            if ts.aggregation_group is not None:
+                # Use group weight
+                weights[name] = group_weights.get(ts.aggregation_group, 1)
             else:
-                ts.active_timesteps = self.timesteps
+                # Use individual weight or default to 1
+                weights[name] = ts.aggregation_weight or 1
+
+        if np.all(np.isclose(list(weights.values()), 1, atol=1e-6)):
+            logger.info('All aggregation weights were set to 1')
+
+        return weights
+
+    def _calculate_group_weights(self) -> Dict[str, float]:
+        """Calculate weights for aggregation groups."""
+        # Count series in each group
+        groups = [ts.aggregation_group for ts in self._time_series.values() if ts.aggregation_group is not None]
+        group_counts = Counter(groups)
+
+        # Calculate weight for each group (1/count)
+        return {group: 1 / count for group, count in group_counts.items()}
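The 1/count rule makes every aggregation group contribute a total weight of 1, no matter how many series it contains. A standalone arithmetic check of the same computation:

    from collections import Counter

    groups = ['solar', 'solar', 'solar', 'heat']  # aggregation_group of grouped series
    group_weights = {g: 1 / c for g, c in Counter(groups).items()}
    print(group_weights)  # {'solar': 0.333..., 'heat': 1.0} -> each group sums to 1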
 
     @staticmethod
-    def _validate_timesteps(timesteps: pd.DatetimeIndex):
-        """Validate timesteps format and rename if needed."""
+    def _validate_timesteps(
+        timesteps: pd.DatetimeIndex, present_timesteps: Optional[pd.DatetimeIndex] = None
+    ) -> pd.DatetimeIndex:
+        """
+        Validate timesteps format and rename if needed.
+
+        Args:
+            timesteps: The timesteps to validate
+            present_timesteps: The timesteps that are present in the dataset
+
+        Raises:
+            TypeError: If timesteps is not a pandas DatetimeIndex
+            ValueError: If timesteps contains fewer than 2 timestamps
+            ValueError: If timesteps is not sorted
+            ValueError: If timesteps contains duplicates
+            ValueError: If timesteps is not a subset of present_timesteps
+        """
         if not isinstance(timesteps, pd.DatetimeIndex):
             raise TypeError('timesteps must be a pandas DatetimeIndex')
 
@@ -803,22 +1286,61 @@ class TimeSeriesCollection:
 
         # Ensure timesteps has the required name
         if timesteps.name != 'time':
-            logger.warning('Renamed timesteps to "time" (was "%s")', timesteps.name)
+            logger.debug('Renamed timesteps to "time" (was "%s")', timesteps.name)
             timesteps.name = 'time'
 
+        # Ensure timesteps is sorted
+        if not timesteps.is_monotonic_increasing:
+            raise ValueError('timesteps must be sorted')
+
+        # Ensure timesteps has no duplicates
+        if len(timesteps) != len(timesteps.drop_duplicates()):
+            raise ValueError('timesteps must not contain duplicates')
+
+        # Ensure timesteps is a subset of present_timesteps
+        if present_timesteps is not None and not set(timesteps).issubset(set(present_timesteps)):
+            raise ValueError('timesteps must be a subset of present_timesteps')
+
+        return timesteps
+
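Each new guard targets one common input mistake. A standalone illustration of what they reject, using plain pandas:

    import pandas as pd

    idx = pd.date_range('2024-01-01', periods=4, freq='h', name='time')

    idx[::-1].is_monotonic_increasing        # False -> 'timesteps must be sorted'
    dup = idx.append(idx[:1])
    len(dup) != len(dup.drop_duplicates())   # True  -> 'must not contain duplicates'
    set(idx).issubset(set(idx[:2]))          # False -> 'must be a subset of present_timesteps'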
     @staticmethod
-    def _create_timesteps_with_extra(
-        timesteps: pd.DatetimeIndex, hours_of_last_timestep: Optional[float]
-    ) -> pd.DatetimeIndex:
-        """Create timesteps with an extra step at the end."""
-        if hours_of_last_timestep is not None:
-            # Create the extra timestep using the specified duration
-            last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
-        else:
-            # Use the last interval as the extra timestep duration
-            last_date = pd.DatetimeIndex([timesteps[-1] + (timesteps[-1] - timesteps[-2])], name='time')
+    def _validate_scenarios(scenarios: pd.Index, present_scenarios: Optional[pd.Index] = None) -> Optional[pd.Index]:
+        """
+        Validate scenarios format and rename if needed.
+
+        Args:
+            scenarios: The scenarios to validate
+            present_scenarios: The scenarios that are present in the dataset
+
+        Raises:
+            ValueError: If scenarios is not a subset of present_scenarios
+        """
+        if scenarios is None:
+            return None
+
+        if not isinstance(scenarios, pd.Index):
+            logger.warning('Converting scenarios to pandas.Index')
+            scenarios = pd.Index(scenarios, name='scenario')
+
+        # Ensure scenarios has the required name
+        if scenarios.name != 'scenario':
+            logger.debug('Renamed scenarios to "scenario" (was "%s")', scenarios.name)
+            scenarios.name = 'scenario'
 
-        # Combine with original timesteps
+        # Ensure scenarios is a subset of present_scenarios
+        if present_scenarios is not None and not set(scenarios).issubset(set(present_scenarios)):
+            raise ValueError('scenarios must be a subset of present_scenarios')
+
+        return scenarios
+
+    @staticmethod
+    def _create_timesteps_with_extra(timesteps: pd.DatetimeIndex, hours_of_last_timestep: float) -> pd.DatetimeIndex:
+        """Create timesteps with an extra step at the end."""
+        last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
         return pd.DatetimeIndex(timesteps.append(last_date), name='time')
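The appended timestamp only closes the final interval so every real timestep has a defined duration. A quick illustration with plain pandas:

    import pandas as pd

    timesteps = pd.date_range('2024-01-01', periods=3, freq='h', name='time')
    last = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=1)], name='time')
    timesteps_extra = pd.DatetimeIndex(timesteps.append(last), name='time')
    print(len(timesteps_extra))  # 4 -> 3 real timesteps + 1 closing bound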
 
     @staticmethod
@@ -834,137 +1356,130 @@ class TimeSeriesCollection:
         return first_interval.total_seconds() / 3600  # Convert to hours
 
     @staticmethod
-    def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray:
-        """Calculate duration of each timestep."""
-        # Calculate differences between consecutive timestamps
-        hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1)
+    def _calculate_hours_of_final_timestep(
+        timesteps: pd.DatetimeIndex,
+        timesteps_superset: Optional[pd.DatetimeIndex] = None,
+        hours_of_final_timestep: Optional[float] = None,
+    ) -> float:
+        """
+        Calculate duration of the final timestep.
+        If timesteps_superset is provided, the final timestep is resolved against this superset.
+        The hours_of_final_timestep is only used if the final timestep can't be determined from the timesteps.
 
-        return xr.DataArray(
-            data=hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=('time',), name='hours_per_step'
-        )
+        Args:
+            timesteps: The full timesteps
+            timesteps_superset: The superset of timesteps
+            hours_of_final_timestep: The duration of the final timestep, if already known
 
-    def _calculate_group_weights(self) -> Dict[str, float]:
-        """Calculate weights for aggregation groups."""
-        # Count series in each group
-        groups = [ts.aggregation_group for ts in self.time_series_data.values() if ts.aggregation_group is not None]
-        group_counts = Counter(groups)
+        Returns:
+            The duration of the final timestep in hours
 
-        # Calculate weight for each group (1/count)
-        return {group: 1 / count for group, count in group_counts.items()}
+        Raises:
+            ValueError: If the provided timesteps end after the timesteps_superset
+        """
+        if timesteps_superset is None:
+            if hours_of_final_timestep is not None:
+                return hours_of_final_timestep
+            return (timesteps[-1] - timesteps[-2]) / pd.Timedelta(hours=1)
 
-    def _calculate_weights(self) -> Dict[str, float]:
-        """Calculate weights for all time series."""
-        # Calculate weight for each time series
-        weights = {}
-        for name, ts in self.time_series_data.items():
-            if ts.aggregation_group is not None:
-                # Use group weight
-                weights[name] = self.group_weights.get(ts.aggregation_group, 1)
-            else:
-                # Use individual weight or default to 1
-                weights[name] = ts.aggregation_weight or 1
+        final_timestep = timesteps[-1]
 
-        return weights
+        if timesteps_superset[-1] == final_timestep:
+            if hours_of_final_timestep is not None:
+                return hours_of_final_timestep
+            return (timesteps_superset[-1] - timesteps_superset[-2]) / pd.Timedelta(hours=1)
 
-    def _format_stats(self, data) -> str:
-        """Format statistics for a data array."""
-        if hasattr(data, 'values'):
-            values = data.values
+        elif timesteps_superset[-1] <= final_timestep:
+            raise ValueError(
+                f'The provided timesteps ({timesteps}) end after the provided timesteps_superset ({timesteps_superset})'
+            )
         else:
-            values = np.asarray(data)
-
-        mean_val = np.mean(values)
-        min_val = np.min(values)
-        max_val = np.max(values)
-
-        return f'mean: {mean_val:.2f}, min: {min_val:.2f}, max: {max_val:.2f}'
-
-    def __getitem__(self, name: str) -> TimeSeries:
-        """Get a TimeSeries by name."""
-        try:
-            return self.time_series_data[name]
-        except KeyError as e:
-            raise KeyError(f'TimeSeries "{name}" not found in the TimeSeriesCollection') from e
-
-    def __iter__(self) -> Iterator[TimeSeries]:
-        """Iterate through all TimeSeries in the collection."""
-        return iter(self.time_series_data.values())
-
-    def __len__(self) -> int:
-        """Get the number of TimeSeries in the collection."""
-        return len(self.time_series_data)
-
-    def __contains__(self, item: Union[str, TimeSeries]) -> bool:
-        """Check if a TimeSeries exists in the collection."""
-        if isinstance(item, str):
-            return item in self.time_series_data
-        elif isinstance(item, TimeSeries):
-            return any([item is ts for ts in self.time_series_data.values()])
-        return False
-
-    @property
-    def non_constants(self) -> List[TimeSeries]:
-        """Get time series with varying values."""
-        return [ts for ts in self.time_series_data.values() if not ts.all_equal]
-
-    @property
-    def constants(self) -> List[TimeSeries]:
-        """Get time series with constant values."""
-        return [ts for ts in self.time_series_data.values() if ts.all_equal]
-
-    @property
-    def timesteps(self) -> pd.DatetimeIndex:
-        """Get the active timesteps."""
-        return self.all_timesteps if self._active_timesteps is None else self._active_timesteps
-
-    @property
-    def timesteps_extra(self) -> pd.DatetimeIndex:
-        """Get the active timesteps with extra step."""
-        return self.all_timesteps_extra if self._active_timesteps_extra is None else self._active_timesteps_extra
+            # Get the first timestep in the superset that is after the final timestep
+            extra_timestep = timesteps_superset[timesteps_superset > final_timestep].min()
+            return (extra_timestep - final_timestep) / pd.Timedelta(hours=1)
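The superset branch matters when working on a slice of a larger index: the slice's final duration is read from the full index instead of being guessed from the slice itself. A standalone sketch of two of the cases:

    import pandas as pd

    full = pd.date_range('2024-01-01', periods=5, freq='h', name='time')  # superset
    segment = full[:3]                                                    # ends inside the superset

    # Without a superset: fall back to the segment's own last interval
    print((segment[-1] - segment[-2]) / pd.Timedelta(hours=1))  # 1.0

    # With a superset: distance to the next timestep of the full index
    nxt = full[full > segment[-1]].min()
    print((nxt - segment[-1]) / pd.Timedelta(hours=1))          # 1.0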
 
-    @property
-    def hours_per_timestep(self) -> xr.DataArray:
-        """Get the duration of each active timestep."""
-        return (
-            self.all_hours_per_timestep if self._active_hours_per_timestep is None else self._active_hours_per_timestep
-        )
-
-    @property
-    def hours_of_last_timestep(self) -> float:
-        """Get the duration of the last timestep."""
-        return float(self.hours_per_timestep[-1].item())
-
-    def __repr__(self):
-        return f'TimeSeriesCollection:\n{self.to_dataset()}'
+    @staticmethod
+    def calculate_hours_per_timestep(
+        timesteps_extra: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None
+    ) -> xr.DataArray:
+        """Calculate duration of each timestep."""
+        # Calculate differences between consecutive timestamps
+        hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1)
 
-    def __str__(self):
-        longest_name = max([time_series.name for time_series in self.time_series_data], key=len)
+        return DataConverter.as_dataarray(
+            hours_per_step,
+            timesteps=timesteps_extra[:-1],
+            scenarios=scenarios,
+        ).rename('hours_per_step')
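Because durations are simple differences over the extended index, the result has exactly one value per real timestep, and irregular spacing is handled for free. A quick check with numpy and pandas:

    import numpy as np
    import pandas as pd

    timesteps_extra = pd.DatetimeIndex(
        ['2024-01-01 00:00', '2024-01-01 01:00', '2024-01-01 03:00'], name='time'
    )
    print(np.diff(timesteps_extra) / pd.Timedelta(hours=1))  # [1. 2.]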
 
-        stats_summary = '\n'.join(
-            [
-                f' - {time_series.name:<{len(longest_name)}}: {get_numeric_stats(time_series.active_data)}'
-                for time_series in self.time_series_data
-            ]
-        )
 
-        return (
-            f'TimeSeriesCollection with {len(self.time_series_data)} series\n'
-            f' Time Range: {self.timesteps[0]} {self.timesteps[-1]}\n'
-            f' No. of timesteps: {len(self.timesteps)} + 1 extra\n'
-            f' Hours per timestep: {get_numeric_stats(self.hours_per_timestep)}\n'
-            f' Time Series Data:\n'
-            f'{stats_summary}'
-        )
+def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10, by_scenario: bool = False) -> str:
+    """
+    Calculates the mean, median, min, max, and standard deviation of a numeric DataArray.
 
+    Args:
+        data: The DataArray to analyze
+        decimals: Number of decimal places to show
+        padd: Padding for alignment
+        by_scenario: Whether to break down stats by scenario
 
-def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10) -> str:
-    """Calculates the mean, median, min, max, and standard deviation of a numeric DataArray."""
+    Returns:
+        String representation of data statistics
+    """
     format_spec = f'>{padd}.{decimals}f' if padd else f'.{decimals}f'
+
+    # If by_scenario is True and there's a scenario dimension with multiple values
+    if by_scenario and 'scenario' in data.dims and data.sizes['scenario'] > 1:
+        results = []
+        for scenario in data.coords['scenario'].values:
+            scenario_data = data.sel(scenario=scenario)
+            if np.unique(scenario_data).size == 1:
+                results.append(f' {scenario}: {scenario_data.max().item():{format_spec}} (constant)')
+            else:
+                mean = scenario_data.mean().item()
+                median = scenario_data.median().item()
+                min_val = scenario_data.min().item()
+                max_val = scenario_data.max().item()
+                std = scenario_data.std().item()
+                results.append(
+                    f' {scenario}: {mean:{format_spec}} (mean), {median:{format_spec}} (median), '
+                    f'{min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
+                )
+        return '\n'.join(['By scenario:'] + results)
+
+    # Standard logic for non-scenario data or aggregated stats
     if np.unique(data).size == 1:
         return f'{data.max().item():{format_spec}} (constant)'
+
     mean = data.mean().item()
     median = data.median().item()
     min_val = data.min().item()
     max_val = data.max().item()
     std = data.std().item()
+
     return f'{mean:{format_spec}} (mean), {median:{format_spec}} (median), {min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
+
+
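With a multi-entry scenario dimension, by_scenario=True reports one line per scenario; otherwise the statistics pool all values. A hedged usage sketch (input values are made up):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.arange(8.0).reshape(2, 4),
        coords={'scenario': ['low', 'high'], 'time': range(4)},
        dims=('scenario', 'time'),
    )
    print(get_numeric_stats(da))                    # pooled stats over both scenarios
    print(get_numeric_stats(da, by_scenario=True))  # 'By scenario:' with one line each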
+def extract_data(
+    data: Optional[Union[int, float, xr.DataArray, TimeSeries]],
+    if_none: Any = None,
+) -> Any:
+    """
+    Extract the underlying data from a TimeSeries, passing scalars and DataArrays through.
+
+    Args:
+        data: The data to extract (scalar, DataArray, or TimeSeries)
+        if_none: The value to return if data is None
+
+    Returns:
+        The underlying or passed-through data, or the value specified by if_none
+    """
+    if data is None:
+        return if_none
+    if isinstance(data, TimeSeries):
+        return data.selected_data
+    if isinstance(data, xr.DataArray):
+        return data
+    if isinstance(data, (int, float, np.integer, np.floating)):
+        return data
+    raise TypeError(f'Unsupported data type: {type(data).__name__}')
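extract_data() is a small normalizer for optional inputs: None becomes a caller-chosen default, a TimeSeries is unwrapped to its selected_data, and plain scalars or DataArrays pass through untouched. A hedged sketch (the commented line assumes a TimeSeries object `ts` from the library):

    print(extract_data(None, if_none=0.0))  # 0.0 -> default for missing input
    print(extract_data(3.5))                # 3.5 -> scalars pass through
    # extract_data(ts)                      # would return ts.selected_data (a DataArray)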