flixopt 2.2.0b0-py3-none-any.whl → 2.2.0rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of flixopt might be problematic.

Files changed (48)
  1. docs/examples/00-Minimal Example.md +1 -1
  2. docs/examples/01-Basic Example.md +1 -1
  3. docs/examples/02-Complex Example.md +1 -1
  4. docs/examples/index.md +1 -1
  5. docs/faq/contribute.md +26 -14
  6. docs/faq/index.md +1 -1
  7. docs/javascripts/mathjax.js +1 -1
  8. docs/user-guide/Mathematical Notation/Bus.md +1 -1
  9. docs/user-guide/Mathematical Notation/Effects, Penalty & Objective.md +13 -13
  10. docs/user-guide/Mathematical Notation/Flow.md +1 -1
  11. docs/user-guide/Mathematical Notation/LinearConverter.md +2 -2
  12. docs/user-guide/Mathematical Notation/Piecewise.md +1 -1
  13. docs/user-guide/Mathematical Notation/Storage.md +1 -1
  14. docs/user-guide/Mathematical Notation/index.md +1 -1
  15. docs/user-guide/Mathematical Notation/others.md +1 -1
  16. docs/user-guide/index.md +2 -2
  17. flixopt/__init__.py +5 -0
  18. flixopt/aggregation.py +0 -1
  19. flixopt/calculation.py +40 -72
  20. flixopt/commons.py +10 -1
  21. flixopt/components.py +326 -154
  22. flixopt/core.py +459 -966
  23. flixopt/effects.py +67 -270
  24. flixopt/elements.py +76 -84
  25. flixopt/features.py +172 -154
  26. flixopt/flow_system.py +70 -99
  27. flixopt/interface.py +315 -147
  28. flixopt/io.py +27 -56
  29. flixopt/linear_converters.py +3 -3
  30. flixopt/network_app.py +755 -0
  31. flixopt/plotting.py +16 -34
  32. flixopt/results.py +108 -806
  33. flixopt/structure.py +11 -67
  34. flixopt/utils.py +9 -6
  35. {flixopt-2.2.0b0.dist-info → flixopt-2.2.0rc2.dist-info}/METADATA +63 -42
  36. flixopt-2.2.0rc2.dist-info/RECORD +54 -0
  37. {flixopt-2.2.0b0.dist-info → flixopt-2.2.0rc2.dist-info}/WHEEL +1 -1
  38. scripts/extract_release_notes.py +45 -0
  39. docs/release-notes/_template.txt +0 -32
  40. docs/release-notes/index.md +0 -7
  41. docs/release-notes/v2.0.0.md +0 -93
  42. docs/release-notes/v2.0.1.md +0 -12
  43. docs/release-notes/v2.1.0.md +0 -31
  44. docs/release-notes/v2.2.0.md +0 -55
  45. docs/user-guide/Mathematical Notation/Investment.md +0 -115
  46. flixopt-2.2.0b0.dist-info/RECORD +0 -59
  47. {flixopt-2.2.0b0.dist-info → flixopt-2.2.0rc2.dist-info}/licenses/LICENSE +0 -0
  48. {flixopt-2.2.0b0.dist-info → flixopt-2.2.0rc2.dist-info}/top_level.txt +0 -0
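
The headline change in this diff is the removal of the experimental scenario dimension from flixopt/core.py: DataConverter.as_dataarray now accepts only a timesteps index, and TimeSeries replaces set_selection/selected_data with the active_timesteps property and active_data. A minimal sketch of the new call pattern, based solely on the signatures in the diff below (the variable names are illustrative, not taken from the flixopt docs):

    import pandas as pd
    from flixopt.core import DataConverter, TimeSeries

    # 2.2.0rc2: the index must be named 'time'; the 'scenarios' argument is gone.
    timesteps = pd.date_range('2024-01-01', periods=4, freq='h', name='time')
    da = DataConverter.as_dataarray(42, timesteps=timesteps)

    # selected_data/set_selection are replaced by active_data/active_timesteps.
    ts = TimeSeries.from_datasource(data=da, name='demand', timesteps=timesteps)
    ts.active_timesteps = timesteps[:2]  # restrict the active window
    print(ts.active_data.shape)  # (2,)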
flixopt/core.py CHANGED
@@ -7,7 +7,6 @@ import inspect
 import json
 import logging
 import pathlib
-import textwrap
 from collections import Counter
 from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
 
@@ -26,12 +25,6 @@ NumericData = Union[int, float, np.integer, np.floating, np.ndarray, pd.Series,
 NumericDataTS = Union[NumericData, 'TimeSeriesData']
 """Represents either standard numeric data or TimeSeriesData."""
 
-TimestepData = NumericData
-"""Represents any form of numeric data that corresponds to timesteps."""
-
-ScenarioData = NumericData
-"""Represents any form of numeric data that corresponds to scenarios."""
-
 
 class PlausibilityError(Exception):
     """Error for a failing Plausibility check."""
@@ -47,446 +40,69 @@ class ConversionError(Exception):
 
 class DataConverter:
     """
-    Converts various data types into xarray.DataArray with optional time and scenario dimension.
+    Converts various data types into xarray.DataArray with a timesteps index.
 
-    Current implementation handles:
-    - Scalar values
-    - NumPy arrays
-    - xarray.DataArray
+    Supports: scalars, arrays, Series, DataFrames, and DataArrays.
    """
 
     @staticmethod
-    def as_dataarray(
-        data: TimestepData, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None
-    ) -> xr.DataArray:
-        """
-        Convert data to xarray.DataArray with specified dimensions.
-
-        Args:
-            data: The data to convert (scalar, array, or DataArray)
-            timesteps: Optional DatetimeIndex for time dimension
-            scenarios: Optional Index for scenario dimension
-
-        Returns:
-            DataArray with the converted data
-        """
-        # Prepare dimensions and coordinates
-        coords, dims = DataConverter._prepare_dimensions(timesteps, scenarios)
-
-        # Select appropriate converter based on data type
-        if isinstance(data, (int, float, np.integer, np.floating)):
-            return DataConverter._convert_scalar(data, coords, dims)
-
-        elif isinstance(data, xr.DataArray):
-            return DataConverter._convert_dataarray(data, coords, dims)
-
-        elif isinstance(data, np.ndarray):
-            return DataConverter._convert_ndarray(data, coords, dims)
-
-        elif isinstance(data, pd.Series):
-            return DataConverter._convert_series(data, coords, dims)
-
-        elif isinstance(data, pd.DataFrame):
-            return DataConverter._convert_dataframe(data, coords, dims)
-
-        else:
-            raise ConversionError(f'Unsupported data type: {type(data).__name__}')
-
-    @staticmethod
-    def _validate_timesteps(timesteps: pd.DatetimeIndex) -> pd.DatetimeIndex:
-        """
-        Validate and prepare time index.
-
-        Args:
-            timesteps: The time index to validate
-
-        Returns:
-            Validated time index
-        """
+    def as_dataarray(data: NumericData, timesteps: pd.DatetimeIndex) -> xr.DataArray:
+        """Convert data to xarray.DataArray with specified timesteps index."""
         if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0:
-            raise ConversionError('Timesteps must be a non-empty DatetimeIndex')
-
+            raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}')
         if not timesteps.name == 'time':
-            raise ConversionError(f'Scenarios must be named "time", got "{timesteps.name}"')
-
-        return timesteps
-
-    @staticmethod
-    def _validate_scenarios(scenarios: pd.Index) -> pd.Index:
-        """
-        Validate and prepare scenario index.
-
-        Args:
-            scenarios: The scenario index to validate
-        """
-        if not isinstance(scenarios, pd.Index) or len(scenarios) == 0:
-            raise ConversionError('Scenarios must be a non-empty Index')
-
-        if not scenarios.name == 'scenario':
-            raise ConversionError(f'Scenarios must be named "scenario", got "{scenarios.name}"')
-
-        return scenarios
-
-    @staticmethod
-    def _prepare_dimensions(
-        timesteps: Optional[pd.DatetimeIndex], scenarios: Optional[pd.Index]
-    ) -> Tuple[Dict[str, pd.Index], Tuple[str, ...]]:
-        """
-        Prepare coordinates and dimensions for the DataArray.
-
-        Args:
-            timesteps: Optional time index
-            scenarios: Optional scenario index
-
-        Returns:
-            Tuple of (coordinates dict, dimensions tuple)
-        """
-        # Validate inputs if provided
-        if timesteps is not None:
-            timesteps = DataConverter._validate_timesteps(timesteps)
-
-        if scenarios is not None:
-            scenarios = DataConverter._validate_scenarios(scenarios)
-
-        # Build coordinates and dimensions
-        coords = {}
-        dims = []
-
-        if timesteps is not None:
-            coords['time'] = timesteps
-            dims.append('time')
-
-        if scenarios is not None:
-            coords['scenario'] = scenarios
-            dims.append('scenario')
-
-        return coords, tuple(dims)
-
-    @staticmethod
-    def _convert_scalar(
-        data: Union[int, float, np.integer, np.floating], coords: Dict[str, pd.Index], dims: Tuple[str, ...]
-    ) -> xr.DataArray:
-        """
-        Convert a scalar value to a DataArray.
-
-        Args:
-            data: The scalar value
-            coords: Coordinate dictionary
-            dims: Dimension names
-
-        Returns:
-            DataArray with the scalar value
-        """
-        if isinstance(data, (np.integer, np.floating)):
-            data = data.item()
-        return xr.DataArray(data, coords=coords, dims=dims)
-
-    @staticmethod
-    def _convert_dataarray(data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
-        """
-        Convert an existing DataArray to desired dimensions.
-
-        Args:
-            data: The source DataArray
-            coords: Target coordinates
-            dims: Target dimensions
-
-        Returns:
-            DataArray with the target dimensions
-        """
-        # No dimensions case
-        if len(dims) == 0:
-            if data.size != 1:
-                raise ConversionError('When converting to dimensionless DataArray, source must be scalar')
-            return xr.DataArray(data.values.item())
-
-        # Check if data already has matching dimensions and coordinates
-        if set(data.dims) == set(dims):
-            # Check if coordinates match
-            is_compatible = True
-            for dim in dims:
-                if dim in data.dims and not np.array_equal(data.coords[dim].values, coords[dim].values):
-                    is_compatible = False
-                    break
-
-            if is_compatible:
-                # Ensure dimensions are in the correct order
-                if data.dims != dims:
-                    # Transpose to get dimensions in the right order
-                    return data.transpose(*dims).copy(deep=True)
-                else:
-                    # Return existing DataArray if compatible and order is correct
-                    return data.copy(deep=True)
-
-        # Handle dimension broadcasting
-        if len(data.dims) == 1 and len(dims) == 2:
-            # Single dimension to two dimensions
-            if data.dims[0] == 'time' and 'scenario' in dims:
-                # Broadcast time dimension to include scenarios
-                return DataConverter._broadcast_time_to_scenarios(data, coords, dims)
-
-            elif data.dims[0] == 'scenario' and 'time' in dims:
-                # Broadcast scenario dimension to include time
-                return DataConverter._broadcast_scenario_to_time(data, coords, dims)
-
-        raise ConversionError(
-            f'Cannot convert {data.dims} to {dims}. Source coordinates: {data.coords}, Target coordinates: {coords}'
-        )
-    @staticmethod
-    def _broadcast_time_to_scenarios(
-        data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
-    ) -> xr.DataArray:
-        """
-        Broadcast a time-only DataArray to include scenarios.
-
-        Args:
-            data: The time-indexed DataArray
-            coords: Target coordinates
-            dims: Target dimensions
-
-        Returns:
-            DataArray with time and scenario dimensions
-        """
-        # Check compatibility
-        if not np.array_equal(data.coords['time'].values, coords['time'].values):
-            raise ConversionError("Source time coordinates don't match target time coordinates")
-
-        if len(coords['scenario']) <= 1:
-            return data.copy(deep=True)
-
-        # Broadcast values
-        values = np.repeat(data.values[:, np.newaxis], len(coords['scenario']), axis=1)
-        return xr.DataArray(values.copy(), coords=coords, dims=dims)
-
-    @staticmethod
-    def _broadcast_scenario_to_time(
-        data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
-    ) -> xr.DataArray:
-        """
-        Broadcast a scenario-only DataArray to include time.
-
-        Args:
-            data: The scenario-indexed DataArray
-            coords: Target coordinates
-            dims: Target dimensions
-
-        Returns:
-            DataArray with time and scenario dimensions
-        """
-        # Check compatibility
-        if not np.array_equal(data.coords['scenario'].values, coords['scenario'].values):
-            raise ConversionError("Source scenario coordinates don't match target scenario coordinates")
-
-        # Broadcast values
-        values = np.repeat(data.values[:, np.newaxis], len(coords['time']), axis=1).T
-        return xr.DataArray(values.copy(), coords=coords, dims=dims)
-
-    @staticmethod
-    def _convert_ndarray(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
-        """
-        Convert a NumPy array to a DataArray.
-
-        Args:
-            data: The NumPy array
-            coords: Target coordinates
-            dims: Target dimensions
-
-        Returns:
-            DataArray from the NumPy array
-        """
-        # Handle dimensionless case
-        if len(dims) == 0:
-            if data.size != 1:
-                raise ConversionError('Without dimensions, can only convert scalar arrays')
-            return xr.DataArray(data.item())
-
-        # Handle single dimension
-        elif len(dims) == 1:
-            return DataConverter._convert_ndarray_single_dim(data, coords, dims)
-
-        # Handle two dimensions
-        elif len(dims) == 2:
-            return DataConverter._convert_ndarray_two_dims(data, coords, dims)
-
-        else:
-            raise ConversionError('Maximum 2 dimensions supported')
-
-    @staticmethod
-    def _convert_ndarray_single_dim(
-        data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
-    ) -> xr.DataArray:
-        """
-        Convert a NumPy array to a single-dimension DataArray.
-
-        Args:
-            data: The NumPy array
-            coords: Target coordinates
-            dims: Target dimensions (length 1)
-
-        Returns:
-            DataArray with single dimension
-        """
-        dim_name = dims[0]
-        dim_length = len(coords[dim_name])
-
-        if data.ndim == 1:
-            # 1D array must match dimension length
-            if data.shape[0] != dim_length:
-                raise ConversionError(f"Array length {data.shape[0]} doesn't match {dim_name} length {dim_length}")
-            return xr.DataArray(data, coords=coords, dims=dims)
-        else:
-            raise ConversionError(f'Expected 1D array for single dimension, got {data.ndim}D')
-
-    @staticmethod
-    def _convert_ndarray_two_dims(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
-        """
-        Convert a NumPy array to a two-dimension DataArray.
-
-        Args:
-            data: The NumPy array
-            coords: Target coordinates
-            dims: Target dimensions (length 2)
-
-        Returns:
-            DataArray with two dimensions
-        """
-        scenario_length = len(coords['scenario'])
-        time_length = len(coords['time'])
-
-        if data.ndim == 1:
-            # For 1D array, create 2D array based on which dimension it matches
-            if data.shape[0] == time_length:
-                # Broadcast across scenarios
-                values = np.repeat(data[:, np.newaxis], scenario_length, axis=1)
-                return xr.DataArray(values, coords=coords, dims=dims)
-            elif data.shape[0] == scenario_length:
-                # Broadcast across time
-                values = np.repeat(data[np.newaxis, :], time_length, axis=0)
-                return xr.DataArray(values, coords=coords, dims=dims)
-            else:
-                raise ConversionError(f"1D array length {data.shape[0]} doesn't match either dimension")
-
-        elif data.ndim == 2:
-            # For 2D array, shape must match dimensions
-            expected_shape = (time_length, scenario_length)
-            if data.shape != expected_shape:
-                raise ConversionError(f"2D array shape {data.shape} doesn't match expected shape {expected_shape}")
-            return xr.DataArray(data, coords=coords, dims=dims)
-
-        else:
-            raise ConversionError(f'Expected 1D or 2D array for two dimensions, got {data.ndim}D')
-
-    @staticmethod
-    def _convert_series(data: pd.Series, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
-        """
-        Convert pandas Series to xarray DataArray.
-
-        Args:
-            data: pandas Series to convert
-            coords: Target coordinates
-            dims: Target dimensions
-
-        Returns:
-            DataArray from the pandas Series
-        """
-        # Handle single dimension case
-        if len(dims) == 1:
-            dim_name = dims[0]
-
-            # Check if series index matches the dimension
-            if data.index.equals(coords[dim_name]):
-                return xr.DataArray(data.values.copy(), coords=coords, dims=dims)
+            raise ConversionError(f'DatetimeIndex is not named correctly. Must be named "time", got {timesteps.name=}')
+
+        coords = [timesteps]
+        dims = ['time']
+        expected_shape = (len(timesteps),)
+
+        try:
+            if isinstance(data, (int, float, np.integer, np.floating)):
+                return xr.DataArray(data, coords=coords, dims=dims)
+            elif isinstance(data, pd.DataFrame):
+                if not data.index.equals(timesteps):
+                    raise ConversionError(
+                        f"DataFrame index doesn't match timesteps index. "
+                        f'Its missing the following time steps: {timesteps.difference(data.index)}. '
+                        f'Some parameters might need an extra timestep at the end.'
+                    )
+                if not len(data.columns) == 1:
+                    raise ConversionError('DataFrame must have exactly one column')
+                return xr.DataArray(data.values.flatten(), coords=coords, dims=dims)
+            elif isinstance(data, pd.Series):
+                if not data.index.equals(timesteps):
+                    raise ConversionError(
+                        f"Series index doesn't match timesteps index. "
+                        f'Its missing the following time steps: {timesteps.difference(data.index)}. '
+                        f'Some parameters might need an extra timestep at the end.'
+                    )
+                return xr.DataArray(data.values, coords=coords, dims=dims)
+            elif isinstance(data, np.ndarray):
+                if data.ndim != 1:
+                    raise ConversionError(f'Array must be 1-dimensional, got {data.ndim}')
+                elif data.shape[0] != expected_shape[0]:
+                    raise ConversionError(f"Array shape {data.shape} doesn't match expected {expected_shape}")
+                return xr.DataArray(data, coords=coords, dims=dims)
+            elif isinstance(data, xr.DataArray):
+                if data.dims != tuple(dims):
+                    raise ConversionError(f"DataArray dimensions {data.dims} don't match expected {dims}")
+                if data.sizes[dims[0]] != len(coords[0]):
+                    raise ConversionError(
+                        f"DataArray length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}"
+                    )
+                return data.copy(deep=True)
             else:
-                raise ConversionError(
-                    f"Series index doesn't match {dim_name} coordinates.\n"
-                    f'Series index: {data.index}\n'
-                    f'Target {dim_name} coordinates: {coords[dim_name]}'
-                )
-
-        # Handle two dimensions case
-        elif len(dims) == 2:
-            # Check if dimensions are time and scenario
-            if dims != ('time', 'scenario'):
-                raise ConversionError(
-                    f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}'
-                )
-
-            # Case 1: Series is indexed by time
-            if data.index.equals(coords['time']):
-                # Broadcast across scenarios
-                values = np.repeat(data.values[:, np.newaxis], len(coords['scenario']), axis=1)
-                return xr.DataArray(values.copy(), coords=coords, dims=dims)
-
-            # Case 2: Series is indexed by scenario
-            elif data.index.equals(coords['scenario']):
-                # Broadcast across time
-                values = np.repeat(data.values[np.newaxis, :], len(coords['time']), axis=0)
-                return xr.DataArray(values.copy(), coords=coords, dims=dims)
-
-            else:
-                raise ConversionError(
-                    "Series index must match either 'time' or 'scenario' coordinates.\n"
-                    f'Series index: {data.index}\n'
-                    f'Target time coordinates: {coords["time"]}\n'
-                    f'Target scenario coordinates: {coords["scenario"]}'
-                )
-
-        else:
-            raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}')
-
-    @staticmethod
-    def _convert_dataframe(data: pd.DataFrame, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
-        """
-        Convert pandas DataFrame to xarray DataArray.
-        Only allows time as index and scenarios as columns.
-
-        Args:
-            data: pandas DataFrame to convert
-            coords: Target coordinates
-            dims: Target dimensions
-
-        Returns:
-            DataArray from the pandas DataFrame
-        """
-        # Single dimension case
-        if len(dims) == 1:
-            # If DataFrame has one column, treat it like a Series
-            if len(data.columns) == 1:
-                series = data.iloc[:, 0]
-                return DataConverter._convert_series(series, coords, dims)
-
-            raise ConversionError(
-                f'When converting DataFrame to single-dimension DataArray, DataFrame must have exactly one column, got {len(data.columns)}'
-            )
-
-        # Two dimensions case
-        elif len(dims) == 2:
-            # Check if dimensions are time and scenario
-            if dims != ('time', 'scenario'):
-                raise ConversionError(
-                    f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}'
-                )
-
-            # DataFrame must have time as index and scenarios as columns
-            if data.index.equals(coords['time']) and data.columns.equals(coords['scenario']):
-                # Create DataArray with proper dimension order
-                return xr.DataArray(data.values.copy(), coords=coords, dims=dims)
-            else:
-                raise ConversionError(
-                    'DataFrame must have time as index and scenarios as columns.\n'
-                    f'DataFrame index: {data.index}\n'
-                    f'DataFrame columns: {data.columns}\n'
-                    f'Target time coordinates: {coords["time"]}\n'
-                    f'Target scenario coordinates: {coords["scenario"]}'
-                )
-
-        else:
-            raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}')
+                raise ConversionError(f'Unsupported type: {type(data).__name__}')
+        except Exception as e:
+            if isinstance(e, ConversionError):
+                raise
+            raise ConversionError(f'Converting data {type(data)} to xarray.Dataset raised an error: {str(e)}') from e
 
 
 class TimeSeriesData:
     # TODO: Move to Interface.py
-    def __init__(self, data: TimestepData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None):
+    def __init__(self, data: NumericData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None):
         """
         timeseries class for transmit timeseries AND special characteristics of timeseries,
         i.g. to define weights needed in calculation_type 'aggregated'
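
The rewritten converter above validates the index up front and wraps unexpected failures in ConversionError. A small sketch of the resulting behavior, assuming only what the new as_dataarray body shows:

    import pandas as pd
    from flixopt.core import ConversionError, DataConverter

    timesteps = pd.date_range('2024-01-01', periods=3, freq='h', name='time')
    series = pd.Series([1.0, 2.0, 3.0], index=timesteps)

    da = DataConverter.as_dataarray(series, timesteps=timesteps)  # dims ('time',)

    try:
        # An index mismatch now reports the missing timesteps explicitly.
        DataConverter.as_dataarray(series.iloc[:-1], timesteps=timesteps)
    except ConversionError as err:
        print(err)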
@@ -538,19 +154,18 @@ class TimeSeries:
         name (str): The name of the time series
         aggregation_weight (Optional[float]): Weight used for aggregation
         aggregation_group (Optional[str]): Group name for shared aggregation weighting
-        has_extra_timestep (bool): Whether this series needs an extra timestep
+        needs_extra_timestep (bool): Whether this series needs an extra timestep
     """
 
     @classmethod
     def from_datasource(
         cls,
-        data: NumericDataTS,
+        data: NumericData,
         name: str,
         timesteps: pd.DatetimeIndex,
-        scenarios: Optional[pd.Index] = None,
         aggregation_weight: Optional[float] = None,
         aggregation_group: Optional[str] = None,
-        has_extra_timestep: bool = False,
+        needs_extra_timestep: bool = False,
     ) -> 'TimeSeries':
         """
         Initialize the TimeSeries from multiple data sources.
@@ -559,20 +174,19 @@ class TimeSeries:
             data: The time series data
             name: The name of the TimeSeries
             timesteps: The timesteps of the TimeSeries
-            scenarios: The scenarios of the TimeSeries
             aggregation_weight: The weight in aggregation calculations
             aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing
-            has_extra_timestep: Whether this series requires an extra timestep
+            needs_extra_timestep: Whether this series requires an extra timestep
 
         Returns:
             A new TimeSeries instance
         """
         return cls(
-            DataConverter.as_dataarray(data, timesteps, scenarios),
+            DataConverter.as_dataarray(data, timesteps),
             name,
             aggregation_weight,
             aggregation_group,
-            has_extra_timestep,
+            needs_extra_timestep,
         )
 
     @classmethod
@@ -606,7 +220,7 @@ class TimeSeries:
             name=data['name'],
             aggregation_weight=data['aggregation_weight'],
             aggregation_group=data['aggregation_group'],
-            has_extra_timestep=data['has_extra_timestep'],
+            needs_extra_timestep=data['needs_extra_timestep'],
         )
 
     def __init__(
  def __init__(
@@ -615,7 +229,7 @@ class TimeSeries:
615
229
  name: str,
616
230
  aggregation_weight: Optional[float] = None,
617
231
  aggregation_group: Optional[str] = None,
618
- has_extra_timestep: bool = False,
232
+ needs_extra_timestep: bool = False,
619
233
  ):
620
234
  """
621
235
  Initialize a TimeSeries with a DataArray.
@@ -625,40 +239,35 @@ class TimeSeries:
             name: The name of the TimeSeries
             aggregation_weight: The weight in aggregation calculations
             aggregation_group: Group this TimeSeries belongs to for weight sharing
-            has_extra_timestep: Whether this series requires an extra timestep
+            needs_extra_timestep: Whether this series requires an extra timestep
 
         Raises:
-            ValueError: If data has unsupported dimensions
+            ValueError: If data doesn't have a 'time' index or has more than 1 dimension
         """
-        allowed_dims = {'time', 'scenario'}
-        if not set(data.dims).issubset(allowed_dims):
-            raise ValueError(f'DataArray dimensions must be subset of {allowed_dims}. Got {data.dims}')
+        if 'time' not in data.indexes:
+            raise ValueError(f'DataArray must have a "time" index. Got {data.indexes}')
+        if data.ndim > 1:
+            raise ValueError(f'Number of dimensions of DataArray must be 1. Got {data.ndim}')
 
         self.name = name
         self.aggregation_weight = aggregation_weight
         self.aggregation_group = aggregation_group
-        self.has_extra_timestep = has_extra_timestep
+        self.needs_extra_timestep = needs_extra_timestep
 
         # Data management
         self._stored_data = data.copy(deep=True)
         self._backup = self._stored_data.copy(deep=True)
+        self._active_timesteps = self._stored_data.indexes['time']
+        self._active_data = None
+        self._update_active_data()
 
-        # Selection state
-        self._selected_timesteps: Optional[pd.DatetimeIndex] = None
-        self._selected_scenarios: Optional[pd.Index] = None
-
-        # Flag for whether this series has various dimensions
-        self.has_time_dim = 'time' in data.dims
-        self.has_scenario_dim = 'scenario' in data.dims
-
-    def reset(self) -> None:
+    def reset(self):
         """
-        Reset selections to include all timesteps and scenarios.
-        This is equivalent to clearing all selections.
+        Reset active timesteps to the full set of stored timesteps.
         """
-        self.set_selection(None, None)
+        self.active_timesteps = None
 
-    def restore_data(self) -> None:
+    def restore_data(self):
         """
         Restore stored_data from the backup and reset active timesteps.
         """
@@ -679,8 +288,8 @@ class TimeSeries:
             'name': self.name,
             'aggregation_weight': self.aggregation_weight,
             'aggregation_group': self.aggregation_group,
-            'has_extra_timestep': self.has_extra_timestep,
-            'data': self.selected_data.to_dict(),
+            'needs_extra_timestep': self.needs_extra_timestep,
+            'data': self.active_data.to_dict(),
         }
 
         # Convert datetime objects to ISO strings
@@ -688,7 +297,7 @@ class TimeSeries:
 
         # Save to file if path is provided
         if path is not None:
-            indent = 4 if len(self.selected_timesteps) <= 480 else None
+            indent = 4 if len(self.active_timesteps) <= 480 else None
             with open(path, 'w', encoding='utf-8') as f:
                 json.dump(data, f, indent=indent, ensure_ascii=False)
 
@@ -702,116 +311,84 @@ class TimeSeries:
         Returns:
             String representation of data statistics
         """
-        return get_numeric_stats(self.selected_data, padd=0, by_scenario=True)
+        return get_numeric_stats(self.active_data, padd=0)
+
+    def _update_active_data(self):
+        """
+        Update the active data based on active_timesteps.
+        """
+        self._active_data = self._stored_data.sel(time=self.active_timesteps)
 
     @property
     def all_equal(self) -> bool:
         """Check if all values in the series are equal."""
-        return np.unique(self.selected_data.values).size == 1
+        return np.unique(self.active_data.values).size == 1
 
     @property
-    def selected_data(self) -> xr.DataArray:
+    def active_timesteps(self) -> pd.DatetimeIndex:
+        """Get the current active timesteps."""
+        return self._active_timesteps
+
+    @active_timesteps.setter
+    def active_timesteps(self, timesteps: Optional[pd.DatetimeIndex]):
         """
-        Get a view of stored_data based on current selections.
-        This computes the view dynamically based on the current selection state.
+        Set active_timesteps and refresh active_data.
+
+        Args:
+            timesteps: New timesteps to activate, or None to use all stored timesteps
+
+        Raises:
+            TypeError: If timesteps is not a pandas DatetimeIndex or None
         """
-        return self._stored_data.sel(**self._valid_selector)
+        if timesteps is None:
+            self._active_timesteps = self.stored_data.indexes['time']
+        elif isinstance(timesteps, pd.DatetimeIndex):
+            self._active_timesteps = timesteps
+        else:
+            raise TypeError('active_timesteps must be a pandas DatetimeIndex or None')
 
-    @property
-    def selected_timesteps(self) -> Optional[pd.DatetimeIndex]:
-        """Get the current active timesteps, or None if no time dimension."""
-        if not self.has_time_dim:
-            return None
-        if self._selected_timesteps is None:
-            return self._stored_data.indexes['time']
-        return self._selected_timesteps
+        self._update_active_data()
 
     @property
-    def active_scenarios(self) -> Optional[pd.Index]:
-        """Get the current active scenarios, or None if no scenario dimension."""
-        if not self.has_scenario_dim:
-            return None
-        if self._selected_scenarios is None:
-            return self._stored_data.indexes['scenario']
-        return self._selected_scenarios
+    def active_data(self) -> xr.DataArray:
+        """Get a view of stored_data based on active_timesteps."""
+        return self._active_data
 
     @property
     def stored_data(self) -> xr.DataArray:
         """Get a copy of the full stored data."""
         return self._stored_data.copy()
 
-    def update_stored_data(self, value: xr.DataArray) -> None:
+    @stored_data.setter
+    def stored_data(self, value: NumericData):
         """
-        Update stored_data and refresh selected_data.
+        Update stored_data and refresh active_data.
 
         Args:
             value: New data to store
         """
-        new_data = DataConverter.as_dataarray(
-            value,
-            timesteps=self.selected_timesteps if self.has_time_dim else None,
-            scenarios=self.active_scenarios if self.has_scenario_dim else None,
-        )
+        new_data = DataConverter.as_dataarray(value, timesteps=self.active_timesteps)
 
         # Skip if data is unchanged to avoid overwriting backup
         if new_data.equals(self._stored_data):
             return
 
         self._stored_data = new_data
-        self.set_selection(None, None)  # Reset selections to full dataset
-
-    def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None:
-        """
-        Set active subset for timesteps and scenarios.
-
-        Args:
-            timesteps: Timesteps to activate, or None to clear. Ignored if series has no time dimension.
-            scenarios: Scenarios to activate, or None to clear. Ignored if series has no scenario dimension.
-        """
-        # Only update timesteps if the series has time dimension
-        if self.has_time_dim:
-            if timesteps is None or timesteps.equals(self._stored_data.indexes['time']):
-                self._selected_timesteps = None
-            else:
-                self._selected_timesteps = timesteps
-
-        # Only update scenarios if the series has scenario dimension
-        if self.has_scenario_dim:
-            if scenarios is None or scenarios.equals(self._stored_data.indexes['scenario']):
-                self._selected_scenarios = None
-            else:
-                self._selected_scenarios = scenarios
+        self.active_timesteps = None  # Reset to full timeline
 
     @property
     def sel(self):
-        """Direct access to the selected_data's sel method for convenience."""
-        return self.selected_data.sel
+        return self.active_data.sel
 
     @property
     def isel(self):
-        """Direct access to the selected_data's isel method for convenience."""
-        return self.selected_data.isel
-
-    @property
-    def _valid_selector(self) -> Dict[str, pd.Index]:
-        """Get the current selection as a dictionary."""
-        selector = {}
-
-        # Only include time in selector if series has time dimension
-        if self.has_time_dim and self._selected_timesteps is not None:
-            selector['time'] = self._selected_timesteps
-
-        # Only include scenario in selector if series has scenario dimension
-        if self.has_scenario_dim and self._selected_scenarios is not None:
-            selector['scenario'] = self._selected_scenarios
-
-        return selector
+        return self.active_data.isel
 
     def _apply_operation(self, other, op):
         """Apply an operation between this TimeSeries and another object."""
         if isinstance(other, TimeSeries):
-            other = other.selected_data
-        return op(self.selected_data, other)
+            other = other.active_data
+        return op(self.active_data, other)
 
     def __add__(self, other):
         return self._apply_operation(other, lambda x, y: x + y)
@@ -826,25 +403,25 @@ class TimeSeries:
         return self._apply_operation(other, lambda x, y: x / y)
 
     def __radd__(self, other):
-        return other + self.selected_data
+        return other + self.active_data
 
     def __rsub__(self, other):
-        return other - self.selected_data
+        return other - self.active_data
 
     def __rmul__(self, other):
-        return other * self.selected_data
+        return other * self.active_data
 
     def __rtruediv__(self, other):
-        return other / self.selected_data
+        return other / self.active_data
 
     def __neg__(self) -> xr.DataArray:
-        return -self.selected_data
+        return -self.active_data
 
     def __pos__(self) -> xr.DataArray:
-        return +self.selected_data
+        return +self.active_data
 
     def __abs__(self) -> xr.DataArray:
-        return abs(self.selected_data)
+        return abs(self.active_data)
 
     def __gt__(self, other):
         """
@@ -857,8 +434,8 @@ class TimeSeries:
             True if all values in this TimeSeries are greater than other
         """
         if isinstance(other, TimeSeries):
-            return self.selected_data > other.selected_data
-        return self.selected_data > other
+            return self.active_data > other.active_data
+        return self.active_data > other
 
     def __ge__(self, other):
         """
@@ -871,8 +448,8 @@ class TimeSeries:
             True if all values in this TimeSeries are greater than or equal to other
         """
         if isinstance(other, TimeSeries):
-            return self.selected_data >= other.selected_data
-        return self.selected_data >= other
+            return self.active_data >= other.active_data
+        return self.active_data >= other
 
     def __lt__(self, other):
         """
@@ -885,8 +462,8 @@ class TimeSeries:
             True if all values in this TimeSeries are less than other
         """
         if isinstance(other, TimeSeries):
-            return self.selected_data < other.selected_data
-        return self.selected_data < other
+            return self.active_data < other.active_data
+        return self.active_data < other
 
     def __le__(self, other):
         """
@@ -899,8 +476,8 @@ class TimeSeries:
             True if all values in this TimeSeries are less than or equal to other
         """
         if isinstance(other, TimeSeries):
-            return self.selected_data <= other.selected_data
-        return self.selected_data <= other
+            return self.active_data <= other.active_data
+        return self.active_data <= other
 
     def __eq__(self, other):
         """
@@ -913,8 +490,8 @@ class TimeSeries:
             True if all values in this TimeSeries are equal to other
         """
         if isinstance(other, TimeSeries):
-            return self.selected_data == other.selected_data
-        return self.selected_data == other
+            return self.active_data == other.active_data
+        return self.active_data == other
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         """
@@ -922,8 +499,8 @@ class TimeSeries:
 
         This allows NumPy functions to work with TimeSeries objects.
         """
-        # Convert any TimeSeries inputs to their selected_data
-        inputs = [x.selected_data if isinstance(x, TimeSeries) else x for x in inputs]
+        # Convert any TimeSeries inputs to their active_data
+        inputs = [x.active_data if isinstance(x, TimeSeries) else x for x in inputs]
         return getattr(ufunc, method)(*inputs, **kwargs)
 
     def __repr__(self):
@@ -937,10 +514,10 @@ class TimeSeries:
             'name': self.name,
             'aggregation_weight': self.aggregation_weight,
             'aggregation_group': self.aggregation_group,
-            'has_extra_timestep': self.has_extra_timestep,
-            'shape': self.selected_data.shape,
+            'needs_extra_timestep': self.needs_extra_timestep,
+            'shape': self.active_data.shape,
+            'time_range': f'{self.active_timesteps[0]} to {self.active_timesteps[-1]}',
         }
-
         attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items())
         return f'TimeSeries({attr_str})'
 
@@ -951,333 +528,281 @@ class TimeSeries:
951
528
  Returns:
952
529
  Descriptive string with statistics
953
530
  """
954
- return f'TimeSeries "{self.name}":\n{textwrap.indent(self.stats, " ")}'
531
+ return f"TimeSeries '{self.name}': {self.stats}"
955
532
 
956
533
 
957
534
  class TimeSeriesCollection:
958
535
  """
959
- Simplified central manager for time series data with reference tracking.
536
+ Collection of TimeSeries objects with shared timestep management.
960
537
 
961
- Provides a way to store time series data and work with subsets of dimensions
962
- that automatically update all references when changed.
538
+ TimeSeriesCollection handles multiple TimeSeries objects with synchronized
539
+ timesteps, provides operations on collections, and manages extra timesteps.
963
540
  """
964
541
 
965
542
  def __init__(
966
543
  self,
967
544
  timesteps: pd.DatetimeIndex,
968
- scenarios: Optional[pd.Index] = None,
969
545
  hours_of_last_timestep: Optional[float] = None,
970
546
  hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None,
971
547
  ):
972
- """Initialize a TimeSeriesCollection."""
973
- self._full_timesteps = self._validate_timesteps(timesteps)
974
- self._full_scenarios = self._validate_scenarios(scenarios)
975
-
976
- self._full_timesteps_extra = self._create_timesteps_with_extra(
977
- self._full_timesteps,
978
- self._calculate_hours_of_final_timestep(
979
- self._full_timesteps, hours_of_final_timestep=hours_of_last_timestep
980
- ),
981
- )
982
- self._full_hours_per_timestep = self.calculate_hours_per_timestep(
983
- self._full_timesteps_extra, self._full_scenarios
984
- )
985
-
548
+ """
549
+ Args:
550
+ timesteps: The timesteps of the Collection.
551
+ hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified
552
+ hours_of_previous_timesteps: The duration of previous timesteps.
553
+ If None, the first time increment of time_series is used.
554
+ This is needed to calculate previous durations (for example consecutive_on_hours).
555
+ If you use an array, take care that its long enough to cover all previous values!
556
+ """
557
+ # Prepare and validate timesteps
558
+ self._validate_timesteps(timesteps)
986
559
  self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps(
987
560
  timesteps, hours_of_previous_timesteps
988
- ) # TODO: Make dynamic
561
+ )
989
562
 
990
- # Series that need extra timestep
991
- self._has_extra_timestep: set = set()
563
+ # Set up timesteps and hours
564
+ self.all_timesteps = timesteps
565
+ self.all_timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep)
566
+ self.all_hours_per_timestep = self.calculate_hours_per_timestep(self.all_timesteps_extra)
992
567
 
993
- # Storage for TimeSeries objects
994
- self._time_series: Dict[str, TimeSeries] = {}
568
+ # Active timestep tracking
569
+ self._active_timesteps = None
570
+ self._active_timesteps_extra = None
571
+ self._active_hours_per_timestep = None
995
572
 
996
- # Active subset selectors
997
- self._selected_timesteps: Optional[pd.DatetimeIndex] = None
998
- self._selected_scenarios: Optional[pd.Index] = None
999
- self._selected_timesteps_extra: Optional[pd.DatetimeIndex] = None
1000
- self._selected_hours_per_timestep: Optional[xr.DataArray] = None
573
+ # Dictionary of time series by name
574
+ self.time_series_data: Dict[str, TimeSeries] = {}
1001
575
 
1002
- def add_time_series(
1003
- self,
1004
- name: str,
1005
- data: Union[NumericDataTS, TimeSeries],
1006
- has_time_dim: bool = True,
1007
- has_scenario_dim: bool = True,
1008
- aggregation_weight: Optional[float] = None,
1009
- aggregation_group: Optional[str] = None,
1010
- has_extra_timestep: bool = False,
576
+ # Aggregation
577
+ self.group_weights: Dict[str, float] = {}
578
+ self.weights: Dict[str, float] = {}
579
+
580
+ @classmethod
581
+ def with_uniform_timesteps(
582
+ cls, start_time: pd.Timestamp, periods: int, freq: str, hours_per_step: Optional[float] = None
583
+ ) -> 'TimeSeriesCollection':
584
+ """Create a collection with uniform timesteps."""
585
+ timesteps = pd.date_range(start_time, periods=periods, freq=freq, name='time')
586
+ return cls(timesteps, hours_of_previous_timesteps=hours_per_step)
587
+
588
+ def create_time_series(
589
+ self, data: Union[NumericData, TimeSeriesData], name: str, needs_extra_timestep: bool = False
1011
590
  ) -> TimeSeries:
1012
591
  """
1013
- Add a new TimeSeries to the allocator.
592
+ Creates a TimeSeries from the given data and adds it to the collection.
1014
593
 
1015
594
  Args:
1016
- name: Name of the time series
1017
- data: Data for the time series (can be raw data or an existing TimeSeries)
1018
- has_time_dim: Whether the TimeSeries has a time dimension
1019
- has_scenario_dim: Whether the TimeSeries has a scenario dimension
1020
- aggregation_weight: Weight used for aggregation
1021
- aggregation_group: Group name for shared aggregation weighting
1022
- has_extra_timestep: Whether this series needs an extra timestep
595
+ data: The data to create the TimeSeries from.
596
+ name: The name of the TimeSeries.
597
+ needs_extra_timestep: Whether to create an additional timestep at the end of the timesteps.
598
+ The data to create the TimeSeries from.
1023
599
 
1024
600
  Returns:
1025
- The created TimeSeries object
601
+ The created TimeSeries.
602
+
1026
603
  """
1027
- if name in self._time_series:
1028
- raise KeyError(f"TimeSeries '{name}' already exists in allocator")
1029
- if not has_time_dim and has_extra_timestep:
1030
- raise ValueError('A not time-indexed TimeSeries cannot have an extra timestep')
1031
-
1032
- # Choose which timesteps to use
1033
- if has_time_dim:
1034
- target_timesteps = self.timesteps_extra if has_extra_timestep else self.timesteps
1035
- else:
1036
- target_timesteps = None
604
+ # Check for duplicate name
605
+ if name in self.time_series_data:
606
+ raise ValueError(f"TimeSeries '{name}' already exists in this collection")
1037
607
 
1038
- target_scenarios = self.scenarios if has_scenario_dim else None
608
+ # Determine which timesteps to use
609
+ timesteps_to_use = self.timesteps_extra if needs_extra_timestep else self.timesteps
1039
610
 
1040
- # Create or adapt the TimeSeries object
1041
- if isinstance(data, TimeSeries):
1042
- # Use the existing TimeSeries but update its parameters
1043
- time_series = data
1044
- # Update the stored data to use our timesteps and scenarios
1045
- data_array = DataConverter.as_dataarray(
1046
- time_series.stored_data, timesteps=target_timesteps, scenarios=target_scenarios
1047
- )
1048
- time_series = TimeSeries(
1049
- data=data_array,
611
+ # Create the time series
612
+ if isinstance(data, TimeSeriesData):
613
+ time_series = TimeSeries.from_datasource(
1050
614
  name=name,
1051
- aggregation_weight=aggregation_weight or time_series.aggregation_weight,
1052
- aggregation_group=aggregation_group or time_series.aggregation_group,
1053
- has_extra_timestep=has_extra_timestep or time_series.has_extra_timestep,
615
+ data=data.data,
616
+ timesteps=timesteps_to_use,
617
+ aggregation_weight=data.agg_weight,
618
+ aggregation_group=data.agg_group,
619
+ needs_extra_timestep=needs_extra_timestep,
1054
620
  )
621
+ # Connect the user time series to the created TimeSeries
622
+ data.label = name
1055
623
  else:
1056
- # Create a new TimeSeries from raw data
1057
624
  time_series = TimeSeries.from_datasource(
1058
- data=data,
1059
- name=name,
1060
- timesteps=target_timesteps,
1061
- scenarios=target_scenarios,
1062
- aggregation_weight=aggregation_weight,
1063
- aggregation_group=aggregation_group,
1064
- has_extra_timestep=has_extra_timestep,
625
+ name=name, data=data, timesteps=timesteps_to_use, needs_extra_timestep=needs_extra_timestep
1065
626
  )
1066
627
 
1067
- # Add to storage
1068
- self._time_series[name] = time_series
628
+ # Add to the collection
629
+ self.add_time_series(time_series)
1069
630
 
1070
- # Track if it needs extra timestep
1071
- if has_extra_timestep:
1072
- self._has_extra_timestep.add(name)
1073
-
1074
- # Return the TimeSeries object
1075
631
  return time_series
1076
632
 
1077
- def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None:
633
+ def calculate_aggregation_weights(self) -> Dict[str, float]:
634
+ """Calculate and return aggregation weights for all time series."""
635
+ self.group_weights = self._calculate_group_weights()
636
+ self.weights = self._calculate_weights()
637
+
638
+ if np.all(np.isclose(list(self.weights.values()), 1, atol=1e-6)):
639
+ logger.info('All Aggregation weights were set to 1')
640
+
641
+ return self.weights
642
+
643
+ def activate_timesteps(self, active_timesteps: Optional[pd.DatetimeIndex] = None):
1078
644
  """
1079
- Set active subset for timesteps and scenarios.
645
+ Update active timesteps for the collection and all time series.
646
+ If no arguments are provided, the active timesteps are reset.
1080
647
 
1081
648
  Args:
1082
- timesteps: Timesteps to activate, or None to clear
1083
- scenarios: Scenarios to activate, or None to clear
649
+ active_timesteps: The active timesteps of the model.
650
+ If None, the all timesteps of the TimeSeriesCollection are taken.
1084
651
  """
1085
- if timesteps is None:
1086
- self._selected_timesteps = None
1087
- self._selected_timesteps_extra = None
1088
- else:
1089
- self._selected_timesteps = self._validate_timesteps(timesteps, self._full_timesteps)
1090
- self._selected_timesteps_extra = self._create_timesteps_with_extra(
1091
- timesteps, self._calculate_hours_of_final_timestep(timesteps, self._full_timesteps)
1092
- )
652
+ if active_timesteps is None:
653
+ return self.reset()
1093
654
 
1094
- if scenarios is None:
1095
- self._selected_scenarios = None
1096
- else:
1097
- self._selected_scenarios = self._validate_scenarios(scenarios, self._full_scenarios)
655
+ if not np.all(np.isin(active_timesteps, self.all_timesteps)):
656
+ raise ValueError('active_timesteps must be a subset of the timesteps of the TimeSeriesCollection')
1098
657
 
1099
- self._selected_hours_per_timestep = self.calculate_hours_per_timestep(self.timesteps_extra, self.scenarios)
658
+ # Calculate derived timesteps
659
+ self._active_timesteps = active_timesteps
660
+ first_ts_index = np.where(self.all_timesteps == active_timesteps[0])[0][0]
661
+ last_ts_idx = np.where(self.all_timesteps == active_timesteps[-1])[0][0]
662
+ self._active_timesteps_extra = self.all_timesteps_extra[first_ts_index : last_ts_idx + 2]
663
+ self._active_hours_per_timestep = self.all_hours_per_timestep.isel(time=slice(first_ts_index, last_ts_idx + 1))
1100
664
 
1101
- # Apply the selection to all TimeSeries objects
1102
- for ts_name, ts in self._time_series.items():
1103
- if ts.has_time_dim:
1104
- timesteps = self.timesteps_extra if ts_name in self._has_extra_timestep else self.timesteps
1105
- else:
1106
- timesteps = None
665
+ # Update all time series
666
+ self._update_time_series_timesteps()
1107
667
 
1108
- ts.set_selection(timesteps=timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None)
1109
- self._propagate_selection_to_time_series()
668
+ def reset(self):
669
+ """Reset active timesteps to defaults for all time series."""
670
+ self._active_timesteps = None
671
+ self._active_timesteps_extra = None
672
+ self._active_hours_per_timestep = None
1110
673
 
1111
- def as_dataset(self, with_extra_timestep: bool = True, with_constants: bool = True) -> xr.Dataset:
1112
- """
1113
- Convert the TimeSeriesCollection to a xarray Dataset, containing the data of each TimeSeries.
674
+ for time_series in self.time_series_data.values():
675
+ time_series.reset()
1114
676
 
1115
- Args:
1116
- with_extra_timestep: Whether to exclude the extra timesteps.
1117
- Effectively, this removes the last timestep for certain TimeSeries, but mitigates the presence of NANs in others.
1118
- with_constants: Whether to exclude TimeSeries with a constant value from the dataset.
1119
- """
1120
- if self.scenarios is None:
1121
- ds = xr.Dataset(coords={'time': self.timesteps_extra})
1122
- else:
1123
- ds = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra})
677
+ def restore_data(self):
678
+ """Restore original data for all time series."""
679
+ for time_series in self.time_series_data.values():
680
+ time_series.restore_data()
1124
681
 
1125
- for ts in self._time_series.values():
1126
- if not with_constants and ts.all_equal:
1127
- continue
1128
- ds[ts.name] = ts.selected_data
682
+ def add_time_series(self, time_series: TimeSeries):
683
+ """Add an existing TimeSeries to the collection."""
684
+ if time_series.name in self.time_series_data:
685
+ raise ValueError(f"TimeSeries '{time_series.name}' already exists in this collection")
1129
686
 
1130
- if not with_extra_timestep:
1131
- return ds.sel(time=self.timesteps)
687
+ self.time_series_data[time_series.name] = time_series
1132
688
 
1133
- return ds
689
+ def insert_new_data(self, data: pd.DataFrame, include_extra_timestep: bool = False):
690
+ """
691
+ Update time series with new data from a DataFrame.
1134
692
 
1135
- @property
1136
- def timesteps(self) -> pd.DatetimeIndex:
1137
- """Get the current active timesteps."""
1138
- if self._selected_timesteps is None:
1139
- return self._full_timesteps
1140
- return self._selected_timesteps
693
+ Args:
694
+ data: DataFrame containing new data with timestamps as index
695
+ include_extra_timestep: Whether the provided data already includes the extra timestep, by default False
696
+ """
697
+ if not isinstance(data, pd.DataFrame):
698
+ raise TypeError(f'data must be a pandas DataFrame, got {type(data).__name__}')
1141
699
 
1142
- @property
1143
- def timesteps_extra(self) -> pd.DatetimeIndex:
1144
- """Get the current active timesteps with extra timestep."""
1145
- if self._selected_timesteps_extra is None:
1146
- return self._full_timesteps_extra
1147
- return self._selected_timesteps_extra
700
+ # Check if the DataFrame index matches the expected timesteps
701
+ expected_timesteps = self.timesteps_extra if include_extra_timestep else self.timesteps
702
+ if not data.index.equals(expected_timesteps):
703
+ raise ValueError(
704
+ f'DataFrame index must match {"collection timesteps with extra timestep" if include_extra_timestep else "collection timesteps"}'
705
+ )
1148
706
 
1149
- @property
1150
- def hours_per_timestep(self) -> xr.DataArray:
1151
- """Get the current active hours per timestep."""
1152
- if self._selected_hours_per_timestep is None:
1153
- return self._full_hours_per_timestep
1154
- return self._selected_hours_per_timestep
707
+ for name, ts in self.time_series_data.items():
708
+ if name in data.columns:
709
+ if not ts.needs_extra_timestep:
710
+ # For time series without extra timestep
711
+ if include_extra_timestep:
712
+ # If data includes extra timestep but series doesn't need it, exclude the last point
713
+ ts.stored_data = data[name].iloc[:-1]
714
+ else:
715
+ # Use data as is
716
+ ts.stored_data = data[name]
717
+ else:
718
+ # For time series with extra timestep
719
+ if include_extra_timestep:
720
+ # Data already includes extra timestep
721
+ ts.stored_data = data[name]
722
+ else:
723
+ # Need to add extra timestep - extrapolate from the last value
724
+ extra_step_value = data[name].iloc[-1]
725
+ extra_step_index = pd.DatetimeIndex([self.timesteps_extra[-1]], name='time')
726
+ extra_step_series = pd.Series([extra_step_value], index=extra_step_index)
1155
727
 
1156
- @property
1157
- def scenarios(self) -> Optional[pd.Index]:
1158
- """Get the current active scenarios."""
1159
- if self._selected_scenarios is None:
1160
- return self._full_scenarios
1161
- return self._selected_scenarios
1162
-
1163
- def _propagate_selection_to_time_series(self) -> None:
1164
- """Apply the current selection to all TimeSeries objects."""
1165
- for ts_name, ts in self._time_series.items():
1166
- if ts.has_time_dim:
1167
- timesteps = self.timesteps_extra if ts_name in self._has_extra_timestep else self.timesteps
1168
- else:
1169
- timesteps = None
728
+ # Combine the regular data with the extra timestep
729
+ ts.stored_data = pd.concat([data[name], extra_step_series])
1170
730
 
1171
- ts.set_selection(timesteps=timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None)
731
+ logger.debug(f'Updated data for {name}')
1172
732
 
1173
- def __getitem__(self, name: str) -> TimeSeries:
733
+ def to_dataframe(
734
+ self, filtered: Literal['all', 'constant', 'non_constant'] = 'non_constant', include_extra_timestep: bool = True
735
+ ) -> pd.DataFrame:
1174
736
  """
1175
- Get a reference to a time series or data array.
737
+ Convert collection to DataFrame with optional filtering and timestep control.
1176
738
 
1177
739
  Args:
1178
- name: Name of the data array or time series
740
+ filtered: Filter time series by variability, by default 'non_constant'
741
+ include_extra_timestep: Whether to include the extra timestep in the result, by default True
1179
742
 
1180
743
  Returns:
1181
- TimeSeries object if it exists, otherwise DataArray with current selection applied
744
+ DataFrame representation of the collection
1182
745
  """
1183
- # First check if this is a TimeSeries
1184
- if name in self._time_series:
1185
- # Return the TimeSeries object (it will handle selection internally)
1186
- return self._time_series[name]
1187
- raise ValueError(f'No TimeSeries named "{name}" found')
1188
-
1189
- def __contains__(self, value) -> bool:
1190
- if isinstance(value, str):
1191
- return value in self._time_series
1192
- elif isinstance(value, TimeSeries):
1193
- return value.name in self._time_series
1194
- raise TypeError(f'Invalid type for __contains__ of {self.__class__.__name__}: {type(value)}')
746
+ include_constants = filtered != 'non_constant'
747
+ ds = self.to_dataset(include_constants=include_constants)
1195
748
 
1196
- def __iter__(self) -> Iterator[TimeSeries]:
1197
- """Iterate over TimeSeries objects."""
1198
- return iter(self._time_series.values())
749
+ if not include_extra_timestep:
750
+ ds = ds.isel(time=slice(None, -1))
751
+
752
+ df = ds.to_dataframe()
1199
753
 
1200
- def update_time_series(self, name: str, data: TimestepData) -> TimeSeries:
754
+ # Apply filtering
755
+ if filtered == 'all':
756
+ return df
757
+ elif filtered == 'constant':
758
+ return df.loc[:, df.nunique() == 1]
759
+ elif filtered == 'non_constant':
760
+ return df.loc[:, df.nunique() > 1]
761
+ else:
762
+ raise ValueError("filtered must be one of: 'all', 'constant', 'non_constant'")
763
+
764
+    def to_dataset(self, include_constants: bool = True) -> xr.Dataset:
         """
-        Update an existing TimeSeries with new data.
+        Combine all time series into a single Dataset with all timesteps.
 
         Args:
-            name: Name of the TimeSeries to update
-            data: New data to assign
+            include_constants: Whether to include time series with constant values, by default True
 
         Returns:
-            The updated TimeSeries
-
-        Raises:
-            KeyError: If no TimeSeries with the given name exists
+            Dataset containing all selected time series with all timesteps
         """
-        if name not in self._time_series:
-            raise KeyError(f"No TimeSeries named '{name}' found")
-
-        # Get the TimeSeries
-        ts = self._time_series[name]
-
-        # Determine which timesteps to use if the series has a time dimension
-        if ts.has_time_dim:
-            target_timesteps = self.timesteps_extra if name in self._has_extra_timestep else self.timesteps
+        # Determine which series to include
+        if include_constants:
+            series_to_include = self.time_series_data.values()
         else:
-            target_timesteps = None
+            series_to_include = self.non_constants
 
-        # Convert data to proper format
-        data_array = DataConverter.as_dataarray(
-            data, timesteps=target_timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None
-        )
+        # Create individual datasets and merge them
+        ds = xr.merge([ts.active_data.to_dataset(name=ts.name) for ts in series_to_include])
 
-        # Update the TimeSeries
-        ts.update_stored_data(data_array)
+        # Ensure the correct time coordinates
+        ds = ds.reindex(time=self.timesteps_extra)
 
-        return ts
+        ds.attrs.update(
+            {
+                'timesteps_extra': f'{self.timesteps_extra[0]} ... {self.timesteps_extra[-1]} | len={len(self.timesteps_extra)}',
+                'hours_per_timestep': self._format_stats(self.hours_per_timestep),
+            }
+        )
 
-    def calculate_aggregation_weights(self) -> Dict[str, float]:
-        """Calculate and return aggregation weights for all time series."""
-        group_weights = self._calculate_group_weights()
+        return ds
 
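`to_dataset` merges one single-variable `Dataset` per series and stamps summary strings into `attrs`. A minimal sketch of that merge with two hand-built DataArrays (names and values are illustrative; the `_format_stats` attribute is left out):

```python
import pandas as pd
import xarray as xr

time = pd.date_range('2024-01-01', periods=3, freq='h', name='time')
price = xr.DataArray([30.0, 42.5, 38.0], coords={'time': time}, dims=('time',), name='price')
demand = xr.DataArray([5.0, 5.0, 5.0], coords={'time': time}, dims=('time',), name='demand')

# Merge per-series datasets, then enforce the intended time coordinate
ds = xr.merge([da.to_dataset(name=da.name) for da in (price, demand)])
ds = ds.reindex(time=time)
ds.attrs['timesteps_extra'] = f'{time[0]} ... {time[-1]} | len={len(time)}'

print(ds)
```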
-        weights = {}
-        for name, ts in self._time_series.items():
-            if ts.aggregation_group is not None:
-                # Use group weight
-                weights[name] = group_weights.get(ts.aggregation_group, 1)
+    def _update_time_series_timesteps(self):
+        """Update active timesteps for all time series."""
+        for ts in self.time_series_data.values():
+            if ts.needs_extra_timestep:
+                ts.active_timesteps = self.timesteps_extra
             else:
-                # Use individual weight or default to 1
-                weights[name] = ts.aggregation_weight or 1
-
-        if np.all(np.isclose(list(weights.values()), 1, atol=1e-6)):
-            logger.info('All Aggregation weights were set to 1')
-
-        return weights
-
-    def _calculate_group_weights(self) -> Dict[str, float]:
-        """Calculate weights for aggregation groups."""
-        # Count series in each group
-        groups = [ts.aggregation_group for ts in self._time_series.values() if ts.aggregation_group is not None]
-        group_counts = Counter(groups)
-
-        # Calculate weight for each group (1/count)
-        return {group: 1 / count for group, count in group_counts.items()}
+                ts.active_timesteps = self.timesteps
 
     @staticmethod
-    def _validate_timesteps(
-        timesteps: pd.DatetimeIndex, present_timesteps: Optional[pd.DatetimeIndex] = None
-    ) -> pd.DatetimeIndex:
-        """
-        Validate timesteps format and rename if needed.
-
-        Args:
-            timesteps: The timesteps to validate
-            present_timesteps: The timesteps that are present in the dataset
-
-        Raises:
-            TypeError: If timesteps is not a pandas DatetimeIndex
-            ValueError: If timesteps has fewer than 2 timestamps
-            ValueError: If timesteps is not sorted
-            ValueError: If timesteps contains duplicates
-            ValueError: If timesteps is not a subset of present_timesteps
-        """
+    def _validate_timesteps(timesteps: pd.DatetimeIndex):
+        """Validate timesteps format and rename if needed."""
         if not isinstance(timesteps, pd.DatetimeIndex):
             raise TypeError('timesteps must be a pandas DatetimeIndex')
 
@@ -1286,61 +811,22 @@ class TimeSeriesCollection:
 
         # Ensure timesteps has the required name
         if timesteps.name != 'time':
-            logger.debug('Renamed timesteps to "time" (was "%s")', timesteps.name)
+            logger.warning('Renamed timesteps to "time" (was "%s")', timesteps.name)
             timesteps.name = 'time'
 
-        # Ensure timesteps is sorted
-        if not timesteps.is_monotonic_increasing:
-            raise ValueError('timesteps must be sorted')
-
-        # Ensure timesteps has no duplicates
-        if len(timesteps) != len(timesteps.drop_duplicates()):
-            raise ValueError('timesteps must not contain duplicates')
-
-        # Ensure timesteps is a subset of present_timesteps
-        if present_timesteps is not None and not set(timesteps).issubset(set(present_timesteps)):
-            raise ValueError('timesteps must be a subset of present_timesteps')
-
-        return timesteps
-
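After this hunk, `_validate_timesteps` keeps only the isinstance check and the rename to `'time'` (now logged as a warning); the sortedness, duplicate, and subset checks are removed along with the sub-selection machinery. A pandas-only sketch of the surviving behaviour:

```python
import pandas as pd

timesteps = pd.date_range('2024-01-01', periods=4, freq='h')  # .name is None here

if not isinstance(timesteps, pd.DatetimeIndex):
    raise TypeError('timesteps must be a pandas DatetimeIndex')
if timesteps.name != 'time':
    timesteps.name = 'time'  # the method logs this rename as a warning

print(timesteps.name)  # 'time'
```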
     @staticmethod
-    def _validate_scenarios(scenarios: Optional[pd.Index], present_scenarios: Optional[pd.Index] = None) -> Optional[pd.Index]:
-        """
-        Validate scenarios format and rename if needed.
-
-        Args:
-            scenarios: The scenarios to validate
-            present_scenarios: The scenarios that are present in the dataset
-
-        Raises:
-            ValueError: If scenarios is not a subset of present_scenarios
-        """
-        if scenarios is None:
-            return None
-
-        if not isinstance(scenarios, pd.Index):
-            logger.warning('Converting scenarios to pandas.Index')
-            scenarios = pd.Index(scenarios, name='scenario')
-
-        # Ensure scenarios has the required name
-        if scenarios.name != 'scenario':
-            logger.debug('Renamed scenarios to "scenario" (was "%s")', scenarios.name)
-            scenarios.name = 'scenario'
-
-        # Ensure scenarios is a subset of present_scenarios
-        if present_scenarios is not None and not set(scenarios).issubset(set(present_scenarios)):
-            raise ValueError('scenarios must be a subset of present_scenarios')
-
-        return scenarios
-
-    @staticmethod
-    def _create_timesteps_with_extra(timesteps: pd.DatetimeIndex, hours_of_last_timestep: float) -> pd.DatetimeIndex:
+    def _create_timesteps_with_extra(
+        timesteps: pd.DatetimeIndex, hours_of_last_timestep: Optional[float]
+    ) -> pd.DatetimeIndex:
         """Create timesteps with an extra step at the end."""
-        last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
+        if hours_of_last_timestep is not None:
+            # Create the extra timestep using the specified duration
+            last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
+        else:
+            # Use the last interval as the extra timestep duration
+            last_date = pd.DatetimeIndex([timesteps[-1] + (timesteps[-1] - timesteps[-2])], name='time')
+
+        # Combine with original timesteps
         return pd.DatetimeIndex(timesteps.append(last_date), name='time')
 
     @staticmethod
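`_create_timesteps_with_extra` now accepts `hours_of_last_timestep=None` and falls back to the previous interval. A pandas-only sketch of the fallback branch (values invented):

```python
import pandas as pd

timesteps = pd.date_range('2024-01-01', periods=3, freq='2h', name='time')

# hours_of_last_timestep is None -> reuse the last interval (2 h here)
last_date = pd.DatetimeIndex([timesteps[-1] + (timesteps[-1] - timesteps[-2])], name='time')
extended = pd.DatetimeIndex(timesteps.append(last_date), name='time')

print(extended[-1] - extended[-2])  # Timedelta('0 days 02:00:00')
```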
@@ -1356,130 +842,137 @@ class TimeSeriesCollection:
         return first_interval.total_seconds() / 3600  # Convert to hours
 
     @staticmethod
-    def _calculate_hours_of_final_timestep(
-        timesteps: pd.DatetimeIndex,
-        timesteps_superset: Optional[pd.DatetimeIndex] = None,
-        hours_of_final_timestep: Optional[float] = None,
-    ) -> float:
-        """
-        Calculate duration of the final timestep.
-
-        If timesteps_superset is provided, the final timestep is calculated relative to this superset.
-        The hours_of_final_timestep is only used if the final timestep can't be determined from the timesteps.
+    def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray:
+        """Calculate duration of each timestep."""
+        # Calculate differences between consecutive timestamps
+        hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1)
 
-        Args:
-            timesteps: The timesteps to evaluate
-            timesteps_superset: The superset of timesteps containing the given timesteps
-            hours_of_final_timestep: The duration of the final timestep, if already known
+        return xr.DataArray(
+            data=hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=('time',), name='hours_per_step'
+        )
 
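The rewritten `calculate_hours_per_timestep` builds the DataArray directly instead of going through `DataConverter`, and no longer carries a scenario dimension. A runnable sketch with an intentionally irregular index:

```python
import numpy as np
import pandas as pd
import xarray as xr

timesteps_extra = pd.DatetimeIndex(
    ['2024-01-01 00:00', '2024-01-01 01:00', '2024-01-01 03:00'], name='time'
)

# One duration per step; the extra timestep only closes the last interval
hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1)  # array([1., 2.])

da = xr.DataArray(
    data=hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=('time',), name='hours_per_step'
)
print(da.values)  # [1. 2.]
```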
-        Returns:
-            The duration of the final timestep in hours
+    def _calculate_group_weights(self) -> Dict[str, float]:
+        """Calculate weights for aggregation groups."""
+        # Count series in each group
+        groups = [ts.aggregation_group for ts in self.time_series_data.values() if ts.aggregation_group is not None]
+        group_counts = Counter(groups)
 
-        Raises:
-            ValueError: If the provided timesteps end after the provided timesteps_superset
-        """
-        if timesteps_superset is None:
-            if hours_of_final_timestep is not None:
-                return hours_of_final_timestep
-            return (timesteps[-1] - timesteps[-2]) / pd.Timedelta(hours=1)
+        # Calculate weight for each group (1/count)
+        return {group: 1 / count for group, count in group_counts.items()}
 
-        final_timestep = timesteps[-1]
+    def _calculate_weights(self) -> Dict[str, float]:
+        """Calculate weights for all time series."""
+        # Calculate weight for each time series
+        weights = {}
+        for name, ts in self.time_series_data.items():
+            if ts.aggregation_group is not None:
+                # Use group weight
+                weights[name] = self.group_weights.get(ts.aggregation_group, 1)
+            else:
+                # Use individual weight or default to 1
+                weights[name] = ts.aggregation_weight or 1
 
-        if timesteps_superset[-1] == final_timestep:
-            if hours_of_final_timestep is not None:
-                return hours_of_final_timestep
-            return (timesteps_superset[-1] - timesteps_superset[-2]) / pd.Timedelta(hours=1)
+        return weights
 
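Group weights are `1 / count` per aggregation group, and `_calculate_weights` falls back to the individual weight or 1. A worked example with hypothetical series names ('heat_demand', 'heat_price', 'el_price' are invented):

```python
from collections import Counter

# Two series share the aggregation group 'heat'; one series has no group.
groups = ['heat', 'heat']  # aggregation_group values of the grouped series
group_weights = {group: 1 / count for group, count in Counter(groups).items()}

weights = {
    'heat_demand': group_weights.get('heat', 1),  # 0.5 - shares its group's weight
    'heat_price': group_weights.get('heat', 1),   # 0.5
    'el_price': 1,                                # ungrouped: individual weight or 1
}
print(weights)  # {'heat_demand': 0.5, 'heat_price': 0.5, 'el_price': 1}
```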
-        elif timesteps_superset[-1] <= final_timestep:
-            raise ValueError(
-                f'The provided timesteps ({timesteps}) end after the provided timesteps_superset ({timesteps_superset})'
-            )
+    def _format_stats(self, data) -> str:
+        """Format statistics for a data array."""
+        if hasattr(data, 'values'):
+            values = data.values
         else:
-            # Get the first timestep in the superset that is after the final timestep of the subset
-            extra_timestep = timesteps_superset[timesteps_superset > final_timestep].min()
-            return (extra_timestep - final_timestep) / pd.Timedelta(hours=1)
+            values = np.asarray(data)
 
-    @staticmethod
-    def calculate_hours_per_timestep(
-        timesteps_extra: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None
-    ) -> xr.DataArray:
-        """Calculate duration of each timestep."""
-        # Calculate differences between consecutive timestamps
-        hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1)
+        mean_val = np.mean(values)
+        min_val = np.min(values)
+        max_val = np.max(values)
 
-        return DataConverter.as_dataarray(
-            hours_per_step,
-            timesteps=timesteps_extra[:-1],
-            scenarios=scenarios,
-        ).rename('hours_per_step')
+        return f'mean: {mean_val:.2f}, min: {min_val:.2f}, max: {max_val:.2f}'
 
+    def __getitem__(self, name: str) -> TimeSeries:
+        """Get a TimeSeries by name."""
+        try:
+            return self.time_series_data[name]
+        except KeyError as e:
+            raise KeyError(f'TimeSeries "{name}" not found in the TimeSeriesCollection') from e
 
-def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10, by_scenario: bool = False) -> str:
-    """
-    Calculates the mean, median, min, max, and standard deviation of a numeric DataArray.
+    def __iter__(self) -> Iterator[TimeSeries]:
+        """Iterate through all TimeSeries in the collection."""
+        return iter(self.time_series_data.values())
 
-    Args:
-        data: The DataArray to analyze
-        decimals: Number of decimal places to show
-        padd: Padding for alignment
-        by_scenario: Whether to break down stats by scenario
+    def __len__(self) -> int:
+        """Get the number of TimeSeries in the collection."""
+        return len(self.time_series_data)
 
-    Returns:
-        String representation of data statistics
-    """
-    format_spec = f'>{padd}.{decimals}f' if padd else f'.{decimals}f'
+    def __contains__(self, item: Union[str, TimeSeries]) -> bool:
+        """Check if a TimeSeries exists in the collection."""
+        if isinstance(item, str):
+            return item in self.time_series_data
+        elif isinstance(item, TimeSeries):
+            return any(item is ts for ts in self.time_series_data.values())
+        return False
 
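With these dunder methods the collection behaves like a read-only mapping over its series. A minimal stand-in class (not the flixopt implementation) demonstrating the same container protocol, runnable on its own:

```python
from typing import Dict, Iterator

class MiniCollection:
    """Stand-in showing the container protocol used above (not the flixopt class)."""

    def __init__(self, items: Dict[str, float]):
        self._items = items

    def __getitem__(self, name: str) -> float:
        try:
            return self._items[name]
        except KeyError as e:
            raise KeyError(f'"{name}" not found in the MiniCollection') from e

    def __iter__(self) -> Iterator[float]:
        return iter(self._items.values())  # iterate values, not keys

    def __len__(self) -> int:
        return len(self._items)

    def __contains__(self, item: object) -> bool:
        return item in self._items if isinstance(item, str) else False

mc = MiniCollection({'heat_demand': 1.0, 'el_price': 0.3})
print('heat_demand' in mc, len(mc), mc['heat_demand'])  # True 2 1.0
```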
 
-    # If by_scenario is True and there's a scenario dimension with multiple values
-    if by_scenario and 'scenario' in data.dims and data.sizes['scenario'] > 1:
-        results = []
-        for scenario in data.coords['scenario'].values:
-            scenario_data = data.sel(scenario=scenario)
-            if np.unique(scenario_data).size == 1:
-                results.append(f' {scenario}: {scenario_data.max().item():{format_spec}} (constant)')
-            else:
-                mean = scenario_data.mean().item()
-                median = scenario_data.median().item()
-                min_val = scenario_data.min().item()
-                max_val = scenario_data.max().item()
-                std = scenario_data.std().item()
-                results.append(
-                    f' {scenario}: {mean:{format_spec}} (mean), {median:{format_spec}} (median), '
-                    f'{min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
-                )
-        return '\n'.join(['By scenario:'] + results)
-
-    # Standard logic for non-scenario data or aggregated stats
+    @property
+    def non_constants(self) -> List[TimeSeries]:
+        """Get time series with varying values."""
+        return [ts for ts in self.time_series_data.values() if not ts.all_equal]
+
+    @property
+    def constants(self) -> List[TimeSeries]:
+        """Get time series with constant values."""
+        return [ts for ts in self.time_series_data.values() if ts.all_equal]
+
+    @property
+    def timesteps(self) -> pd.DatetimeIndex:
+        """Get the active timesteps."""
+        return self.all_timesteps if self._active_timesteps is None else self._active_timesteps
+
+    @property
+    def timesteps_extra(self) -> pd.DatetimeIndex:
+        """Get the active timesteps with extra step."""
+        return self.all_timesteps_extra if self._active_timesteps_extra is None else self._active_timesteps_extra
+
+    @property
+    def hours_per_timestep(self) -> xr.DataArray:
+        """Get the duration of each active timestep."""
+        return (
+            self.all_hours_per_timestep if self._active_hours_per_timestep is None else self._active_hours_per_timestep
+        )
+
+    @property
+    def hours_of_last_timestep(self) -> float:
+        """Get the duration of the last timestep."""
+        return float(self.hours_per_timestep[-1].item())
+
+    def __repr__(self):
+        return f'TimeSeriesCollection:\n{self.to_dataset()}'
+
+    def __str__(self):
+        longest_name = max([time_series.name for time_series in self.time_series_data.values()], key=len)
+
+        stats_summary = '\n'.join(
+            [
+                f' - {time_series.name:<{len(longest_name)}}: {get_numeric_stats(time_series.active_data)}'
+                for time_series in self.time_series_data.values()
+            ]
+        )
+
+        return (
+            f'TimeSeriesCollection with {len(self.time_series_data)} series\n'
+            f' Time Range: {self.timesteps[0]} → {self.timesteps[-1]}\n'
+            f' No. of timesteps: {len(self.timesteps)} + 1 extra\n'
+            f' Hours per timestep: {get_numeric_stats(self.hours_per_timestep)}\n'
+            f' Time Series Data:\n'
+            f'{stats_summary}'
+        )
+
+
+def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10) -> str:
+    """Calculates the mean, median, min, max, and standard deviation of a numeric DataArray."""
+    format_spec = f'>{padd}.{decimals}f' if padd else f'.{decimals}f'
     if np.unique(data).size == 1:
         return f'{data.max().item():{format_spec}} (constant)'
-
     mean = data.mean().item()
     median = data.median().item()
     min_val = data.min().item()
     max_val = data.max().item()
     std = data.std().item()
-
     return f'{mean:{format_spec}} (mean), {median:{format_spec}} (median), {min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
-
-
-def extract_data(
-    data: Optional[Union[int, float, xr.DataArray, TimeSeries]],
-    if_none: Any = None
-) -> Any:
-    """
-    Convert data to xr.DataArray.
-
-    Args:
-        data: The data to convert (scalar, array, or DataArray)
-        if_none: The value to return if data is None
-
-    Returns:
-        DataArray with the converted data, or the value specified by if_none
-    """
-    if data is None:
-        return if_none
-    if isinstance(data, TimeSeries):
-        return data.selected_data
-    if isinstance(data, xr.DataArray):
-        return data
-    if isinstance(data, (int, float, np.integer, np.floating)):
-        return data
-    raise TypeError(f'Unsupported data type: {type(data).__name__}')
- raise TypeError(f'Unsupported data type: {type(data).__name__}')