flixopt 2.2.0rc2__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (58)
  1. flixopt/__init__.py +33 -4
  2. flixopt/aggregation.py +60 -80
  3. flixopt/calculation.py +395 -178
  4. flixopt/commons.py +1 -10
  5. flixopt/components.py +939 -448
  6. flixopt/config.py +553 -191
  7. flixopt/core.py +513 -846
  8. flixopt/effects.py +644 -178
  9. flixopt/elements.py +610 -355
  10. flixopt/features.py +394 -966
  11. flixopt/flow_system.py +736 -219
  12. flixopt/interface.py +1104 -302
  13. flixopt/io.py +103 -79
  14. flixopt/linear_converters.py +387 -95
  15. flixopt/modeling.py +759 -0
  16. flixopt/network_app.py +73 -39
  17. flixopt/plotting.py +294 -138
  18. flixopt/results.py +1253 -299
  19. flixopt/solvers.py +25 -21
  20. flixopt/structure.py +938 -396
  21. flixopt/utils.py +38 -12
  22. flixopt-3.0.0.dist-info/METADATA +209 -0
  23. flixopt-3.0.0.dist-info/RECORD +26 -0
  24. flixopt-3.0.0.dist-info/top_level.txt +1 -0
  25. docs/examples/00-Minimal Example.md +0 -5
  26. docs/examples/01-Basic Example.md +0 -5
  27. docs/examples/02-Complex Example.md +0 -10
  28. docs/examples/03-Calculation Modes.md +0 -5
  29. docs/examples/index.md +0 -5
  30. docs/faq/contribute.md +0 -61
  31. docs/faq/index.md +0 -3
  32. docs/images/architecture_flixOpt-pre2.0.0.png +0 -0
  33. docs/images/architecture_flixOpt.png +0 -0
  34. docs/images/flixopt-icon.svg +0 -1
  35. docs/javascripts/mathjax.js +0 -18
  36. docs/user-guide/Mathematical Notation/Bus.md +0 -33
  37. docs/user-guide/Mathematical Notation/Effects, Penalty & Objective.md +0 -132
  38. docs/user-guide/Mathematical Notation/Flow.md +0 -26
  39. docs/user-guide/Mathematical Notation/LinearConverter.md +0 -21
  40. docs/user-guide/Mathematical Notation/Piecewise.md +0 -49
  41. docs/user-guide/Mathematical Notation/Storage.md +0 -44
  42. docs/user-guide/Mathematical Notation/index.md +0 -22
  43. docs/user-guide/Mathematical Notation/others.md +0 -3
  44. docs/user-guide/index.md +0 -124
  45. flixopt/config.yaml +0 -10
  46. flixopt-2.2.0rc2.dist-info/METADATA +0 -167
  47. flixopt-2.2.0rc2.dist-info/RECORD +0 -54
  48. flixopt-2.2.0rc2.dist-info/top_level.txt +0 -5
  49. pics/architecture_flixOpt-pre2.0.0.png +0 -0
  50. pics/architecture_flixOpt.png +0 -0
  51. pics/flixOpt_plotting.jpg +0 -0
  52. pics/flixopt-icon.svg +0 -1
  53. pics/pics.pptx +0 -0
  54. scripts/extract_release_notes.py +0 -45
  55. scripts/gen_ref_pages.py +0 -54
  56. tests/ressources/Zeitreihen2020.csv +0 -35137
  57. {flixopt-2.2.0rc2.dist-info → flixopt-3.0.0.dist-info}/WHEEL +0 -0
  58. {flixopt-2.2.0rc2.dist-info → flixopt-3.0.0.dist-info}/licenses/LICENSE +0 -0
flixopt/core.py CHANGED
@@ -3,12 +3,10 @@ This module contains the core functionality of the flixopt framework.
 It provides Datatypes, logging functionality, and some functions to transform data structures.
 """

-import inspect
-import json
 import logging
-import pathlib
-from collections import Counter
-from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
+import warnings
+from itertools import permutations
+from typing import Literal, Union

 import numpy as np
 import pandas as pd
@@ -16,14 +14,17 @@ import xarray as xr

 logger = logging.getLogger('flixopt')

-Scalar = Union[int, float]
-"""A type representing a single number, either integer or float."""
+Scalar = int | float
+"""A single number, either integer or float."""

-NumericData = Union[int, float, np.integer, np.floating, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray]
-"""Represents any form of numeric data, from simple scalars to complex data structures."""
+PeriodicDataUser = int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray
+"""User data which has no time dimension. Internally converted to a Scalar or an xr.DataArray without a time dimension."""

-NumericDataTS = Union[NumericData, 'TimeSeriesData']
-"""Represents either standard numeric data or TimeSeriesData."""
+PeriodicData = xr.DataArray
+"""Internally used datatypes for periodic data."""
+
+FlowSystemDimensions = Literal['time', 'period', 'scenario']
+"""Possible dimensions of a FlowSystem."""


 class PlausibilityError(Exception):
@@ -38,941 +39,607 @@ class ConversionError(Exception):
     pass


-class DataConverter:
-    """
-    Converts various data types into xarray.DataArray with a timesteps index.
-
-    Supports: scalars, arrays, Series, DataFrames, and DataArrays.
-    """
-
-    @staticmethod
-    def as_dataarray(data: NumericData, timesteps: pd.DatetimeIndex) -> xr.DataArray:
-        """Convert data to xarray.DataArray with specified timesteps index."""
-        if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0:
-            raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}')
-        if not timesteps.name == 'time':
-            raise ConversionError(f'DatetimeIndex is not named correctly. Must be named "time", got {timesteps.name=}')
-
-        coords = [timesteps]
-        dims = ['time']
-        expected_shape = (len(timesteps),)
+class TimeSeriesData(xr.DataArray):
+    """Minimal TimeSeriesData that inherits from xr.DataArray with aggregation metadata."""

-        try:
-            if isinstance(data, (int, float, np.integer, np.floating)):
-                return xr.DataArray(data, coords=coords, dims=dims)
-            elif isinstance(data, pd.DataFrame):
-                if not data.index.equals(timesteps):
-                    raise ConversionError(
-                        f"DataFrame index doesn't match timesteps index. "
-                        f'Its missing the following time steps: {timesteps.difference(data.index)}. '
-                        f'Some parameters might need an extra timestep at the end.'
-                    )
-                if not len(data.columns) == 1:
-                    raise ConversionError('DataFrame must have exactly one column')
-                return xr.DataArray(data.values.flatten(), coords=coords, dims=dims)
-            elif isinstance(data, pd.Series):
-                if not data.index.equals(timesteps):
-                    raise ConversionError(
-                        f"Series index doesn't match timesteps index. "
-                        f'Its missing the following time steps: {timesteps.difference(data.index)}. '
-                        f'Some parameters might need an extra timestep at the end.'
-                    )
-                return xr.DataArray(data.values, coords=coords, dims=dims)
-            elif isinstance(data, np.ndarray):
-                if data.ndim != 1:
-                    raise ConversionError(f'Array must be 1-dimensional, got {data.ndim}')
-                elif data.shape[0] != expected_shape[0]:
-                    raise ConversionError(f"Array shape {data.shape} doesn't match expected {expected_shape}")
-                return xr.DataArray(data, coords=coords, dims=dims)
-            elif isinstance(data, xr.DataArray):
-                if data.dims != tuple(dims):
-                    raise ConversionError(f"DataArray dimensions {data.dims} don't match expected {dims}")
-                if data.sizes[dims[0]] != len(coords[0]):
-                    raise ConversionError(
-                        f"DataArray length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}"
-                    )
-                return data.copy(deep=True)
-            else:
-                raise ConversionError(f'Unsupported type: {type(data).__name__}')
-        except Exception as e:
-            if isinstance(e, ConversionError):
-                raise
-            raise ConversionError(f'Converting data {type(data)} to xarray.Dataset raised an error: {str(e)}') from e
-
-
-class TimeSeriesData:
-    # TODO: Move to Interface.py
-    def __init__(self, data: NumericData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None):
-        """
-        timeseries class for transmit timeseries AND special characteristics of timeseries,
-        i.g. to define weights needed in calculation_type 'aggregated'
-        EXAMPLE solar:
-        you have several solar timeseries. These should not be overweighted
-        compared to the remaining timeseries (i.g. heat load, price)!
-        fixed_relative_profile_solar1 = TimeSeriesData(sol_array_1, type = 'solar')
-        fixed_relative_profile_solar2 = TimeSeriesData(sol_array_2, type = 'solar')
-        fixed_relative_profile_solar3 = TimeSeriesData(sol_array_3, type = 'solar')
-        --> this 3 series of same type share one weight, i.e. internally assigned each weight = 1/3
-        (instead of standard weight = 1)
-
-        Args:
-            data: The timeseries data, which can be a scalar, array, or numpy array.
-            agg_group: The group this TimeSeriesData is a part of. agg_weight is split between members of a group. Default is None.
-            agg_weight: The weight for calculation_type 'aggregated', should be between 0 and 1. Default is None.
-
-        Raises:
-            Exception: If both agg_group and agg_weight are set, an exception is raised.
-        """
-        self.data = data
-        self.agg_group = agg_group
-        self.agg_weight = agg_weight
-        if (agg_group is not None) and (agg_weight is not None):
-            raise ValueError('Either <agg_group> or explicit <agg_weigth> can be used. Not both!')
-        self.label: Optional[str] = None
-
-    def __repr__(self):
-        # Get the constructor arguments and their current values
-        init_signature = inspect.signature(self.__init__)
-        init_args = init_signature.parameters
-
-        # Create a dictionary with argument names and their values
-        args_str = ', '.join(f'{name}={repr(getattr(self, name, None))}' for name in init_args if name != 'self')
-        return f'{self.__class__.__name__}({args_str})'
-
-    def __str__(self):
-        return str(self.data)
-
-
-class TimeSeries:
-    """
-    A class representing time series data with active and stored states.
-
-    TimeSeries provides a way to store time-indexed data and work with temporal subsets.
-    It supports arithmetic operations, aggregation, and JSON serialization.
-
-    Attributes:
-        name (str): The name of the time series
-        aggregation_weight (Optional[float]): Weight used for aggregation
-        aggregation_group (Optional[str]): Group name for shared aggregation weighting
-        needs_extra_timestep (bool): Whether this series needs an extra timestep
-    """
-
-    @classmethod
-    def from_datasource(
-        cls,
-        data: NumericData,
-        name: str,
-        timesteps: pd.DatetimeIndex,
-        aggregation_weight: Optional[float] = None,
-        aggregation_group: Optional[str] = None,
-        needs_extra_timestep: bool = False,
-    ) -> 'TimeSeries':
-        """
-        Initialize the TimeSeries from multiple data sources.
-
-        Args:
-            data: The time series data
-            name: The name of the TimeSeries
-            timesteps: The timesteps of the TimeSeries
-            aggregation_weight: The weight in aggregation calculations
-            aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing
-            needs_extra_timestep: Whether this series requires an extra timestep
-
-        Returns:
-            A new TimeSeries instance
-        """
-        return cls(
-            DataConverter.as_dataarray(data, timesteps),
-            name,
-            aggregation_weight,
-            aggregation_group,
-            needs_extra_timestep,
-        )
-
-    @classmethod
-    def from_json(cls, data: Optional[Dict[str, Any]] = None, path: Optional[str] = None) -> 'TimeSeries':
-        """
-        Load a TimeSeries from a dictionary or json file.
-
-        Args:
-            data: Dictionary containing TimeSeries data
-            path: Path to a JSON file containing TimeSeries data
-
-        Returns:
-            A new TimeSeries instance
-
-        Raises:
-            ValueError: If both path and data are provided or neither is provided
-        """
-        if (path is None and data is None) or (path is not None and data is not None):
-            raise ValueError("Exactly one of 'path' or 'data' must be provided")
-
-        if path is not None:
-            with open(path, 'r') as f:
-                data = json.load(f)
-
-        # Convert ISO date strings to datetime objects
-        data['data']['coords']['time']['data'] = pd.to_datetime(data['data']['coords']['time']['data'])
-
-        # Create the TimeSeries instance
-        return cls(
-            data=xr.DataArray.from_dict(data['data']),
-            name=data['name'],
-            aggregation_weight=data['aggregation_weight'],
-            aggregation_group=data['aggregation_group'],
-            needs_extra_timestep=data['needs_extra_timestep'],
-        )
+    __slots__ = ()  # No additional instance attributes - everything goes in attrs

     def __init__(
         self,
-        data: xr.DataArray,
-        name: str,
-        aggregation_weight: Optional[float] = None,
-        aggregation_group: Optional[str] = None,
-        needs_extra_timestep: bool = False,
+        *args,
+        aggregation_group: str | None = None,
+        aggregation_weight: float | None = None,
+        agg_group: str | None = None,
+        agg_weight: float | None = None,
+        **kwargs,
     ):
         """
-        Initialize a TimeSeries with a DataArray.
-
-        Args:
-            data: The DataArray containing time series data
-            name: The name of the TimeSeries
-            aggregation_weight: The weight in aggregation calculations
-            aggregation_group: Group this TimeSeries belongs to for weight sharing
-            needs_extra_timestep: Whether this series requires an extra timestep
-
-        Raises:
-            ValueError: If data doesn't have a 'time' index or has more than 1 dimension
-        """
-        if 'time' not in data.indexes:
-            raise ValueError(f'DataArray must have a "time" index. Got {data.indexes}')
-        if data.ndim > 1:
-            raise ValueError(f'Number of dimensions of DataArray must be 1. Got {data.ndim}')
-
-        self.name = name
-        self.aggregation_weight = aggregation_weight
-        self.aggregation_group = aggregation_group
-        self.needs_extra_timestep = needs_extra_timestep
-
-        # Data management
-        self._stored_data = data.copy(deep=True)
-        self._backup = self._stored_data.copy(deep=True)
-        self._active_timesteps = self._stored_data.indexes['time']
-        self._active_data = None
-        self._update_active_data()
-
-    def reset(self):
-        """
-        Reset active timesteps to the full set of stored timesteps.
-        """
-        self.active_timesteps = None
-
-    def restore_data(self):
-        """
-        Restore stored_data from the backup and reset active timesteps.
-        """
-        self._stored_data = self._backup.copy(deep=True)
-        self.reset()
-
-    def to_json(self, path: Optional[pathlib.Path] = None) -> Dict[str, Any]:
-        """
-        Save the TimeSeries to a dictionary or JSON file.
-
         Args:
-            path: Optional path to save JSON file
-
-        Returns:
-            Dictionary representation of the TimeSeries
-        """
-        data = {
-            'name': self.name,
-            'aggregation_weight': self.aggregation_weight,
-            'aggregation_group': self.aggregation_group,
-            'needs_extra_timestep': self.needs_extra_timestep,
-            'data': self.active_data.to_dict(),
-        }
-
-        # Convert datetime objects to ISO strings
-        data['data']['coords']['time']['data'] = [date.isoformat() for date in data['data']['coords']['time']['data']]
-
-        # Save to file if path is provided
-        if path is not None:
-            indent = 4 if len(self.active_timesteps) <= 480 else None
-            with open(path, 'w', encoding='utf-8') as f:
-                json.dump(data, f, indent=indent, ensure_ascii=False)
-
-        return data
-
-    @property
-    def stats(self) -> str:
-        """
-        Return a statistical summary of the active data.
-
-        Returns:
-            String representation of data statistics
-        """
-        return get_numeric_stats(self.active_data, padd=0)
-
-    def _update_active_data(self):
-        """
-        Update the active data based on active_timesteps.
-        """
-        self._active_data = self._stored_data.sel(time=self.active_timesteps)
+            *args: Arguments passed to DataArray
+            aggregation_group: Aggregation group name
+            aggregation_weight: Aggregation weight (0-1)
+            agg_group: Deprecated, use aggregation_group instead
+            agg_weight: Deprecated, use aggregation_weight instead
+            **kwargs: Additional arguments passed to DataArray
+        """
+        if agg_group is not None:
+            warnings.warn('agg_group is deprecated, use aggregation_group instead', DeprecationWarning, stacklevel=2)
+            aggregation_group = agg_group
+        if agg_weight is not None:
+            warnings.warn('agg_weight is deprecated, use aggregation_weight instead', DeprecationWarning, stacklevel=2)
+            aggregation_weight = agg_weight
+
+        if (aggregation_group is not None) and (aggregation_weight is not None):
+            raise ValueError('Use either aggregation_group or aggregation_weight, not both')
+
+        # Let xarray handle all the initialization complexity
+        super().__init__(*args, **kwargs)
+
+        # Add our metadata to attrs after initialization
+        if aggregation_group is not None:
+            self.attrs['aggregation_group'] = aggregation_group
+        if aggregation_weight is not None:
+            self.attrs['aggregation_weight'] = aggregation_weight
+
+        # Always mark as TimeSeriesData
+        self.attrs['__timeseries_data__'] = True
+
+    def fit_to_coords(
+        self,
+        coords: dict[str, pd.Index],
+        name: str | None = None,
+    ) -> 'TimeSeriesData':
+        """Fit the data to the given coordinates. Returns a new TimeSeriesData object if the current coords are different."""
+        if self.coords.equals(xr.Coordinates(coords)):
+            return self
+
+        da = DataConverter.to_dataarray(self.data, coords=coords)
+        return self.__class__(
+            da,
+            aggregation_group=self.aggregation_group,
+            aggregation_weight=self.aggregation_weight,
+            name=name if name is not None else self.name,
+        )

     @property
-    def all_equal(self) -> bool:
-        """Check if all values in the series are equal."""
-        return np.unique(self.active_data.values).size == 1
+    def aggregation_group(self) -> str | None:
+        return self.attrs.get('aggregation_group')

     @property
-    def active_timesteps(self) -> pd.DatetimeIndex:
-        """Get the current active timesteps."""
-        return self._active_timesteps
+    def aggregation_weight(self) -> float | None:
+        return self.attrs.get('aggregation_weight')

-    @active_timesteps.setter
-    def active_timesteps(self, timesteps: Optional[pd.DatetimeIndex]):
-        """
-        Set active_timesteps and refresh active_data.
-
-        Args:
-            timesteps: New timesteps to activate, or None to use all stored timesteps
-
-        Raises:
-            TypeError: If timesteps is not a pandas DatetimeIndex or None
-        """
-        if timesteps is None:
-            self._active_timesteps = self.stored_data.indexes['time']
-        elif isinstance(timesteps, pd.DatetimeIndex):
-            self._active_timesteps = timesteps
-        else:
-            raise TypeError('active_timesteps must be a pandas DatetimeIndex or None')
-
-        self._update_active_data()
-
-    @property
-    def active_data(self) -> xr.DataArray:
-        """Get a view of stored_data based on active_timesteps."""
-        return self._active_data
-
-    @property
-    def stored_data(self) -> xr.DataArray:
-        """Get a copy of the full stored data."""
-        return self._stored_data.copy()
+    @classmethod
+    def from_dataarray(
+        cls, da: xr.DataArray, aggregation_group: str | None = None, aggregation_weight: float | None = None
+    ):
+        """Create TimeSeriesData from DataArray, extracting metadata from attrs."""
+        # Get aggregation metadata from attrs or parameters
+        final_aggregation_group = (
+            aggregation_group if aggregation_group is not None else da.attrs.get('aggregation_group')
+        )
+        final_aggregation_weight = (
+            aggregation_weight if aggregation_weight is not None else da.attrs.get('aggregation_weight')
+        )

-    @stored_data.setter
-    def stored_data(self, value: NumericData):
-        """
-        Update stored_data and refresh active_data.
+        return cls(da, aggregation_group=final_aggregation_group, aggregation_weight=final_aggregation_weight)

-        Args:
-            value: New data to store
-        """
-        new_data = DataConverter.as_dataarray(value, timesteps=self.active_timesteps)
+    @classmethod
+    def is_timeseries_data(cls, obj) -> bool:
+        """Check if an object is TimeSeriesData."""
+        return isinstance(obj, xr.DataArray) and obj.attrs.get('__timeseries_data__', False)

-        # Skip if data is unchanged to avoid overwriting backup
-        if new_data.equals(self._stored_data):
-            return
+    def __repr__(self):
+        agg_info = []
+        if self.aggregation_group:
+            agg_info.append(f"aggregation_group='{self.aggregation_group}'")
+        if self.aggregation_weight is not None:
+            agg_info.append(f'aggregation_weight={self.aggregation_weight}')

-        self._stored_data = new_data
-        self.active_timesteps = None  # Reset to full timeline
+        info_str = f'TimeSeriesData({", ".join(agg_info)})' if agg_info else 'TimeSeriesData'
+        return f'{info_str}\n{super().__repr__()}'

     @property
-    def sel(self):
-        return self.active_data.sel
+    def agg_group(self):
+        warnings.warn('agg_group is deprecated, use aggregation_group instead', DeprecationWarning, stacklevel=2)
+        return self.aggregation_group

     @property
-    def isel(self):
-        return self.active_data.isel
-
-    def _apply_operation(self, other, op):
-        """Apply an operation between this TimeSeries and another object."""
-        if isinstance(other, TimeSeries):
-            other = other.active_data
-        return op(self.active_data, other)
+    def agg_weight(self):
+        warnings.warn('agg_weight is deprecated, use aggregation_weight instead', DeprecationWarning, stacklevel=2)
+        return self.aggregation_weight

-    def __add__(self, other):
-        return self._apply_operation(other, lambda x, y: x + y)

-    def __sub__(self, other):
-        return self._apply_operation(other, lambda x, y: x - y)
+TemporalDataUser = (
+    int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray | TimeSeriesData
+)
+"""User data which might have a time dimension. Internally converted to an xr.DataArray with time dimension."""

-    def __mul__(self, other):
-        return self._apply_operation(other, lambda x, y: x * y)
+TemporalData = xr.DataArray | TimeSeriesData
+"""Internally used datatypes for temporal data (data with a time dimension)."""

-    def __truediv__(self, other):
-        return self._apply_operation(other, lambda x, y: x / y)

-    def __radd__(self, other):
-        return other + self.active_data
-
-    def __rsub__(self, other):
-        return other - self.active_data
-
-    def __rmul__(self, other):
-        return other * self.active_data
-
-    def __rtruediv__(self, other):
-        return other / self.active_data
-
-    def __neg__(self) -> xr.DataArray:
-        return -self.active_data
-
-    def __pos__(self) -> xr.DataArray:
-        return +self.active_data
-
-    def __abs__(self) -> xr.DataArray:
-        return abs(self.active_data)
-
-    def __gt__(self, other):
-        """
-        Compare if this TimeSeries is greater than another.
-
-        Args:
-            other: Another TimeSeries to compare with
+class DataConverter:
+    """
+    Converts various data types into xarray.DataArray with specified target coordinates.
+
+    This converter handles intelligent dimension matching and broadcasting to ensure
+    the output DataArray always conforms to the specified coordinate structure.
+
+    Supported input types:
+    - Scalars: int, float, np.number (broadcast to all target dimensions)
+    - 1D data: np.ndarray, pd.Series, single-column DataFrame (matched by length/index)
+    - Multi-dimensional arrays: np.ndarray, DataFrame (matched by shape)
+    - xr.DataArray: validated and potentially broadcast to target dimensions
+
+    The converter uses smart matching strategies:
+    - Series: matched by exact index comparison
+    - 1D arrays: matched by length to target coordinates
+    - Multi-dimensional arrays: matched by shape permutation analysis
+    - DataArrays: validated for compatibility and broadcast as needed
+    """

-        Returns:
-            True if all values in this TimeSeries are greater than other
+    @staticmethod
+    def _match_series_by_index_alignment(
+        data: pd.Series, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
+    ) -> xr.DataArray:
         """
-        if isinstance(other, TimeSeries):
-            return self.active_data > other.active_data
-        return self.active_data > other
+        Match pandas Series to target dimension by exact index comparison.

-    def __ge__(self, other):
-        """
-        Compare if this TimeSeries is greater than or equal to another.
+        Attempts to find a target dimension whose coordinates exactly match
+        the Series index values, ensuring proper alignment.

         Args:
-            other: Another TimeSeries to compare with
+            data: pandas Series to convert
+            target_coords: Available target coordinates {dim_name: coordinate_index}
+            target_dims: Target dimension names to consider for matching

         Returns:
-            True if all values in this TimeSeries are greater than or equal to other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data >= other.active_data
-        return self.active_data >= other
-
-    def __lt__(self, other):
-        """
-        Compare if this TimeSeries is less than another.
+            DataArray with Series matched to the appropriate dimension

-        Args:
-            other: Another TimeSeries to compare with
+        Raises:
+            ConversionError: If Series cannot be matched to any target dimension,
+                or if no target dimensions provided for multi-element Series
+        """
+        # Handle edge case: no target dimensions
+        if len(target_dims) == 0:
+            if len(data) != 1:
+                raise ConversionError(
+                    f'Cannot convert multi-element Series without target dimensions. '
+                    f'Series has {len(data)} elements but no target dimensions specified.'
+                )
+            return xr.DataArray(data.iloc[0])
+
+        # Attempt exact index matching with each target dimension
+        for dim_name in target_dims:
+            target_index = target_coords[dim_name]
+            if data.index.equals(target_index):
+                return xr.DataArray(data.values.copy(), coords={dim_name: target_index}, dims=dim_name)
+
+        # No exact matches found
+        available_lengths = {dim: len(target_coords[dim]) for dim in target_dims}
+        raise ConversionError(
+            f'Series index does not match any target dimension coordinates. '
+            f'Series length: {len(data)}, available coordinate lengths: {available_lengths}'
+        )

-        Returns:
-            True if all values in this TimeSeries are less than other
+    @staticmethod
+    def _match_1d_array_by_length(
+        data: np.ndarray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
+    ) -> xr.DataArray:
         """
-        if isinstance(other, TimeSeries):
-            return self.active_data < other.active_data
-        return self.active_data < other
+        Match 1D numpy array to target dimension by length comparison.

-    def __le__(self, other):
-        """
-        Compare if this TimeSeries is less than or equal to another.
+        Finds target dimensions whose coordinate length matches the array length.
+        Requires unique length match to avoid ambiguity.

         Args:
-            other: Another TimeSeries to compare with
+            data: 1D numpy array to convert
+            target_coords: Available target coordinates {dim_name: coordinate_index}
+            target_dims: Target dimension names to consider for matching

         Returns:
-            True if all values in this TimeSeries are less than or equal to other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data <= other.active_data
-        return self.active_data <= other
-
-    def __eq__(self, other):
-        """
-        Compare if this TimeSeries is equal to another.
+            DataArray with array matched to the uniquely identified dimension

-        Args:
-            other: Another TimeSeries to compare with
+        Raises:
+            ConversionError: If array length matches zero or multiple target dimensions,
+                or if no target dimensions provided for multi-element array
+        """
+        # Handle edge case: no target dimensions
+        if len(target_dims) == 0:
+            if len(data) != 1:
+                raise ConversionError(
+                    f'Cannot convert multi-element array without target dimensions. Array has {len(data)} elements.'
+                )
+            return xr.DataArray(data[0])
+
+        # Find all dimensions with matching lengths
+        array_length = len(data)
+        matching_dims = []
+        coordinate_lengths = {}
+
+        for dim_name in target_dims:
+            coord_length = len(target_coords[dim_name])
+            coordinate_lengths[dim_name] = coord_length
+            if array_length == coord_length:
+                matching_dims.append(dim_name)
+
+        # Validate matching results
+        if len(matching_dims) == 0:
+            raise ConversionError(
+                f'Array length {array_length} does not match any target dimension lengths: {coordinate_lengths}'
+            )
+        elif len(matching_dims) > 1:
+            raise ConversionError(
+                f'Array length {array_length} matches multiple dimensions: {matching_dims}. '
+                f'Cannot uniquely determine target dimension. Consider using explicit '
+                f'dimension specification or converting to DataArray manually.'
+            )

-        Returns:
-            True if all values in this TimeSeries are equal to other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data == other.active_data
-        return self.active_data == other
+        # Create DataArray with the uniquely matched dimension
+        matched_dim = matching_dims[0]
+        return xr.DataArray(data.copy(), coords={matched_dim: target_coords[matched_dim]}, dims=matched_dim)

-    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+    @staticmethod
+    def _match_multidim_array_by_shape_permutation(
+        data: np.ndarray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
+    ) -> xr.DataArray:
         """
-        Handle NumPy universal functions.
+        Match multi-dimensional array to target dimensions using shape permutation analysis.

-        This allows NumPy functions to work with TimeSeries objects.
-        """
-        # Convert any TimeSeries inputs to their active_data
-        inputs = [x.active_data if isinstance(x, TimeSeries) else x for x in inputs]
-        return getattr(ufunc, method)(*inputs, **kwargs)
+        Analyzes all possible mappings between array shape and target coordinate lengths
+        to find the unique valid dimension assignment.

-    def __repr__(self):
-        """
-        Get a string representation of the TimeSeries.
+        Args:
+            data: Multi-dimensional numpy array to convert
+            target_coords: Available target coordinates {dim_name: coordinate_index}
+            target_dims: Target dimension names to consider for matching

         Returns:
-            String showing TimeSeries details
-        """
-        attrs = {
-            'name': self.name,
-            'aggregation_weight': self.aggregation_weight,
-            'aggregation_group': self.aggregation_group,
-            'needs_extra_timestep': self.needs_extra_timestep,
-            'shape': self.active_data.shape,
-            'time_range': f'{self.active_timesteps[0]} to {self.active_timesteps[-1]}',
-        }
-        attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items())
-        return f'TimeSeries({attr_str})'
-
-    def __str__(self):
-        """
-        Get a human-readable string representation.
+            DataArray with array dimensions mapped to target dimensions by shape

-        Returns:
-            Descriptive string with statistics
-        """
-        return f"TimeSeries '{self.name}': {self.stats}"
+        Raises:
+            ConversionError: If array shape cannot be uniquely mapped to target dimensions,
+                or if no target dimensions provided for multi-element array
+        """
+        # Handle edge case: no target dimensions
+        if len(target_dims) == 0:
+            if data.size != 1:
+                raise ConversionError(
+                    f'Cannot convert multi-element array without target dimensions. '
+                    f'Array has {data.size} elements with shape {data.shape}.'
+                )
+            return xr.DataArray(data.item())
+
+        array_shape = data.shape
+        coordinate_lengths = {dim: len(target_coords[dim]) for dim in target_dims}
+
+        # Find all valid dimension permutations that match the array shape
+        valid_mappings = []
+        for dim_permutation in permutations(target_dims, data.ndim):
+            shape_matches = all(
+                array_shape[i] == coordinate_lengths[dim_permutation[i]] for i in range(len(dim_permutation))
+            )
+            if shape_matches:
+                valid_mappings.append(dim_permutation)
+
+        # Validate mapping results
+        if len(valid_mappings) == 0:
+            raise ConversionError(
+                f'Array shape {array_shape} cannot be mapped to any combination of target '
+                f'coordinate lengths: {coordinate_lengths}. Consider reshaping the array '
+                f'or adjusting target coordinates.'
+            )

+        if len(valid_mappings) > 1:
+            raise ConversionError(
+                f'Array shape {array_shape} matches multiple dimension combinations: '
+                f'{valid_mappings}. Cannot uniquely determine dimension mapping. '
+                f'Consider using explicit dimension specification.'
+            )

-class TimeSeriesCollection:
-    """
-    Collection of TimeSeries objects with shared timestep management.
+        # Create DataArray with the uniquely determined mapping
+        matched_dims = valid_mappings[0]
+        matched_coords = {dim: target_coords[dim] for dim in matched_dims}

-    TimeSeriesCollection handles multiple TimeSeries objects with synchronized
-    timesteps, provides operations on collections, and manages extra timesteps.
-    """
+        return xr.DataArray(data.copy(), coords=matched_coords, dims=matched_dims)

-    def __init__(
-        self,
-        timesteps: pd.DatetimeIndex,
-        hours_of_last_timestep: Optional[float] = None,
-        hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None,
-    ):
-        """
-        Args:
-            timesteps: The timesteps of the Collection.
-            hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified
-            hours_of_previous_timesteps: The duration of previous timesteps.
-                If None, the first time increment of time_series is used.
-                This is needed to calculate previous durations (for example consecutive_on_hours).
-                If you use an array, take care that its long enough to cover all previous values!
+    @staticmethod
+    def _broadcast_dataarray_to_target_specification(
+        source_data: xr.DataArray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
+    ) -> xr.DataArray:
         """
-        # Prepare and validate timesteps
-        self._validate_timesteps(timesteps)
-        self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps(
-            timesteps, hours_of_previous_timesteps
-        )
-
-        # Set up timesteps and hours
-        self.all_timesteps = timesteps
-        self.all_timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep)
-        self.all_hours_per_timestep = self.calculate_hours_per_timestep(self.all_timesteps_extra)
-
-        # Active timestep tracking
-        self._active_timesteps = None
-        self._active_timesteps_extra = None
-        self._active_hours_per_timestep = None
+        Broadcast DataArray to conform to target coordinate and dimension specification.

-        # Dictionary of time series by name
-        self.time_series_data: Dict[str, TimeSeries] = {}
-
-        # Aggregation
-        self.group_weights: Dict[str, float] = {}
-        self.weights: Dict[str, float] = {}
-
-    @classmethod
-    def with_uniform_timesteps(
-        cls, start_time: pd.Timestamp, periods: int, freq: str, hours_per_step: Optional[float] = None
-    ) -> 'TimeSeriesCollection':
-        """Create a collection with uniform timesteps."""
-        timesteps = pd.date_range(start_time, periods=periods, freq=freq, name='time')
-        return cls(timesteps, hours_of_previous_timesteps=hours_per_step)
-
-    def create_time_series(
-        self, data: Union[NumericData, TimeSeriesData], name: str, needs_extra_timestep: bool = False
-    ) -> TimeSeries:
-        """
-        Creates a TimeSeries from the given data and adds it to the collection.
+        Performs comprehensive validation and broadcasting to ensure the result exactly
+        matches the target specification. Handles scalar expansion, dimension validation,
+        coordinate compatibility checking, and broadcasting to additional dimensions.

         Args:
-            data: The data to create the TimeSeries from.
-            name: The name of the TimeSeries.
-            needs_extra_timestep: Whether to create an additional timestep at the end of the timesteps.
-                The data to create the TimeSeries from.
+            source_data: Source DataArray to broadcast
+            target_coords: Target coordinates {dim_name: coordinate_index}
+            target_dims: Target dimension names in desired order

         Returns:
-            The created TimeSeries.
+            DataArray broadcast to target specification with proper dimension ordering

-        """
-        # Check for duplicate name
-        if name in self.time_series_data:
-            raise ValueError(f"TimeSeries '{name}' already exists in this collection")
-
-        # Determine which timesteps to use
-        timesteps_to_use = self.timesteps_extra if needs_extra_timestep else self.timesteps
-
-        # Create the time series
-        if isinstance(data, TimeSeriesData):
-            time_series = TimeSeries.from_datasource(
-                name=name,
-                data=data.data,
-                timesteps=timesteps_to_use,
-                aggregation_weight=data.agg_weight,
-                aggregation_group=data.agg_group,
-                needs_extra_timestep=needs_extra_timestep,
-            )
-            # Connect the user time series to the created TimeSeries
-            data.label = name
-        else:
-            time_series = TimeSeries.from_datasource(
-                name=name, data=data, timesteps=timesteps_to_use, needs_extra_timestep=needs_extra_timestep
+        Raises:
+            ConversionError: If broadcasting is impossible due to incompatible dimensions
+                or coordinate mismatches
+        """
+        # Validate: cannot reduce dimensions
+        if len(source_data.dims) > len(target_dims):
+            raise ConversionError(
+                f'Cannot reduce DataArray dimensionality from {len(source_data.dims)} '
+                f'to {len(target_dims)} dimensions. Source dims: {source_data.dims}, '
+                f'target dims: {target_dims}'
            )

-        # Add to the collection
-        self.add_time_series(time_series)
+        # Validate: all source dimensions must exist in target
+        missing_dims = set(source_data.dims) - set(target_dims)
+        if missing_dims:
+            raise ConversionError(
+                f'Source DataArray has dimensions {missing_dims} not present in target dimensions {target_dims}'
+            )

-        return time_series
+        # Validate: coordinate compatibility for overlapping dimensions
+        for dim in source_data.dims:
+            if dim in source_data.coords and dim in target_coords:
+                source_coords = source_data.coords[dim]
+                target_coords_for_dim = target_coords[dim]

-    def calculate_aggregation_weights(self) -> Dict[str, float]:
-        """Calculate and return aggregation weights for all time series."""
-        self.group_weights = self._calculate_group_weights()
-        self.weights = self._calculate_weights()
+                if not np.array_equal(source_coords.values, target_coords_for_dim.values):
+                    raise ConversionError(
+                        f'Coordinate mismatch for dimension "{dim}". '
+                        f'Source and target coordinates have different values.'
+                    )

-        if np.all(np.isclose(list(self.weights.values()), 1, atol=1e-6)):
-            logger.info('All Aggregation weights were set to 1')
+        # Create target template for broadcasting
+        target_shape = [len(target_coords[dim]) for dim in target_dims]
+        target_template = xr.DataArray(np.empty(target_shape), coords=target_coords, dims=target_dims)

-        return self.weights
+        # Perform broadcasting and ensure proper dimension ordering
+        broadcasted = source_data.broadcast_like(target_template)
+        return broadcasted.transpose(*target_dims)

-    def activate_timesteps(self, active_timesteps: Optional[pd.DatetimeIndex] = None):
-        """
-        Update active timesteps for the collection and all time series.
-        If no arguments are provided, the active timesteps are reset.
+    @classmethod
+    def to_dataarray(
+        cls,
+        data: int
+        | float
+        | bool
+        | np.integer
+        | np.floating
+        | np.bool_
+        | np.ndarray
+        | pd.Series
+        | pd.DataFrame
+        | xr.DataArray,
+        coords: dict[str, pd.Index] | None = None,
+    ) -> xr.DataArray:
+        """
+        Convert various data types to xarray.DataArray with specified target coordinates.
+
+        This is the main conversion method that intelligently handles different input types
+        and ensures the result conforms to the specified coordinate structure through
+        smart dimension matching and broadcasting.

         Args:
-            active_timesteps: The active timesteps of the model.
-                If None, the all timesteps of the TimeSeriesCollection are taken.
-        """
-        if active_timesteps is None:
-            return self.reset()
-
-        if not np.all(np.isin(active_timesteps, self.all_timesteps)):
-            raise ValueError('active_timesteps must be a subset of the timesteps of the TimeSeriesCollection')
-
-        # Calculate derived timesteps
-        self._active_timesteps = active_timesteps
-        first_ts_index = np.where(self.all_timesteps == active_timesteps[0])[0][0]
-        last_ts_idx = np.where(self.all_timesteps == active_timesteps[-1])[0][0]
-        self._active_timesteps_extra = self.all_timesteps_extra[first_ts_index : last_ts_idx + 2]
-        self._active_hours_per_timestep = self.all_hours_per_timestep.isel(time=slice(first_ts_index, last_ts_idx + 1))
-
-        # Update all time series
-        self._update_time_series_timesteps()
+            data: Input data to convert. Supported types:
+                - Scalars: int, float, bool, np.integer, np.floating, np.bool_
+                - Arrays: np.ndarray (1D and multi-dimensional)
+                - Pandas: pd.Series, pd.DataFrame
+                - xarray: xr.DataArray
+            coords: Target coordinate specification as {dimension_name: coordinate_index}.
+                All coordinate indices must be pandas.Index objects.

-    def reset(self):
-        """Reset active timesteps to defaults for all time series."""
-        self._active_timesteps = None
-        self._active_timesteps_extra = None
-        self._active_hours_per_timestep = None
-
-        for time_series in self.time_series_data.values():
-            time_series.reset()
-
-    def restore_data(self):
-        """Restore original data for all time series."""
-        for time_series in self.time_series_data.values():
-            time_series.restore_data()
-
-    def add_time_series(self, time_series: TimeSeries):
-        """Add an existing TimeSeries to the collection."""
-        if time_series.name in self.time_series_data:
-            raise ValueError(f"TimeSeries '{time_series.name}' already exists in this collection")
+        Returns:
+            DataArray conforming to the target coordinate specification,
+            with input data appropriately matched and broadcast

-        self.time_series_data[time_series.name] = time_series
+        Raises:
+            ConversionError: If data type is unsupported, conversion fails,
+                or broadcasting to target coordinates is impossible
+
+        Examples:
+            # Scalar broadcasting
+            >>> coords = {'x': pd.Index([1, 2, 3]), 'y': pd.Index(['a', 'b'])}
+            >>> converter.to_dataarray(42, coords)
+            # Returns: DataArray with shape (3, 2), all values = 42
+
+            # Series index matching
+            >>> series = pd.Series([10, 20, 30], index=[1, 2, 3])
+            >>> converter.to_dataarray(series, coords)
+            # Returns: DataArray matched to 'x' dimension, broadcast to 'y'
+
+            # Array shape matching
+            >>> array = np.array([[1, 2], [3, 4], [5, 6]])  # Shape (3, 2)
+            >>> converter.to_dataarray(array, coords)
+            # Returns: DataArray with dimensions ('x', 'y') based on shape
+        """
+        # Prepare and validate target specification
+        if coords is None:
+            coords = {}
+
+        validated_coords, target_dims = cls._validate_and_prepare_target_coordinates(coords)
+
+        # Convert input data to intermediate DataArray based on type
+        if isinstance(data, (int, float, bool, np.integer, np.floating, np.bool_)):
+            # Scalar values - create scalar DataArray
+            intermediate = xr.DataArray(data.item() if hasattr(data, 'item') else data)
+
+        elif isinstance(data, np.ndarray):
+            # NumPy arrays - dispatch based on dimensionality
+            if data.ndim == 0:
+                # 0-dimensional array (scalar)
+                intermediate = xr.DataArray(data.item())
+            elif data.ndim == 1:
+                # 1-dimensional array
+                intermediate = cls._match_1d_array_by_length(data, validated_coords, target_dims)
+            else:
+                # Multi-dimensional array
+                intermediate = cls._match_multidim_array_by_shape_permutation(data, validated_coords, target_dims)
+
+        elif isinstance(data, pd.Series):
+            # Pandas Series - validate and match by index
+            if isinstance(data.index, pd.MultiIndex):
+                raise ConversionError('MultiIndex Series are not supported. Please use a single-level index.')
+            intermediate = cls._match_series_by_index_alignment(data, validated_coords, target_dims)
+
+        elif isinstance(data, pd.DataFrame):
+            # Pandas DataFrame - validate and convert
+            if isinstance(data.index, pd.MultiIndex):
+                raise ConversionError('MultiIndex DataFrames are not supported. Please use a single-level index.')
+            if len(data.columns) == 0 or data.empty:
+                raise ConversionError('DataFrame must have at least one column and cannot be empty.')
+
+            if len(data.columns) == 1:
+                # Single-column DataFrame - treat as Series
+                series_data = data.iloc[:, 0]
+                intermediate = cls._match_series_by_index_alignment(series_data, validated_coords, target_dims)
+            else:
+                # Multi-column DataFrame - treat as multi-dimensional array
+                intermediate = cls._match_multidim_array_by_shape_permutation(
+                    data.to_numpy(), validated_coords, target_dims
+                )

-    def insert_new_data(self, data: pd.DataFrame, include_extra_timestep: bool = False):
-        """
-        Update time series with new data from a DataFrame.
+        elif isinstance(data, xr.DataArray):
+            # Existing DataArray - use as-is
+            intermediate = data.copy()

-        Args:
-            data: DataFrame containing new data with timestamps as index
-            include_extra_timestep: Whether the provided data already includes the extra timestep, by default False
-        """
-        if not isinstance(data, pd.DataFrame):
-            raise TypeError(f'data must be a pandas DataFrame, got {type(data).__name__}')
-
-        # Check if the DataFrame index matches the expected timesteps
-        expected_timesteps = self.timesteps_extra if include_extra_timestep else self.timesteps
-        if not data.index.equals(expected_timesteps):
-            raise ValueError(
-                f'DataFrame index must match {"collection timesteps with extra timestep" if include_extra_timestep else "collection timesteps"}'
+        else:
+            # Unsupported data type
+            supported_types = [
+                'int',
+                'float',
+                'bool',
+                'np.integer',
+                'np.floating',
+                'np.bool_',
+                'np.ndarray',
+                'pd.Series',
+                'pd.DataFrame',
+                'xr.DataArray',
+            ]
+            raise ConversionError(
+                f'Unsupported data type: {type(data).__name__}. Supported types: {", ".join(supported_types)}'
            )

-        for name, ts in self.time_series_data.items():
-            if name in data.columns:
-                if not ts.needs_extra_timestep:
-                    # For time series without extra timestep
-                    if include_extra_timestep:
-                        # If data includes extra timestep but series doesn't need it, exclude the last point
-                        ts.stored_data = data[name].iloc[:-1]
-                    else:
-                        # Use data as is
-                        ts.stored_data = data[name]
-                else:
-                    # For time series with extra timestep
-                    if include_extra_timestep:
-                        # Data already includes extra timestep
-                        ts.stored_data = data[name]
-                    else:
-                        # Need to add extra timestep - extrapolate from the last value
-                        extra_step_value = data[name].iloc[-1]
-                        extra_step_index = pd.DatetimeIndex([self.timesteps_extra[-1]], name='time')
-                        extra_step_series = pd.Series([extra_step_value], index=extra_step_index)
-
-                        # Combine the regular data with the extra timestep
-                        ts.stored_data = pd.concat([data[name], extra_step_series])
-
-                logger.debug(f'Updated data for {name}')
-
-    def to_dataframe(
-        self, filtered: Literal['all', 'constant', 'non_constant'] = 'non_constant', include_extra_timestep: bool = True
-    ) -> pd.DataFrame:
-        """
-        Convert collection to DataFrame with optional filtering and timestep control.
-
-        Args:
-            filtered: Filter time series by variability, by default 'non_constant'
-            include_extra_timestep: Whether to include the extra timestep in the result, by default True
+        # Broadcast intermediate result to target specification
+        return cls._broadcast_dataarray_to_target_specification(intermediate, validated_coords, target_dims)

-        Returns:
-            DataFrame representation of the collection
+    @staticmethod
+    def _validate_and_prepare_target_coordinates(
+        coords: dict[str, pd.Index],
+    ) -> tuple[dict[str, pd.Index], tuple[str, ...]]:
         """
-        include_constants = filtered != 'non_constant'
-        ds = self.to_dataset(include_constants=include_constants)
-
-        if not include_extra_timestep:
-            ds = ds.isel(time=slice(None, -1))
-
-        df = ds.to_dataframe()
-
-        # Apply filtering
-        if filtered == 'all':
-            return df
-        elif filtered == 'constant':
-            return df.loc[:, df.nunique() == 1]
-        elif filtered == 'non_constant':
-            return df.loc[:, df.nunique() > 1]
-        else:
-            raise ValueError("filtered must be one of: 'all', 'constant', 'non_constant'")
+        Validate and prepare target coordinate specification for DataArray creation.

-    def to_dataset(self, include_constants: bool = True) -> xr.Dataset:
-        """
-        Combine all time series into a single Dataset with all timesteps.
+        Performs comprehensive validation of coordinate inputs and prepares them
+        for use in DataArray construction with appropriate naming and type checking.

         Args:
-            include_constants: Whether to include time series with constant values, by default True
+            coords: Raw coordinate specification {dimension_name: coordinate_index}

         Returns:
-            Dataset containing all selected time series with all timesteps
-        """
-        # Determine which series to include
-        if include_constants:
-            series_to_include = self.time_series_data.values()
-        else:
-            series_to_include = self.non_constants
-
-        # Create individual datasets and merge them
-        ds = xr.merge([ts.active_data.to_dataset(name=ts.name) for ts in series_to_include])
-
-        # Ensure the correct time coordinates
-        ds = ds.reindex(time=self.timesteps_extra)
-
-        ds.attrs.update(
-            {
-                'timesteps_extra': f'{self.timesteps_extra[0]} ... {self.timesteps_extra[-1]} | len={len(self.timesteps_extra)}',
-                'hours_per_timestep': self._format_stats(self.hours_per_timestep),
-            }
-        )
-
-        return ds
-
-    def _update_time_series_timesteps(self):
-        """Update active timesteps for all time series."""
-        for ts in self.time_series_data.values():
-            if ts.needs_extra_timestep:
-                ts.active_timesteps = self.timesteps_extra
-            else:
-                ts.active_timesteps = self.timesteps
+            Tuple of (validated_coordinates_dict, dimension_names_tuple)

-    @staticmethod
-    def _validate_timesteps(timesteps: pd.DatetimeIndex):
-        """Validate timesteps format and rename if needed."""
-        if not isinstance(timesteps, pd.DatetimeIndex):
-            raise TypeError('timesteps must be a pandas DatetimeIndex')
-
-        if len(timesteps) < 2:
-            raise ValueError('timesteps must contain at least 2 timestamps')
-
-        # Ensure timesteps has the required name
-        if timesteps.name != 'time':
-            logger.warning('Renamed timesteps to "time" (was "%s")', timesteps.name)
-            timesteps.name = 'time'
-
-    @staticmethod
-    def _create_timesteps_with_extra(
-        timesteps: pd.DatetimeIndex, hours_of_last_timestep: Optional[float]
-    ) -> pd.DatetimeIndex:
-        """Create timesteps with an extra step at the end."""
-        if hours_of_last_timestep is not None:
-            # Create the extra timestep using the specified duration
-            last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
-        else:
-            # Use the last interval as the extra timestep duration
-            last_date = pd.DatetimeIndex([timesteps[-1] + (timesteps[-1] - timesteps[-2])], name='time')
-
-        # Combine with original timesteps
-        return pd.DatetimeIndex(timesteps.append(last_date), name='time')
-
-    @staticmethod
-    def _calculate_hours_of_previous_timesteps(
-        timesteps: pd.DatetimeIndex, hours_of_previous_timesteps: Optional[Union[float, np.ndarray]]
-    ) -> Union[float, np.ndarray]:
-        """Calculate duration of regular timesteps."""
-        if hours_of_previous_timesteps is not None:
-            return hours_of_previous_timesteps
+        Raises:
+            ConversionError: If any coordinates are invalid, improperly typed,
+                or have inconsistent naming
+        """
+        validated_coords = {}
+        dimension_names = []

-        # Calculate from the first interval
-        first_interval = timesteps[1] - timesteps[0]
-        return first_interval.total_seconds() / 3600  # Convert to hours
+        for dim_name, coord_index in coords.items():
+            # Type validation
+            if not isinstance(coord_index, pd.Index):
+                raise ConversionError(
+                    f'Coordinate for dimension "{dim_name}" must be a pandas.Index, got {type(coord_index).__name__}'
+                )

-    @staticmethod
-    def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray:
-        """Calculate duration of each timestep."""
-        # Calculate differences between consecutive timestamps
-        hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1)
+            # Non-empty validation
+            if len(coord_index) == 0:
+                raise ConversionError(f'Coordinate for dimension "{dim_name}" cannot be empty')

-        return xr.DataArray(
-            data=hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=('time',), name='hours_per_step'
-        )
-
-    def _calculate_group_weights(self) -> Dict[str, float]:
-        """Calculate weights for aggregation groups."""
-        # Count series in each group
-        groups = [ts.aggregation_group for ts in self.time_series_data.values() if ts.aggregation_group is not None]
-        group_counts = Counter(groups)
-
-        # Calculate weight for each group (1/count)
-        return {group: 1 / count for group, count in group_counts.items()}
-
-    def _calculate_weights(self) -> Dict[str, float]:
-        """Calculate weights for all time series."""
-        # Calculate weight for each time series
-        weights = {}
-        for name, ts in self.time_series_data.items():
-            if ts.aggregation_group is not None:
-                # Use group weight
-                weights[name] = self.group_weights.get(ts.aggregation_group, 1)
-            else:
-                # Use individual weight or default to 1
-                weights[name] = ts.aggregation_weight or 1
+            # Ensure coordinate index has consistent naming
+            if coord_index.name != dim_name:
+                coord_index = coord_index.rename(dim_name)

-        return weights
+            # Special validation for time dimensions (common pattern)
+            if dim_name == 'time' and not isinstance(coord_index, pd.DatetimeIndex):
+                raise ConversionError(
+                    f'Dimension named "time" should use DatetimeIndex for proper '
+                    f'time-series functionality, got {type(coord_index).__name__}'
+                )

-    def _format_stats(self, data) -> str:
-        """Format statistics for a data array."""
-        if hasattr(data, 'values'):
-            values = data.values
-        else:
-            values = np.asarray(data)
+            validated_coords[dim_name] = coord_index
+            dimension_names.append(dim_name)

-        mean_val = np.mean(values)
-        min_val = np.min(values)
-        max_val = np.max(values)
+        return validated_coords, tuple(dimension_names)

-        return f'mean: {mean_val:.2f}, min: {min_val:.2f}, max: {max_val:.2f}'

-    def __getitem__(self, name: str) -> TimeSeries:
-        """Get a TimeSeries by name."""
+def get_dataarray_stats(arr: xr.DataArray) -> dict:
+    """Generate statistical summary of a DataArray."""
+    stats = {}
+    if arr.dtype.kind in 'biufc':  # bool, int, uint, float, complex
         try:
-            return self.time_series_data[name]
-        except KeyError as e:
-            raise KeyError(f'TimeSeries "{name}" not found in the TimeSeriesCollection') from e
-
-    def __iter__(self) -> Iterator[TimeSeries]:
-        """Iterate through all TimeSeries in the collection."""
-        return iter(self.time_series_data.values())
-
-    def __len__(self) -> int:
-        """Get the number of TimeSeries in the collection."""
-        return len(self.time_series_data)
-
-    def __contains__(self, item: Union[str, TimeSeries]) -> bool:
-        """Check if a TimeSeries exists in the collection."""
-        if isinstance(item, str):
-            return item in self.time_series_data
-        elif isinstance(item, TimeSeries):
-            return any([item is ts for ts in self.time_series_data.values()])
-        return False
+            stats.update(
+                {
+                    'min': float(arr.min().values),
+                    'max': float(arr.max().values),
+                    'mean': float(arr.mean().values),
+                    'median': float(arr.median().values),
+                    'std': float(arr.std().values),
+                    'count': int(arr.count().values),  # non-null count
+                }
+            )

-    @property
-    def non_constants(self) -> List[TimeSeries]:
-        """Get time series with varying values."""
-        return [ts for ts in self.time_series_data.values() if not ts.all_equal]
+            # Add null count only if there are nulls
+            null_count = int(arr.isnull().sum().values)
+            if null_count > 0:
+                stats['nulls'] = null_count

-    @property
-    def constants(self) -> List[TimeSeries]:
-        """Get time series with constant values."""
-        return [ts for ts in self.time_series_data.values() if ts.all_equal]
+        except Exception:
+            pass

-    @property
-    def timesteps(self) -> pd.DatetimeIndex:
-        """Get the active timesteps."""
-        return self.all_timesteps if self._active_timesteps is None else self._active_timesteps
+    return stats

-    @property
-    def timesteps_extra(self) -> pd.DatetimeIndex:
-        """Get the active timesteps with extra step."""
-        return self.all_timesteps_extra if self._active_timesteps_extra is None else self._active_timesteps_extra

-    @property
-    def hours_per_timestep(self) -> xr.DataArray:
-        """Get the duration of each active timestep."""
-        return (
-            self.all_hours_per_timestep if self._active_hours_per_timestep is None else self._active_hours_per_timestep
-        )
+def drop_constant_arrays(ds: xr.Dataset, dim: str = 'time', drop_arrays_without_dim: bool = True) -> xr.Dataset:
+    """Drop variables with constant values along a dimension.

-    @property
-    def hours_of_last_timestep(self) -> float:
-        """Get the duration of the last timestep."""
-        return float(self.hours_per_timestep[-1].item())
-
-    def __repr__(self):
-        return f'TimeSeriesCollection:\n{self.to_dataset()}'
-
-    def __str__(self):
-        longest_name = max([time_series.name for time_series in self.time_series_data], key=len)
+    Args:
+        ds: Input dataset to filter.
+        dim: Dimension along which to check for constant values.
+        drop_arrays_without_dim: If True, also drop variables that don't have the specified dimension.

-        stats_summary = '\n'.join(
-            [
-                f' - {time_series.name:<{len(longest_name)}}: {get_numeric_stats(time_series.active_data)}'
-                for time_series in self.time_series_data
-            ]
+    Returns:
+        Dataset with constant variables removed.
+    """
+    drop_vars = []
+
+    for name, da in ds.data_vars.items():
+        # Skip variables without the dimension
+        if dim not in da.dims:
+            if drop_arrays_without_dim:
+                drop_vars.append(name)
+            continue
+
+        # Check if variable is constant along the dimension
+        if (da.max(dim, skipna=True) == da.min(dim, skipna=True)).all().item():
+            drop_vars.append(name)
+
+    if drop_vars:
+        drop_vars = sorted(drop_vars)
+        logger.debug(
+            f'Dropping {len(drop_vars)} constant/dimension-less arrays: {drop_vars[:5]}{"..." if len(drop_vars) > 5 else ""}'
        )

-        return (
-            f'TimeSeriesCollection with {len(self.time_series_data)} series\n'
-            f' Time Range: {self.timesteps[0]} → {self.timesteps[-1]}\n'
-            f' No. of timesteps: {len(self.timesteps)} + 1 extra\n'
-            f' Hours per timestep: {get_numeric_stats(self.hours_per_timestep)}\n'
-            f' Time Series Data:\n'
-            f'{stats_summary}'
-        )
+    return ds.drop_vars(drop_vars)


-def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10) -> str:
-    """Calculates the mean, median, min, max, and standard deviation of a numeric DataArray."""
-    format_spec = f'>{padd}.{decimals}f' if padd else f'.{decimals}f'
-    if np.unique(data).size == 1:
-        return f'{data.max().item():{format_spec}} (constant)'
-    mean = data.mean().item()
-    median = data.median().item()
-    min_val = data.min().item()
-    max_val = data.max().item()
-    std = data.std().item()
-    return f'{mean:{format_spec}} (mean), {median:{format_spec}} (median), {min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
+# Backward compatibility aliases
+# TODO: Needed?
+NonTemporalDataUser = PeriodicDataUser
+NonTemporalData = PeriodicData
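
For orientation, here is a minimal usage sketch of the 3.0.0 API that this diff introduces in flixopt/core.py. The import path and the toy data are illustrative assumptions, not part of the diff; the names themselves (TimeSeriesData, DataConverter.to_dataarray, the deprecated agg_group/agg_weight keywords) are taken from the added lines above.

import numpy as np
import pandas as pd

# Assumed import path; the diff above defines these names in flixopt/core.py.
from flixopt.core import DataConverter, TimeSeriesData

timesteps = pd.date_range('2020-01-01', periods=4, freq='h', name='time')
coords = {'time': timesteps, 'scenario': pd.Index(['base', 'high'], name='scenario')}

# 3.0 style: TimeSeriesData subclasses xr.DataArray; aggregation metadata lives in attrs.
# The 2.x keywords agg_group/agg_weight still work but emit a DeprecationWarning.
profile = TimeSeriesData(
    np.array([0.0, 0.4, 0.9, 0.3]),
    coords={'time': timesteps},
    aggregation_group='solar',
)
assert profile.attrs['aggregation_group'] == 'solar'

# DataConverter.to_dataarray matches input data to the target coords and broadcasts:
scalar_da = DataConverter.to_dataarray(42, coords=coords)  # broadcast to shape (4, 2)
series_da = DataConverter.to_dataarray(
    pd.Series([1.0, 2.0, 3.0, 4.0], index=timesteps), coords=coords
)  # matched to 'time' by index, then broadcast over 'scenario'

Compared to 2.2.0rc2, where TimeSeriesData merely wrapped raw data and a separate TimeSeries/TimeSeriesCollection layer managed active timesteps, the 3.0.0 design moves that bookkeeping into xarray coordinates and attrs, which is why those classes disappear from core.py in this diff.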