flixopt 2.2.0b0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of flixopt might be problematic.
Files changed (63)
  1. flixopt/__init__.py +35 -1
  2. flixopt/aggregation.py +60 -81
  3. flixopt/calculation.py +381 -196
  4. flixopt/components.py +1022 -359
  5. flixopt/config.py +553 -191
  6. flixopt/core.py +475 -1315
  7. flixopt/effects.py +477 -214
  8. flixopt/elements.py +591 -344
  9. flixopt/features.py +403 -957
  10. flixopt/flow_system.py +781 -293
  11. flixopt/interface.py +1159 -189
  12. flixopt/io.py +50 -55
  13. flixopt/linear_converters.py +384 -92
  14. flixopt/modeling.py +759 -0
  15. flixopt/network_app.py +789 -0
  16. flixopt/plotting.py +273 -135
  17. flixopt/results.py +639 -383
  18. flixopt/solvers.py +25 -21
  19. flixopt/structure.py +928 -442
  20. flixopt/utils.py +34 -5
  21. flixopt-3.0.0.dist-info/METADATA +209 -0
  22. flixopt-3.0.0.dist-info/RECORD +26 -0
  23. {flixopt-2.2.0b0.dist-info → flixopt-3.0.0.dist-info}/WHEEL +1 -1
  24. flixopt-3.0.0.dist-info/top_level.txt +1 -0
  25. docs/examples/00-Minimal Example.md +0 -5
  26. docs/examples/01-Basic Example.md +0 -5
  27. docs/examples/02-Complex Example.md +0 -10
  28. docs/examples/03-Calculation Modes.md +0 -5
  29. docs/examples/index.md +0 -5
  30. docs/faq/contribute.md +0 -49
  31. docs/faq/index.md +0 -3
  32. docs/images/architecture_flixOpt-pre2.0.0.png +0 -0
  33. docs/images/architecture_flixOpt.png +0 -0
  34. docs/images/flixopt-icon.svg +0 -1
  35. docs/javascripts/mathjax.js +0 -18
  36. docs/release-notes/_template.txt +0 -32
  37. docs/release-notes/index.md +0 -7
  38. docs/release-notes/v2.0.0.md +0 -93
  39. docs/release-notes/v2.0.1.md +0 -12
  40. docs/release-notes/v2.1.0.md +0 -31
  41. docs/release-notes/v2.2.0.md +0 -55
  42. docs/user-guide/Mathematical Notation/Bus.md +0 -33
  43. docs/user-guide/Mathematical Notation/Effects, Penalty & Objective.md +0 -132
  44. docs/user-guide/Mathematical Notation/Flow.md +0 -26
  45. docs/user-guide/Mathematical Notation/Investment.md +0 -115
  46. docs/user-guide/Mathematical Notation/LinearConverter.md +0 -21
  47. docs/user-guide/Mathematical Notation/Piecewise.md +0 -49
  48. docs/user-guide/Mathematical Notation/Storage.md +0 -44
  49. docs/user-guide/Mathematical Notation/index.md +0 -22
  50. docs/user-guide/Mathematical Notation/others.md +0 -3
  51. docs/user-guide/index.md +0 -124
  52. flixopt/config.yaml +0 -10
  53. flixopt-2.2.0b0.dist-info/METADATA +0 -146
  54. flixopt-2.2.0b0.dist-info/RECORD +0 -59
  55. flixopt-2.2.0b0.dist-info/top_level.txt +0 -5
  56. pics/architecture_flixOpt-pre2.0.0.png +0 -0
  57. pics/architecture_flixOpt.png +0 -0
  58. pics/flixOpt_plotting.jpg +0 -0
  59. pics/flixopt-icon.svg +0 -1
  60. pics/pics.pptx +0 -0
  61. scripts/gen_ref_pages.py +0 -54
  62. tests/ressources/Zeitreihen2020.csv +0 -35137
  63. {flixopt-2.2.0b0.dist-info → flixopt-3.0.0.dist-info}/licenses/LICENSE +0 -0
flixopt/core.py CHANGED
@@ -3,13 +3,10 @@ This module contains the core functionality of the flixopt framework.
 It provides Datatypes, logging functionality, and some functions to transform data structures.
 """

-import inspect
-import json
 import logging
-import pathlib
-import textwrap
-from collections import Counter
-from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
+import warnings
+from itertools import permutations
+from typing import Literal, Union

 import numpy as np
 import pandas as pd
@@ -17,20 +14,17 @@ import xarray as xr

 logger = logging.getLogger('flixopt')

-Scalar = Union[int, float]
-"""A type representing a single number, either integer or float."""
+Scalar = int | float
+"""A single number, either integer or float."""

-NumericData = Union[int, float, np.integer, np.floating, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray]
-"""Represents any form of numeric data, from simple scalars to complex data structures."""
+PeriodicDataUser = int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray
+"""User data which has no time dimension. Internally converted to a Scalar or an xr.DataArray without a time dimension."""

-NumericDataTS = Union[NumericData, 'TimeSeriesData']
-"""Represents either standard numeric data or TimeSeriesData."""
+PeriodicData = xr.DataArray
+"""Internally used datatypes for periodic data."""

-TimestepData = NumericData
-"""Represents any form of numeric data that corresponds to timesteps."""
-
-ScenarioData = NumericData
-"""Represents any form of numeric data that corresponds to scenarios."""
+FlowSystemDimensions = Literal['time', 'period', 'scenario']
+"""Possible dimensions of a FlowSystem."""


 class PlausibilityError(Exception):
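Note on the hunk above: the 2.x `NumericData`/`NumericDataTS` aliases are replaced by a user-facing/internal split and the typing moves to PEP 604 unions (Python 3.10+). A hedged sketch of how the new aliases read in annotations; the `scale` helper is hypothetical and not part of flixopt:

```python
# Sketch only: mirrors the aliases introduced above (requires Python 3.10+).
import numpy as np
import pandas as pd
import xarray as xr

Scalar = int | float
PeriodicDataUser = int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray


def scale(value: PeriodicDataUser, factor: Scalar) -> xr.DataArray:
    """Hypothetical helper: normalize user input to a DataArray, then scale it."""
    return xr.DataArray(value) * factor


print(scale(np.array([1.0, 2.0]), 2.0))  # DataArray with values [2., 4.]
```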
@@ -45,1441 +39,607 @@ class ConversionError(Exception):
     pass


-class DataConverter:
-    """
-    Converts various data types into xarray.DataArray with optional time and scenario dimension.
+class TimeSeriesData(xr.DataArray):
+    """Minimal TimeSeriesData that inherits from xr.DataArray with aggregation metadata."""

-    Current implementation handles:
-    - Scalar values
-    - NumPy arrays
-    - xarray.DataArray
-    """
+    __slots__ = ()  # No additional instance attributes - everything goes in attrs

-    @staticmethod
-    def as_dataarray(
-        data: TimestepData, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None
-    ) -> xr.DataArray:
+    def __init__(
+        self,
+        *args,
+        aggregation_group: str | None = None,
+        aggregation_weight: float | None = None,
+        agg_group: str | None = None,
+        agg_weight: float | None = None,
+        **kwargs,
+    ):
         """
-        Convert data to xarray.DataArray with specified dimensions.
-
         Args:
-            data: The data to convert (scalar, array, or DataArray)
-            timesteps: Optional DatetimeIndex for time dimension
-            scenarios: Optional Index for scenario dimension
-
-        Returns:
-            DataArray with the converted data
+            *args: Arguments passed to DataArray
+            aggregation_group: Aggregation group name
+            aggregation_weight: Aggregation weight (0-1)
+            agg_group: Deprecated, use aggregation_group instead
+            agg_weight: Deprecated, use aggregation_weight instead
+            **kwargs: Additional arguments passed to DataArray
         """
-        # Prepare dimensions and coordinates
-        coords, dims = DataConverter._prepare_dimensions(timesteps, scenarios)
-
-        # Select appropriate converter based on data type
-        if isinstance(data, (int, float, np.integer, np.floating)):
-            return DataConverter._convert_scalar(data, coords, dims)
-
-        elif isinstance(data, xr.DataArray):
-            return DataConverter._convert_dataarray(data, coords, dims)
-
-        elif isinstance(data, np.ndarray):
-            return DataConverter._convert_ndarray(data, coords, dims)
+        if agg_group is not None:
+            warnings.warn('agg_group is deprecated, use aggregation_group instead', DeprecationWarning, stacklevel=2)
+            aggregation_group = agg_group
+        if agg_weight is not None:
+            warnings.warn('agg_weight is deprecated, use aggregation_weight instead', DeprecationWarning, stacklevel=2)
+            aggregation_weight = agg_weight

-        elif isinstance(data, pd.Series):
-            return DataConverter._convert_series(data, coords, dims)
+        if (aggregation_group is not None) and (aggregation_weight is not None):
+            raise ValueError('Use either aggregation_group or aggregation_weight, not both')

-        elif isinstance(data, pd.DataFrame):
-            return DataConverter._convert_dataframe(data, coords, dims)
-
-        else:
-            raise ConversionError(f'Unsupported data type: {type(data).__name__}')
+        # Let xarray handle all the initialization complexity
+        super().__init__(*args, **kwargs)

-    @staticmethod
-    def _validate_timesteps(timesteps: pd.DatetimeIndex) -> pd.DatetimeIndex:
-        """
-        Validate and prepare time index.
+        # Add our metadata to attrs after initialization
+        if aggregation_group is not None:
+            self.attrs['aggregation_group'] = aggregation_group
+        if aggregation_weight is not None:
+            self.attrs['aggregation_weight'] = aggregation_weight

-        Args:
-            timesteps: The time index to validate
+        # Always mark as TimeSeriesData
+        self.attrs['__timeseries_data__'] = True
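The constructor above keeps the 2.x keywords alive while moving all state into `attrs`. A hedged usage sketch (assuming `flixopt.core` as the import path; not part of the diff):

```python
import warnings

import numpy as np
import pandas as pd

from flixopt.core import TimeSeriesData  # assumed import path

time = pd.date_range('2024-01-01', periods=4, freq='h', name='time')

# New-style keyword: metadata lands in attrs, plus the marker set above.
ts = TimeSeriesData(np.ones(4), coords={'time': time}, aggregation_group='solar')
assert ts.attrs['aggregation_group'] == 'solar'
assert ts.attrs['__timeseries_data__'] is True

# Legacy keyword still works but warns, per the shim in __init__.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    TimeSeriesData(np.ones(4), coords={'time': time}, agg_group='solar')
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```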

-        Returns:
-            Validated time index
-        """
-        if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0:
-            raise ConversionError('Timesteps must be a non-empty DatetimeIndex')
+    def fit_to_coords(
+        self,
+        coords: dict[str, pd.Index],
+        name: str | None = None,
+    ) -> 'TimeSeriesData':
+        """Fit the data to the given coordinates. Returns a new TimeSeriesData object if the current coords are different."""
+        if self.coords.equals(xr.Coordinates(coords)):
+            return self
+
+        da = DataConverter.to_dataarray(self.data, coords=coords)
+        return self.__class__(
+            da,
+            aggregation_group=self.aggregation_group,
+            aggregation_weight=self.aggregation_weight,
+            name=name if name is not None else self.name,
+        )

-        if not timesteps.name == 'time':
-            raise ConversionError(f'Scenarios must be named "time", got "{timesteps.name}"')
+    @property
+    def aggregation_group(self) -> str | None:
+        return self.attrs.get('aggregation_group')

-        return timesteps
+    @property
+    def aggregation_weight(self) -> float | None:
+        return self.attrs.get('aggregation_weight')

-    @staticmethod
-    def _validate_scenarios(scenarios: pd.Index) -> pd.Index:
-        """
-        Validate and prepare scenario index.
+    @classmethod
+    def from_dataarray(
+        cls, da: xr.DataArray, aggregation_group: str | None = None, aggregation_weight: float | None = None
+    ):
+        """Create TimeSeriesData from DataArray, extracting metadata from attrs."""
+        # Get aggregation metadata from attrs or parameters
+        final_aggregation_group = (
+            aggregation_group if aggregation_group is not None else da.attrs.get('aggregation_group')
+        )
+        final_aggregation_weight = (
+            aggregation_weight if aggregation_weight is not None else da.attrs.get('aggregation_weight')
+        )

-        Args:
-            scenarios: The scenario index to validate
-        """
-        if not isinstance(scenarios, pd.Index) or len(scenarios) == 0:
-            raise ConversionError('Scenarios must be a non-empty Index')
+        return cls(da, aggregation_group=final_aggregation_group, aggregation_weight=final_aggregation_weight)

-        if not scenarios.name == 'scenario':
-            raise ConversionError(f'Scenarios must be named "scenario", got "{scenarios.name}"')
+    @classmethod
+    def is_timeseries_data(cls, obj) -> bool:
+        """Check if an object is TimeSeriesData."""
+        return isinstance(obj, xr.DataArray) and obj.attrs.get('__timeseries_data__', False)
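Because both the marker and the aggregation metadata live in `attrs`, they round-trip through plain DataArrays; `from_dataarray` and `is_timeseries_data` above are the two ends of that trip. A sketch under the same import assumption:

```python
import numpy as np
import pandas as pd
import xarray as xr

from flixopt.core import TimeSeriesData  # assumed import path

time = pd.date_range('2024-01-01', periods=3, freq='h', name='time')
da = xr.DataArray(np.arange(3), coords={'time': time}, attrs={'aggregation_weight': 0.5})

assert not TimeSeriesData.is_timeseries_data(da)  # plain DataArray lacks the marker
ts = TimeSeriesData.from_dataarray(da)            # weight is read out of attrs
assert ts.aggregation_weight == 0.5
assert TimeSeriesData.is_timeseries_data(ts)      # marker was set by __init__
```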

-        return scenarios
+    def __repr__(self):
+        agg_info = []
+        if self.aggregation_group:
+            agg_info.append(f"aggregation_group='{self.aggregation_group}'")
+        if self.aggregation_weight is not None:
+            agg_info.append(f'aggregation_weight={self.aggregation_weight}')

-    @staticmethod
-    def _prepare_dimensions(
-        timesteps: Optional[pd.DatetimeIndex], scenarios: Optional[pd.Index]
-    ) -> Tuple[Dict[str, pd.Index], Tuple[str, ...]]:
-        """
-        Prepare coordinates and dimensions for the DataArray.
+        info_str = f'TimeSeriesData({", ".join(agg_info)})' if agg_info else 'TimeSeriesData'
+        return f'{info_str}\n{super().__repr__()}'

-        Args:
-            timesteps: Optional time index
-            scenarios: Optional scenario index
+    @property
+    def agg_group(self):
+        warnings.warn('agg_group is deprecated, use aggregation_group instead', DeprecationWarning, stacklevel=2)
+        return self.aggregation_group

-        Returns:
-            Tuple of (coordinates dict, dimensions tuple)
-        """
-        # Validate inputs if provided
-        if timesteps is not None:
-            timesteps = DataConverter._validate_timesteps(timesteps)
+    @property
+    def agg_weight(self):
+        warnings.warn('agg_weight is deprecated, use aggregation_weight instead', DeprecationWarning, stacklevel=2)
+        return self.aggregation_weight

-        if scenarios is not None:
-            scenarios = DataConverter._validate_scenarios(scenarios)

-        # Build coordinates and dimensions
-        coords = {}
-        dims = []
+TemporalDataUser = (
+    int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray | TimeSeriesData
+)
+"""User data which might have a time dimension. Internally converted to an xr.DataArray with time dimension."""

-        if timesteps is not None:
-            coords['time'] = timesteps
-            dims.append('time')
+TemporalData = xr.DataArray | TimeSeriesData
+"""Internally used datatypes for temporal data (data with a time dimension)."""

-        if scenarios is not None:
-            coords['scenario'] = scenarios
-            dims.append('scenario')

-        return coords, tuple(dims)
+class DataConverter:
+    """
+    Converts various data types into xarray.DataArray with specified target coordinates.
+
+    This converter handles intelligent dimension matching and broadcasting to ensure
+    the output DataArray always conforms to the specified coordinate structure.
+
+    Supported input types:
+    - Scalars: int, float, np.number (broadcast to all target dimensions)
+    - 1D data: np.ndarray, pd.Series, single-column DataFrame (matched by length/index)
+    - Multi-dimensional arrays: np.ndarray, DataFrame (matched by shape)
+    - xr.DataArray: validated and potentially broadcast to target dimensions
+
+    The converter uses smart matching strategies:
+    - Series: matched by exact index comparison
+    - 1D arrays: matched by length to target coordinates
+    - Multi-dimensional arrays: matched by shape permutation analysis
+    - DataArrays: validated for compatibility and broadcast as needed
+    """

     @staticmethod
-    def _convert_scalar(
-        data: Union[int, float, np.integer, np.floating], coords: Dict[str, pd.Index], dims: Tuple[str, ...]
+    def _match_series_by_index_alignment(
+        data: pd.Series, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
     ) -> xr.DataArray:
         """
-        Convert a scalar value to a DataArray.
+        Match pandas Series to target dimension by exact index comparison.
+
+        Attempts to find a target dimension whose coordinates exactly match
+        the Series index values, ensuring proper alignment.

         Args:
-            data: The scalar value
-            coords: Coordinate dictionary
-            dims: Dimension names
+            data: pandas Series to convert
+            target_coords: Available target coordinates {dim_name: coordinate_index}
+            target_dims: Target dimension names to consider for matching

         Returns:
-            DataArray with the scalar value
-        """
-        if isinstance(data, (np.integer, np.floating)):
-            data = data.item()
-        return xr.DataArray(data, coords=coords, dims=dims)
+            DataArray with Series matched to the appropriate dimension

-    @staticmethod
-    def _convert_dataarray(data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
+        Raises:
+            ConversionError: If Series cannot be matched to any target dimension,
+                or if no target dimensions provided for multi-element Series
         """
-        Convert an existing DataArray to desired dimensions.
-
-        Args:
-            data: The source DataArray
-            coords: Target coordinates
-            dims: Target dimensions
+        # Handle edge case: no target dimensions
+        if len(target_dims) == 0:
+            if len(data) != 1:
+                raise ConversionError(
+                    f'Cannot convert multi-element Series without target dimensions. '
+                    f'Series has {len(data)} elements but no target dimensions specified.'
+                )
+            return xr.DataArray(data.iloc[0])

-        Returns:
-            DataArray with the target dimensions
-        """
-        # No dimensions case
-        if len(dims) == 0:
-            if data.size != 1:
-                raise ConversionError('When converting to dimensionless DataArray, source must be scalar')
-            return xr.DataArray(data.values.item())
-
-        # Check if data already has matching dimensions and coordinates
-        if set(data.dims) == set(dims):
-            # Check if coordinates match
-            is_compatible = True
-            for dim in dims:
-                if dim in data.dims and not np.array_equal(data.coords[dim].values, coords[dim].values):
-                    is_compatible = False
-                    break
-
-            if is_compatible:
-                # Ensure dimensions are in the correct order
-                if data.dims != dims:
-                    # Transpose to get dimensions in the right order
-                    return data.transpose(*dims).copy(deep=True)
-                else:
-                    # Return existing DataArray if compatible and order is correct
-                    return data.copy(deep=True)
-
-        # Handle dimension broadcasting
-        if len(data.dims) == 1 and len(dims) == 2:
-            # Single dimension to two dimensions
-            if data.dims[0] == 'time' and 'scenario' in dims:
-                # Broadcast time dimension to include scenarios
-                return DataConverter._broadcast_time_to_scenarios(data, coords, dims)
-
-            elif data.dims[0] == 'scenario' and 'time' in dims:
-                # Broadcast scenario dimension to include time
-                return DataConverter._broadcast_scenario_to_time(data, coords, dims)
+        # Attempt exact index matching with each target dimension
+        for dim_name in target_dims:
+            target_index = target_coords[dim_name]
+            if data.index.equals(target_index):
+                return xr.DataArray(data.values.copy(), coords={dim_name: target_index}, dims=dim_name)

+        # No exact matches found
+        available_lengths = {dim: len(target_coords[dim]) for dim in target_dims}
         raise ConversionError(
-            f'Cannot convert {data.dims} to {dims}. Source coordinates: {data.coords}, Target coordinates: {coords}'
+            f'Series index does not match any target dimension coordinates. '
+            f'Series length: {len(data)}, available coordinate lengths: {available_lengths}'
         )
-    @staticmethod
-    def _broadcast_time_to_scenarios(
-        data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
-    ) -> xr.DataArray:
-        """
-        Broadcast a time-only DataArray to include scenarios.
-
-        Args:
-            data: The time-indexed DataArray
-            coords: Target coordinates
-            dims: Target dimensions
-
-        Returns:
-            DataArray with time and scenario dimensions
-        """
-        # Check compatibility
-        if not np.array_equal(data.coords['time'].values, coords['time'].values):
-            raise ConversionError("Source time coordinates don't match target time coordinates")
-
-        if len(coords['scenario']) <= 1:
-            return data.copy(deep=True)
-
-        # Broadcast values
-        values = np.repeat(data.values[:, np.newaxis], len(coords['scenario']), axis=1)
-        return xr.DataArray(values.copy(), coords=coords, dims=dims)

     @staticmethod
-    def _broadcast_scenario_to_time(
-        data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
+    def _match_1d_array_by_length(
+        data: np.ndarray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
     ) -> xr.DataArray:
         """
-        Broadcast a scenario-only DataArray to include time.
+        Match 1D numpy array to target dimension by length comparison.
+
+        Finds target dimensions whose coordinate length matches the array length.
+        Requires unique length match to avoid ambiguity.

         Args:
-            data: The scenario-indexed DataArray
-            coords: Target coordinates
-            dims: Target dimensions
+            data: 1D numpy array to convert
+            target_coords: Available target coordinates {dim_name: coordinate_index}
+            target_dims: Target dimension names to consider for matching

         Returns:
-            DataArray with time and scenario dimensions
-        """
-        # Check compatibility
-        if not np.array_equal(data.coords['scenario'].values, coords['scenario'].values):
-            raise ConversionError("Source scenario coordinates don't match target scenario coordinates")
-
-        # Broadcast values
-        values = np.repeat(data.values[:, np.newaxis], len(coords['time']), axis=1).T
-        return xr.DataArray(values.copy(), coords=coords, dims=dims)
+            DataArray with array matched to the uniquely identified dimension

-    @staticmethod
-    def _convert_ndarray(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
+        Raises:
+            ConversionError: If array length matches zero or multiple target dimensions,
+                or if no target dimensions provided for multi-element array
         """
-        Convert a NumPy array to a DataArray.
-
-        Args:
-            data: The NumPy array
-            coords: Target coordinates
-            dims: Target dimensions
+        # Handle edge case: no target dimensions
+        if len(target_dims) == 0:
+            if len(data) != 1:
+                raise ConversionError(
+                    f'Cannot convert multi-element array without target dimensions. Array has {len(data)} elements.'
+                )
+            return xr.DataArray(data[0])

-        Returns:
-            DataArray from the NumPy array
-        """
-        # Handle dimensionless case
-        if len(dims) == 0:
-            if data.size != 1:
-                raise ConversionError('Without dimensions, can only convert scalar arrays')
-            return xr.DataArray(data.item())
+        # Find all dimensions with matching lengths
+        array_length = len(data)
+        matching_dims = []
+        coordinate_lengths = {}

-        # Handle single dimension
-        elif len(dims) == 1:
-            return DataConverter._convert_ndarray_single_dim(data, coords, dims)
+        for dim_name in target_dims:
+            coord_length = len(target_coords[dim_name])
+            coordinate_lengths[dim_name] = coord_length
+            if array_length == coord_length:
+                matching_dims.append(dim_name)

-        # Handle two dimensions
-        elif len(dims) == 2:
-            return DataConverter._convert_ndarray_two_dims(data, coords, dims)
+        # Validate matching results
+        if len(matching_dims) == 0:
+            raise ConversionError(
+                f'Array length {array_length} does not match any target dimension lengths: {coordinate_lengths}'
+            )
+        elif len(matching_dims) > 1:
+            raise ConversionError(
+                f'Array length {array_length} matches multiple dimensions: {matching_dims}. '
+                f'Cannot uniquely determine target dimension. Consider using explicit '
+                f'dimension specification or converting to DataArray manually.'
+            )

-        else:
-            raise ConversionError('Maximum 2 dimensions supported')
+        # Create DataArray with the uniquely matched dimension
+        matched_dim = matching_dims[0]
+        return xr.DataArray(data.copy(), coords={matched_dim: target_coords[matched_dim]}, dims=matched_dim)
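The unique-length rule above is worth illustrating: a 1-D array converts only when exactly one target dimension shares its length; equal-length dimensions are rejected rather than guessed. Sketch, same import assumption:

```python
import numpy as np
import pandas as pd

from flixopt.core import ConversionError, DataConverter  # assumed import path

unambiguous = {
    'time': pd.Index(range(4), name='time'),
    'scenario': pd.Index(['a', 'b'], name='scenario'),
}
da = DataConverter.to_dataarray(np.arange(4), unambiguous)  # length 4 matches only 'time'
assert 'time' in da.dims

ambiguous = {
    'time': pd.Index(range(2), name='time'),
    'scenario': pd.Index(['a', 'b'], name='scenario'),
}
try:
    DataConverter.to_dataarray(np.arange(2), ambiguous)  # length 2 matches both dims
except ConversionError as err:
    print('rejected as ambiguous:', err)
```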

     @staticmethod
-    def _convert_ndarray_single_dim(
-        data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
+    def _match_multidim_array_by_shape_permutation(
+        data: np.ndarray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
     ) -> xr.DataArray:
         """
-        Convert a NumPy array to a single-dimension DataArray.
+        Match multi-dimensional array to target dimensions using shape permutation analysis.

-        Args:
-            data: The NumPy array
-            coords: Target coordinates
-            dims: Target dimensions (length 1)
-
-        Returns:
-            DataArray with single dimension
-        """
-        dim_name = dims[0]
-        dim_length = len(coords[dim_name])
-
-        if data.ndim == 1:
-            # 1D array must match dimension length
-            if data.shape[0] != dim_length:
-                raise ConversionError(f"Array length {data.shape[0]} doesn't match {dim_name} length {dim_length}")
-            return xr.DataArray(data, coords=coords, dims=dims)
-        else:
-            raise ConversionError(f'Expected 1D array for single dimension, got {data.ndim}D')
-
-    @staticmethod
-    def _convert_ndarray_two_dims(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
-        """
-        Convert a NumPy array to a two-dimension DataArray.
+        Analyzes all possible mappings between array shape and target coordinate lengths
+        to find the unique valid dimension assignment.

         Args:
-            data: The NumPy array
-            coords: Target coordinates
-            dims: Target dimensions (length 2)
+            data: Multi-dimensional numpy array to convert
+            target_coords: Available target coordinates {dim_name: coordinate_index}
+            target_dims: Target dimension names to consider for matching

         Returns:
-            DataArray with two dimensions
-        """
-        scenario_length = len(coords['scenario'])
-        time_length = len(coords['time'])
-
-        if data.ndim == 1:
-            # For 1D array, create 2D array based on which dimension it matches
-            if data.shape[0] == time_length:
-                # Broadcast across scenarios
-                values = np.repeat(data[:, np.newaxis], scenario_length, axis=1)
-                return xr.DataArray(values, coords=coords, dims=dims)
-            elif data.shape[0] == scenario_length:
-                # Broadcast across time
-                values = np.repeat(data[np.newaxis, :], time_length, axis=0)
-                return xr.DataArray(values, coords=coords, dims=dims)
-            else:
-                raise ConversionError(f"1D array length {data.shape[0]} doesn't match either dimension")
-
-        elif data.ndim == 2:
-            # For 2D array, shape must match dimensions
-            expected_shape = (time_length, scenario_length)
-            if data.shape != expected_shape:
-                raise ConversionError(f"2D array shape {data.shape} doesn't match expected shape {expected_shape}")
-            return xr.DataArray(data, coords=coords, dims=dims)
-
-        else:
-            raise ConversionError(f'Expected 1D or 2D array for two dimensions, got {data.ndim}D')
-
-    @staticmethod
-    def _convert_series(data: pd.Series, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
-        """
-        Convert pandas Series to xarray DataArray.
-
-        Args:
-            data: pandas Series to convert
-            coords: Target coordinates
-            dims: Target dimensions
+            DataArray with array dimensions mapped to target dimensions by shape

-        Returns:
-            DataArray from the pandas Series
+        Raises:
+            ConversionError: If array shape cannot be uniquely mapped to target dimensions,
+                or if no target dimensions provided for multi-element array
         """
-        # Handle single dimension case
-        if len(dims) == 1:
-            dim_name = dims[0]
-
-            # Check if series index matches the dimension
-            if data.index.equals(coords[dim_name]):
-                return xr.DataArray(data.values.copy(), coords=coords, dims=dims)
-            else:
-                raise ConversionError(
-                    f"Series index doesn't match {dim_name} coordinates.\n"
-                    f'Series index: {data.index}\n'
-                    f'Target {dim_name} coordinates: {coords[dim_name]}'
-                )
-
-        # Handle two dimensions case
-        elif len(dims) == 2:
-            # Check if dimensions are time and scenario
-            if dims != ('time', 'scenario'):
-                raise ConversionError(
-                    f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}'
-                )
-
-            # Case 1: Series is indexed by time
-            if data.index.equals(coords['time']):
-                # Broadcast across scenarios
-                values = np.repeat(data.values[:, np.newaxis], len(coords['scenario']), axis=1)
-                return xr.DataArray(values.copy(), coords=coords, dims=dims)
-
-            # Case 2: Series is indexed by scenario
-            elif data.index.equals(coords['scenario']):
-                # Broadcast across time
-                values = np.repeat(data.values[np.newaxis, :], len(coords['time']), axis=0)
-                return xr.DataArray(values.copy(), coords=coords, dims=dims)
-
-            else:
+        # Handle edge case: no target dimensions
+        if len(target_dims) == 0:
+            if data.size != 1:
                 raise ConversionError(
-                    "Series index must match either 'time' or 'scenario' coordinates.\n"
-                    f'Series index: {data.index}\n'
-                    f'Target time coordinates: {coords["time"]}\n'
-                    f'Target scenario coordinates: {coords["scenario"]}'
+                    f'Cannot convert multi-element array without target dimensions. '
+                    f'Array has {data.size} elements with shape {data.shape}.'
                 )
+            return xr.DataArray(data.item())

-        else:
-            raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}')
-
-    @staticmethod
-    def _convert_dataframe(data: pd.DataFrame, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
-        """
-        Convert pandas DataFrame to xarray DataArray.
-        Only allows time as index and scenarios as columns.
-
-        Args:
-            data: pandas DataFrame to convert
-            coords: Target coordinates
-            dims: Target dimensions
+        array_shape = data.shape
+        coordinate_lengths = {dim: len(target_coords[dim]) for dim in target_dims}

-        Returns:
-            DataArray from the pandas DataFrame
-        """
-        # Single dimension case
-        if len(dims) == 1:
-            # If DataFrame has one column, treat it like a Series
-            if len(data.columns) == 1:
-                series = data.iloc[:, 0]
-                return DataConverter._convert_series(series, coords, dims)
+        # Find all valid dimension permutations that match the array shape
+        valid_mappings = []
+        for dim_permutation in permutations(target_dims, data.ndim):
+            shape_matches = all(
+                array_shape[i] == coordinate_lengths[dim_permutation[i]] for i in range(len(dim_permutation))
+            )
+            if shape_matches:
+                valid_mappings.append(dim_permutation)

+        # Validate mapping results
+        if len(valid_mappings) == 0:
             raise ConversionError(
-                f'When converting DataFrame to single-dimension DataArray, DataFrame must have exactly one column, got {len(data.columns)}'
+                f'Array shape {array_shape} cannot be mapped to any combination of target '
+                f'coordinate lengths: {coordinate_lengths}. Consider reshaping the array '
+                f'or adjusting target coordinates.'
             )

-        # Two dimensions case
-        elif len(dims) == 2:
-            # Check if dimensions are time and scenario
-            if dims != ('time', 'scenario'):
-                raise ConversionError(
-                    f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}'
-                )
-
-            # DataFrame must have time as index and scenarios as columns
-            if data.index.equals(coords['time']) and data.columns.equals(coords['scenario']):
-                # Create DataArray with proper dimension order
-                return xr.DataArray(data.values.copy(), coords=coords, dims=dims)
-            else:
-                raise ConversionError(
-                    'DataFrame must have time as index and scenarios as columns.\n'
-                    f'DataFrame index: {data.index}\n'
-                    f'DataFrame columns: {data.columns}\n'
-                    f'Target time coordinates: {coords["time"]}\n'
-                    f'Target scenario coordinates: {coords["scenario"]}'
-                )
-
-        else:
-            raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}')
-
-
-class TimeSeriesData:
-    # TODO: Move to Interface.py
-    def __init__(self, data: TimestepData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None):
-        """
-        timeseries class for transmit timeseries AND special characteristics of timeseries,
-        i.g. to define weights needed in calculation_type 'aggregated'
-        EXAMPLE solar:
-            you have several solar timeseries. These should not be overweighted
-            compared to the remaining timeseries (i.g. heat load, price)!
-            fixed_relative_profile_solar1 = TimeSeriesData(sol_array_1, type = 'solar')
-            fixed_relative_profile_solar2 = TimeSeriesData(sol_array_2, type = 'solar')
-            fixed_relative_profile_solar3 = TimeSeriesData(sol_array_3, type = 'solar')
-            --> this 3 series of same type share one weight, i.e. internally assigned each weight = 1/3
-            (instead of standard weight = 1)
-
-        Args:
-            data: The timeseries data, which can be a scalar, array, or numpy array.
-            agg_group: The group this TimeSeriesData is a part of. agg_weight is split between members of a group. Default is None.
-            agg_weight: The weight for calculation_type 'aggregated', should be between 0 and 1. Default is None.
-
-        Raises:
-            Exception: If both agg_group and agg_weight are set, an exception is raised.
-        """
-        self.data = data
-        self.agg_group = agg_group
-        self.agg_weight = agg_weight
-        if (agg_group is not None) and (agg_weight is not None):
-            raise ValueError('Either <agg_group> or explicit <agg_weigth> can be used. Not both!')
-        self.label: Optional[str] = None
-
-    def __repr__(self):
-        # Get the constructor arguments and their current values
-        init_signature = inspect.signature(self.__init__)
-        init_args = init_signature.parameters
-
-        # Create a dictionary with argument names and their values
-        args_str = ', '.join(f'{name}={repr(getattr(self, name, None))}' for name in init_args if name != 'self')
-        return f'{self.__class__.__name__}({args_str})'
-
-    def __str__(self):
-        return str(self.data)
-
-
-class TimeSeries:
-    """
-    A class representing time series data with active and stored states.
-
-    TimeSeries provides a way to store time-indexed data and work with temporal subsets.
-    It supports arithmetic operations, aggregation, and JSON serialization.
+        if len(valid_mappings) > 1:
+            raise ConversionError(
+                f'Array shape {array_shape} matches multiple dimension combinations: '
+                f'{valid_mappings}. Cannot uniquely determine dimension mapping. '
+                f'Consider using explicit dimension specification.'
+            )

-    Attributes:
-        name (str): The name of the time series
-        aggregation_weight (Optional[float]): Weight used for aggregation
-        aggregation_group (Optional[str]): Group name for shared aggregation weighting
-        has_extra_timestep (bool): Whether this series needs an extra timestep
-    """
+        # Create DataArray with the uniquely determined mapping
+        matched_dims = valid_mappings[0]
+        matched_coords = {dim: target_coords[dim] for dim in matched_dims}

-    @classmethod
-    def from_datasource(
-        cls,
-        data: NumericDataTS,
-        name: str,
-        timesteps: pd.DatetimeIndex,
-        scenarios: Optional[pd.Index] = None,
-        aggregation_weight: Optional[float] = None,
-        aggregation_group: Optional[str] = None,
-        has_extra_timestep: bool = False,
-    ) -> 'TimeSeries':
-        """
-        Initialize the TimeSeries from multiple data sources.
+        return xr.DataArray(data.copy(), coords=matched_coords, dims=matched_dims)
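Shape-permutation matching, illustrated: a (2, 3) array can only mean ('scenario', 'time') against 2 scenarios and 3 timesteps, so the mapping is accepted; a square array would match two permutations and raise instead. Sketch, same import assumption:

```python
import numpy as np
import pandas as pd

from flixopt.core import DataConverter  # assumed import path

coords = {
    'time': pd.date_range('2024-01-01', periods=3, freq='h', name='time'),
    'scenario': pd.Index(['low', 'high'], name='scenario'),
}

arr = np.arange(6).reshape(2, 3)  # shape (2, 3) -> ('scenario', 'time') by elimination
da = DataConverter.to_dataarray(arr, coords)
assert set(da.dims) == {'time', 'scenario'}
assert int(da.sel(scenario='high').values[0]) == 3  # row 1 of arr became scenario 'high'
```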

-        Args:
-            data: The time series data
-            name: The name of the TimeSeries
-            timesteps: The timesteps of the TimeSeries
-            scenarios: The scenarios of the TimeSeries
-            aggregation_weight: The weight in aggregation calculations
-            aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing
-            has_extra_timestep: Whether this series requires an extra timestep
-
-        Returns:
-            A new TimeSeries instance
+    @staticmethod
+    def _broadcast_dataarray_to_target_specification(
+        source_data: xr.DataArray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
+    ) -> xr.DataArray:
         """
-        return cls(
-            DataConverter.as_dataarray(data, timesteps, scenarios),
-            name,
-            aggregation_weight,
-            aggregation_group,
-            has_extra_timestep,
-        )
+        Broadcast DataArray to conform to target coordinate and dimension specification.

-    @classmethod
-    def from_json(cls, data: Optional[Dict[str, Any]] = None, path: Optional[str] = None) -> 'TimeSeries':
-        """
-        Load a TimeSeries from a dictionary or json file.
+        Performs comprehensive validation and broadcasting to ensure the result exactly
+        matches the target specification. Handles scalar expansion, dimension validation,
+        coordinate compatibility checking, and broadcasting to additional dimensions.

         Args:
-            data: Dictionary containing TimeSeries data
-            path: Path to a JSON file containing TimeSeries data
+            source_data: Source DataArray to broadcast
+            target_coords: Target coordinates {dim_name: coordinate_index}
+            target_dims: Target dimension names in desired order

         Returns:
-            A new TimeSeries instance
+            DataArray broadcast to target specification with proper dimension ordering

         Raises:
-            ValueError: If both path and data are provided or neither is provided
-        """
-        if (path is None and data is None) or (path is not None and data is not None):
-            raise ValueError("Exactly one of 'path' or 'data' must be provided")
-
-        if path is not None:
-            with open(path, 'r') as f:
-                data = json.load(f)
-
-        # Convert ISO date strings to datetime objects
-        data['data']['coords']['time']['data'] = pd.to_datetime(data['data']['coords']['time']['data'])
-
-        # Create the TimeSeries instance
-        return cls(
-            data=xr.DataArray.from_dict(data['data']),
-            name=data['name'],
-            aggregation_weight=data['aggregation_weight'],
-            aggregation_group=data['aggregation_group'],
-            has_extra_timestep=data['has_extra_timestep'],
-        )
-
-    def __init__(
-        self,
-        data: xr.DataArray,
-        name: str,
-        aggregation_weight: Optional[float] = None,
-        aggregation_group: Optional[str] = None,
-        has_extra_timestep: bool = False,
-    ):
+            ConversionError: If broadcasting is impossible due to incompatible dimensions
+                or coordinate mismatches
         """
-        Initialize a TimeSeries with a DataArray.
-
-        Args:
-            data: The DataArray containing time series data
-            name: The name of the TimeSeries
-            aggregation_weight: The weight in aggregation calculations
-            aggregation_group: Group this TimeSeries belongs to for weight sharing
-            has_extra_timestep: Whether this series requires an extra timestep
-
-        Raises:
-            ValueError: If data has unsupported dimensions
-        """
-        allowed_dims = {'time', 'scenario'}
-        if not set(data.dims).issubset(allowed_dims):
-            raise ValueError(f'DataArray dimensions must be subset of {allowed_dims}. Got {data.dims}')
+        # Validate: cannot reduce dimensions
+        if len(source_data.dims) > len(target_dims):
+            raise ConversionError(
+                f'Cannot reduce DataArray dimensionality from {len(source_data.dims)} '
+                f'to {len(target_dims)} dimensions. Source dims: {source_data.dims}, '
+                f'target dims: {target_dims}'
+            )

-        self.name = name
-        self.aggregation_weight = aggregation_weight
-        self.aggregation_group = aggregation_group
-        self.has_extra_timestep = has_extra_timestep
+        # Validate: all source dimensions must exist in target
+        missing_dims = set(source_data.dims) - set(target_dims)
+        if missing_dims:
+            raise ConversionError(
+                f'Source DataArray has dimensions {missing_dims} not present in target dimensions {target_dims}'
+            )

-        # Data management
-        self._stored_data = data.copy(deep=True)
-        self._backup = self._stored_data.copy(deep=True)
+        # Validate: coordinate compatibility for overlapping dimensions
+        for dim in source_data.dims:
+            if dim in source_data.coords and dim in target_coords:
+                source_coords = source_data.coords[dim]
+                target_coords_for_dim = target_coords[dim]

-        # Selection state
-        self._selected_timesteps: Optional[pd.DatetimeIndex] = None
-        self._selected_scenarios: Optional[pd.Index] = None
+                if not np.array_equal(source_coords.values, target_coords_for_dim.values):
+                    raise ConversionError(
+                        f'Coordinate mismatch for dimension "{dim}". '
+                        f'Source and target coordinates have different values.'
+                    )

-        # Flag for whether this series has various dimensions
-        self.has_time_dim = 'time' in data.dims
-        self.has_scenario_dim = 'scenario' in data.dims
+        # Create target template for broadcasting
+        target_shape = [len(target_coords[dim]) for dim in target_dims]
+        target_template = xr.DataArray(np.empty(target_shape), coords=target_coords, dims=target_dims)

-    def reset(self) -> None:
-        """
-        Reset selections to include all timesteps and scenarios.
-        This is equivalent to clearing all selections.
-        """
-        self.set_selection(None, None)
+        # Perform broadcasting and ensure proper dimension ordering
+        broadcasted = source_data.broadcast_like(target_template)
+        return broadcasted.transpose(*target_dims)
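The broadcast step above only ever adds dimensions: a time-indexed DataArray gains the scenario axis via `broadcast_like`, while dropping a dimension or mismatched coordinates raise `ConversionError`. Minimal sketch, same import assumption:

```python
import numpy as np
import pandas as pd
import xarray as xr

from flixopt.core import DataConverter  # assumed import path

time = pd.date_range('2024-01-01', periods=3, freq='h', name='time')
coords = {'time': time, 'scenario': pd.Index(['low', 'high'], name='scenario')}

time_only = xr.DataArray(np.arange(3), coords={'time': time}, dims='time')
full = DataConverter.to_dataarray(time_only, coords)  # gains the 'scenario' axis
assert set(full.dims) == {'time', 'scenario'}
assert (full.sel(scenario='low') == full.sel(scenario='high')).all()  # identical copies per scenario
```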
660
404
 
661
- def restore_data(self) -> None:
662
- """
663
- Restore stored_data from the backup and reset active timesteps.
405
+ @classmethod
406
+ def to_dataarray(
407
+ cls,
408
+ data: int
409
+ | float
410
+ | bool
411
+ | np.integer
412
+ | np.floating
413
+ | np.bool_
414
+ | np.ndarray
415
+ | pd.Series
416
+ | pd.DataFrame
417
+ | xr.DataArray,
418
+ coords: dict[str, pd.Index] | None = None,
419
+ ) -> xr.DataArray:
664
420
  """
665
- self._stored_data = self._backup.copy(deep=True)
666
- self.reset()
421
+ Convert various data types to xarray.DataArray with specified target coordinates.
667
422
 
668
- def to_json(self, path: Optional[pathlib.Path] = None) -> Dict[str, Any]:
669
- """
670
- Save the TimeSeries to a dictionary or JSON file.
423
+ This is the main conversion method that intelligently handles different input types
424
+ and ensures the result conforms to the specified coordinate structure through
425
+ smart dimension matching and broadcasting.
671
426
 
672
427
  Args:
673
- path: Optional path to save JSON file
674
-
675
- Returns:
676
- Dictionary representation of the TimeSeries
677
- """
678
- data = {
679
- 'name': self.name,
680
- 'aggregation_weight': self.aggregation_weight,
681
- 'aggregation_group': self.aggregation_group,
682
- 'has_extra_timestep': self.has_extra_timestep,
683
- 'data': self.selected_data.to_dict(),
684
- }
685
-
686
- # Convert datetime objects to ISO strings
687
- data['data']['coords']['time']['data'] = [date.isoformat() for date in data['data']['coords']['time']['data']]
688
-
689
- # Save to file if path is provided
690
- if path is not None:
691
- indent = 4 if len(self.selected_timesteps) <= 480 else None
692
- with open(path, 'w', encoding='utf-8') as f:
693
- json.dump(data, f, indent=indent, ensure_ascii=False)
694
-
695
- return data
696
-
697
- @property
698
- def stats(self) -> str:
699
- """
700
- Return a statistical summary of the active data.
428
+ data: Input data to convert. Supported types:
429
+ - Scalars: int, float, bool, np.integer, np.floating, np.bool_
430
+ - Arrays: np.ndarray (1D and multi-dimensional)
431
+ - Pandas: pd.Series, pd.DataFrame
432
+ - xarray: xr.DataArray
433
+ coords: Target coordinate specification as {dimension_name: coordinate_index}.
434
+ All coordinate indices must be pandas.Index objects.
701
435
 
702
436
  Returns:
703
- String representation of data statistics
704
- """
705
- return get_numeric_stats(self.selected_data, padd=0, by_scenario=True)
437
+ DataArray conforming to the target coordinate specification,
438
+ with input data appropriately matched and broadcast
706
439
 
707
- @property
708
- def all_equal(self) -> bool:
709
- """Check if all values in the series are equal."""
710
- return np.unique(self.selected_data.values).size == 1
711
-
712
- @property
713
- def selected_data(self) -> xr.DataArray:
714
- """
715
- Get a view of stored_data based on current selections.
716
- This computes the view dynamically based on the current selection state.
717
- """
718
- return self._stored_data.sel(**self._valid_selector)
719
-
720
- @property
721
- def selected_timesteps(self) -> Optional[pd.DatetimeIndex]:
722
- """Get the current active timesteps, or None if no time dimension."""
723
- if not self.has_time_dim:
724
- return None
725
- if self._selected_timesteps is None:
726
- return self._stored_data.indexes['time']
727
- return self._selected_timesteps
728
-
729
- @property
730
- def active_scenarios(self) -> Optional[pd.Index]:
731
- """Get the current active scenarios, or None if no scenario dimension."""
732
- if not self.has_scenario_dim:
733
- return None
734
- if self._selected_scenarios is None:
735
- return self._stored_data.indexes['scenario']
736
- return self._selected_scenarios
440
+ Raises:
441
+ ConversionError: If data type is unsupported, conversion fails,
442
+ or broadcasting to target coordinates is impossible
737
443
 
738
- @property
739
- def stored_data(self) -> xr.DataArray:
740
- """Get a copy of the full stored data."""
741
- return self._stored_data.copy()
444
+ Examples:
445
+ # Scalar broadcasting
446
+ >>> coords = {'x': pd.Index([1, 2, 3]), 'y': pd.Index(['a', 'b'])}
447
+ >>> converter.to_dataarray(42, coords)
448
+ # Returns: DataArray with shape (3, 2), all values = 42
742
449
 
743
- def update_stored_data(self, value: xr.DataArray) -> None:
744
- """
745
- Update stored_data and refresh selected_data.
450
+ # Series index matching
451
+ >>> series = pd.Series([10, 20, 30], index=[1, 2, 3])
452
+ >>> converter.to_dataarray(series, coords)
453
+ # Returns: DataArray matched to 'x' dimension, broadcast to 'y'
746
454
 
747
- Args:
748
- value: New data to store
455
+ # Array shape matching
456
+ >>> array = np.array([[1, 2], [3, 4], [5, 6]]) # Shape (3, 2)
457
+ >>> converter.to_dataarray(array, coords)
458
+ # Returns: DataArray with dimensions ('x', 'y') based on shape
749
459
  """
750
- new_data = DataConverter.as_dataarray(
751
- value,
752
- timesteps=self.selected_timesteps if self.has_time_dim else None,
753
- scenarios=self.active_scenarios if self.has_scenario_dim else None,
754
- )
460
+ # Prepare and validate target specification
461
+ if coords is None:
462
+ coords = {}
755
463
 
756
- # Skip if data is unchanged to avoid overwriting backup
757
- if new_data.equals(self._stored_data):
758
- return
464
+ validated_coords, target_dims = cls._validate_and_prepare_target_coordinates(coords)
759
465
 
760
- self._stored_data = new_data
761
- self.set_selection(None, None) # Reset selections to full dataset
466
+ # Convert input data to intermediate DataArray based on type
467
+ if isinstance(data, (int, float, bool, np.integer, np.floating, np.bool_)):
468
+ # Scalar values - create scalar DataArray
469
+ intermediate = xr.DataArray(data.item() if hasattr(data, 'item') else data)
762
470
 
763
- def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None:
764
- """
765
- Set active subset for timesteps and scenarios.
766
-
767
- Args:
768
- timesteps: Timesteps to activate, or None to clear. Ignored if series has no time dimension.
769
- scenarios: Scenarios to activate, or None to clear. Ignored if series has no scenario dimension.
770
- """
771
- # Only update timesteps if the series has time dimension
772
- if self.has_time_dim:
773
- if timesteps is None or timesteps.equals(self._stored_data.indexes['time']):
774
- self._selected_timesteps = None
775
- else:
776
- self._selected_timesteps = timesteps
777
-
778
- # Only update scenarios if the series has scenario dimension
779
- if self.has_scenario_dim:
780
- if scenarios is None or scenarios.equals(self._stored_data.indexes['scenario']):
781
- self._selected_scenarios = None
471
+ elif isinstance(data, np.ndarray):
472
+ # NumPy arrays - dispatch based on dimensionality
473
+ if data.ndim == 0:
474
+ # 0-dimensional array (scalar)
475
+ intermediate = xr.DataArray(data.item())
476
+ elif data.ndim == 1:
477
+ # 1-dimensional array
478
+ intermediate = cls._match_1d_array_by_length(data, validated_coords, target_dims)
782
479
  else:
783
- self._selected_scenarios = scenarios
784
-
785
- @property
786
- def sel(self):
787
- """Direct access to the selected_data's sel method for convenience."""
788
- return self.selected_data.sel
789
-
790
- @property
791
- def isel(self):
792
- """Direct access to the selected_data's isel method for convenience."""
793
- return self.selected_data.isel
794
-
795
- @property
796
- def _valid_selector(self) -> Dict[str, pd.Index]:
797
- """Get the current selection as a dictionary."""
798
- selector = {}
799
-
800
- # Only include time in selector if series has time dimension
801
- if self.has_time_dim and self._selected_timesteps is not None:
802
- selector['time'] = self._selected_timesteps
803
-
804
- # Only include scenario in selector if series has scenario dimension
805
- if self.has_scenario_dim and self._selected_scenarios is not None:
806
- selector['scenario'] = self._selected_scenarios
807
-
808
- return selector
809
-
810
- def _apply_operation(self, other, op):
811
- """Apply an operation between this TimeSeries and another object."""
812
- if isinstance(other, TimeSeries):
813
- other = other.selected_data
814
- return op(self.selected_data, other)
815
-
816
- def __add__(self, other):
817
- return self._apply_operation(other, lambda x, y: x + y)
818
-
819
- def __sub__(self, other):
820
- return self._apply_operation(other, lambda x, y: x - y)
821
-
822
- def __mul__(self, other):
823
- return self._apply_operation(other, lambda x, y: x * y)
824
-
825
- def __truediv__(self, other):
826
- return self._apply_operation(other, lambda x, y: x / y)
827
-
828
- def __radd__(self, other):
829
- return other + self.selected_data
830
-
831
- def __rsub__(self, other):
832
- return other - self.selected_data
833
-
834
- def __rmul__(self, other):
835
- return other * self.selected_data
836
-
837
- def __rtruediv__(self, other):
838
- return other / self.selected_data
839
-
840
- def __neg__(self) -> xr.DataArray:
841
- return -self.selected_data
842
-
843
- def __pos__(self) -> xr.DataArray:
844
- return +self.selected_data
845
-
846
- def __abs__(self) -> xr.DataArray:
847
- return abs(self.selected_data)
848
-
849
- def __gt__(self, other):
850
- """
851
- Compare if this TimeSeries is greater than another.
852
-
853
- Args:
854
- other: Another TimeSeries to compare with
855
-
856
- Returns:
857
- True if all values in this TimeSeries are greater than other
858
- """
859
- if isinstance(other, TimeSeries):
860
- return self.selected_data > other.selected_data
861
- return self.selected_data > other
862
-
863
- def __ge__(self, other):
864
- """
865
- Compare if this TimeSeries is greater than or equal to another.
866
-
867
- Args:
868
- other: Another TimeSeries to compare with
869
-
870
- Returns:
871
- True if all values in this TimeSeries are greater than or equal to other
872
- """
873
- if isinstance(other, TimeSeries):
874
- return self.selected_data >= other.selected_data
875
- return self.selected_data >= other
876
-
877
- def __lt__(self, other):
878
- """
879
- Compare if this TimeSeries is less than another.
880
-
881
- Args:
882
- other: Another TimeSeries to compare with
883
-
884
- Returns:
885
- True if all values in this TimeSeries are less than other
886
- """
887
- if isinstance(other, TimeSeries):
888
- return self.selected_data < other.selected_data
889
- return self.selected_data < other
890
-
891
- def __le__(self, other):
892
- """
893
- Compare if this TimeSeries is less than or equal to another.
894
-
895
- Args:
896
- other: Another TimeSeries to compare with
897
-
898
- Returns:
899
- True if all values in this TimeSeries are less than or equal to other
900
- """
901
- if isinstance(other, TimeSeries):
902
- return self.selected_data <= other.selected_data
903
- return self.selected_data <= other
904
-
905
- def __eq__(self, other):
906
- """
907
- Compare if this TimeSeries is equal to another.
908
-
909
- Args:
910
- other: Another TimeSeries to compare with
911
-
912
- Returns:
913
- True if all values in this TimeSeries are equal to other
914
- """
915
- if isinstance(other, TimeSeries):
916
- return self.selected_data == other.selected_data
917
- return self.selected_data == other
918
-
919
- def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
920
- """
921
- Handle NumPy universal functions.
922
-
923
- This allows NumPy functions to work with TimeSeries objects.
924
- """
925
- # Convert any TimeSeries inputs to their selected_data
926
- inputs = [x.selected_data if isinstance(x, TimeSeries) else x for x in inputs]
927
- return getattr(ufunc, method)(*inputs, **kwargs)
928
-
929
- def __repr__(self):
930
- """
931
- Get a string representation of the TimeSeries.
932
-
933
- Returns:
934
- String showing TimeSeries details
935
- """
936
- attrs = {
937
- 'name': self.name,
938
- 'aggregation_weight': self.aggregation_weight,
939
- 'aggregation_group': self.aggregation_group,
940
- 'has_extra_timestep': self.has_extra_timestep,
941
- 'shape': self.selected_data.shape,
942
- }
943
-
944
- attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items())
945
- return f'TimeSeries({attr_str})'
946
-
947
- def __str__(self):
948
- """
949
- Get a human-readable string representation.
950
-
951
- Returns:
952
- Descriptive string with statistics
953
- """
954
- return f'TimeSeries "{self.name}":\n{textwrap.indent(self.stats, " ")}'
955
-
956
-
957
- class TimeSeriesCollection:
958
- """
959
- Simplified central manager for time series data with reference tracking.
960
-
961
- Provides a way to store time series data and work with subsets of dimensions
962
- that automatically update all references when changed.
963
- """
964
-
965
- def __init__(
966
- self,
967
- timesteps: pd.DatetimeIndex,
968
- scenarios: Optional[pd.Index] = None,
969
- hours_of_last_timestep: Optional[float] = None,
970
- hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None,
971
- ):
972
- """Initialize a TimeSeriesCollection."""
973
- self._full_timesteps = self._validate_timesteps(timesteps)
974
- self._full_scenarios = self._validate_scenarios(scenarios)
975
-
976
- self._full_timesteps_extra = self._create_timesteps_with_extra(
977
- self._full_timesteps,
978
- self._calculate_hours_of_final_timestep(
979
- self._full_timesteps, hours_of_final_timestep=hours_of_last_timestep
980
- ),
981
- )
982
- self._full_hours_per_timestep = self.calculate_hours_per_timestep(
983
- self._full_timesteps_extra, self._full_scenarios
984
- )
985
-
986
- self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps(
987
- timesteps, hours_of_previous_timesteps
988
- ) # TODO: Make dynamic
989
-
990
- # Series that need extra timestep
991
- self._has_extra_timestep: set = set()
992
-
993
- # Storage for TimeSeries objects
994
- self._time_series: Dict[str, TimeSeries] = {}
995
-
996
- # Active subset selectors
997
- self._selected_timesteps: Optional[pd.DatetimeIndex] = None
998
- self._selected_scenarios: Optional[pd.Index] = None
999
- self._selected_timesteps_extra: Optional[pd.DatetimeIndex] = None
1000
- self._selected_hours_per_timestep: Optional[xr.DataArray] = None
1001
-
-    def add_time_series(
-        self,
-        name: str,
-        data: Union[NumericDataTS, TimeSeries],
-        has_time_dim: bool = True,
-        has_scenario_dim: bool = True,
-        aggregation_weight: Optional[float] = None,
-        aggregation_group: Optional[str] = None,
-        has_extra_timestep: bool = False,
-    ) -> TimeSeries:
-        """
-        Add a new TimeSeries to the collection.
-
-        Args:
-            name: Name of the time series
-            data: Data for the time series (can be raw data or an existing TimeSeries)
-            has_time_dim: Whether the TimeSeries has a time dimension
-            has_scenario_dim: Whether the TimeSeries has a scenario dimension
-            aggregation_weight: Weight used for aggregation
-            aggregation_group: Group name for shared aggregation weighting
-            has_extra_timestep: Whether this series needs an extra timestep
-
-        Returns:
-            The created TimeSeries object
-        """
-        if name in self._time_series:
-            raise KeyError(f"TimeSeries '{name}' already exists in the collection")
-        if not has_time_dim and has_extra_timestep:
-            raise ValueError('A TimeSeries without a time dimension cannot have an extra timestep')
-
-        # Choose which timesteps to use
-        if has_time_dim:
-            target_timesteps = self.timesteps_extra if has_extra_timestep else self.timesteps
-        else:
-            target_timesteps = None
-
-        target_scenarios = self.scenarios if has_scenario_dim else None
-
-        # Create or adapt the TimeSeries object
-        if isinstance(data, TimeSeries):
-            # Use the existing TimeSeries but update its parameters
-            time_series = data
-            # Update the stored data to use our timesteps and scenarios
-            data_array = DataConverter.as_dataarray(
-                time_series.stored_data, timesteps=target_timesteps, scenarios=target_scenarios
-            )
-            time_series = TimeSeries(
-                data=data_array,
-                name=name,
-                aggregation_weight=aggregation_weight or time_series.aggregation_weight,
-                aggregation_group=aggregation_group or time_series.aggregation_group,
-                has_extra_timestep=has_extra_timestep or time_series.has_extra_timestep,
-            )
-        else:
-            # Create a new TimeSeries from raw data
-            time_series = TimeSeries.from_datasource(
-                data=data,
-                name=name,
-                timesteps=target_timesteps,
-                scenarios=target_scenarios,
-                aggregation_weight=aggregation_weight,
-                aggregation_group=aggregation_group,
-                has_extra_timestep=has_extra_timestep,
-            )
-
-        # Add to storage
-        self._time_series[name] = time_series
-
-        # Track whether it needs the extra timestep
-        if has_extra_timestep:
-            self._has_extra_timestep.add(name)
+            # Multi-dimensional array
+            intermediate = cls._match_multidim_array_by_shape_permutation(data, validated_coords, target_dims)
 
-        # Return the TimeSeries object
-        return time_series
-
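Sketch of how the removed method was called (names and values are illustrative; array-like data is assumed to be accepted via the NumericDataTS alias):

    import numpy as np

    prices = collection.add_time_series(
        'electricity_price',
        data=np.array([30.0, 32.5, 28.0, 31.0]),  # one value per timestep
        aggregation_group='prices',               # share one aggregation weight within the group
    )
    tariff = collection.add_time_series('fixed_tariff', data=42.0)  # scalars broadcast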
-    def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None:
-        """
-        Set active subset for timesteps and scenarios.
-
-        Args:
-            timesteps: Timesteps to activate, or None to clear
-            scenarios: Scenarios to activate, or None to clear
-        """
-        if timesteps is None:
-            self._selected_timesteps = None
-            self._selected_timesteps_extra = None
-        else:
-            self._selected_timesteps = self._validate_timesteps(timesteps, self._full_timesteps)
-            self._selected_timesteps_extra = self._create_timesteps_with_extra(
-                timesteps, self._calculate_hours_of_final_timestep(timesteps, self._full_timesteps)
-            )
-
-        if scenarios is None:
-            self._selected_scenarios = None
-        else:
-            self._selected_scenarios = self._validate_scenarios(scenarios, self._full_scenarios)
+        elif isinstance(data, pd.Series):
+            # Pandas Series - validate and match by index
+            if isinstance(data.index, pd.MultiIndex):
+                raise ConversionError('MultiIndex Series are not supported. Please use a single-level index.')
+            intermediate = cls._match_series_by_index_alignment(data, validated_coords, target_dims)
 
-        self._selected_hours_per_timestep = self.calculate_hours_per_timestep(self.timesteps_extra, self.scenarios)
+        elif isinstance(data, pd.DataFrame):
+            # Pandas DataFrame - validate and convert
+            if isinstance(data.index, pd.MultiIndex):
+                raise ConversionError('MultiIndex DataFrames are not supported. Please use a single-level index.')
+            if len(data.columns) == 0 or data.empty:
+                raise ConversionError('DataFrame must have at least one column and cannot be empty.')
 
-        # Apply the selection to all TimeSeries objects
-        for ts_name, ts in self._time_series.items():
-            if ts.has_time_dim:
-                timesteps = self.timesteps_extra if ts_name in self._has_extra_timestep else self.timesteps
+            if len(data.columns) == 1:
+                # Single-column DataFrame - treat as Series
+                series_data = data.iloc[:, 0]
+                intermediate = cls._match_series_by_index_alignment(series_data, validated_coords, target_dims)
             else:
-                timesteps = None
-
-            ts.set_selection(timesteps=timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None)
-        self._propagate_selection_to_time_series()
+                # Multi-column DataFrame - treat as multi-dimensional array
+                intermediate = cls._match_multidim_array_by_shape_permutation(
+                    data.to_numpy(), validated_coords, target_dims
+                )
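The DataFrame branch above relies on a plain pandas equivalence: a single-column frame carries the same data as its one Series, while a multi-column frame is just a 2-D array plus labels. Illustration without any flixopt calls:

    import pandas as pd

    df = pd.DataFrame(
        {'base': [1.0, 2.0], 'high': [3.0, 4.0]},
        index=pd.date_range('2020-01-01', periods=2, freq='h', name='time'),
    )

    print(df[['base']].iloc[:, 0].to_numpy())  # single column behaves like a Series -> [1. 2.]
    print(df.to_numpy().shape)                 # multi-column becomes a plain 2-D array -> (2, 2)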
 
-    def as_dataset(self, with_extra_timestep: bool = True, with_constants: bool = True) -> xr.Dataset:
-        """
-        Convert the TimeSeriesCollection to an xarray Dataset containing the data of each TimeSeries.
+        elif isinstance(data, xr.DataArray):
+            # Existing DataArray - use as-is
+            intermediate = data.copy()
 
-        Args:
-            with_extra_timestep: Whether to include the extra timestep.
-                Excluding it removes the last timestep from the TimeSeries that carry it, which avoids NaNs in the others.
-            with_constants: Whether to include TimeSeries with a constant value in the dataset.
-        """
-        if self.scenarios is None:
-            ds = xr.Dataset(coords={'time': self.timesteps_extra})
         else:
-            ds = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra})
-
-        for ts in self._time_series.values():
-            if not with_constants and ts.all_equal:
-                continue
-            ds[ts.name] = ts.selected_data
-
-        if not with_extra_timestep:
-            return ds.sel(time=self.timesteps)
-
-        return ds
-
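Usage sketch for the removed as_dataset, continuing the illustrative collection from above:

    ds = collection.as_dataset(with_extra_timestep=False, with_constants=False)
    print(list(ds.data_vars))  # only non-constant series, without the extra timestep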
-    @property
-    def timesteps(self) -> pd.DatetimeIndex:
-        """Get the current active timesteps."""
-        if self._selected_timesteps is None:
-            return self._full_timesteps
-        return self._selected_timesteps
-
-    @property
-    def timesteps_extra(self) -> pd.DatetimeIndex:
-        """Get the current active timesteps with extra timestep."""
-        if self._selected_timesteps_extra is None:
-            return self._full_timesteps_extra
-        return self._selected_timesteps_extra
-
-    @property
-    def hours_per_timestep(self) -> xr.DataArray:
-        """Get the current active hours per timestep."""
-        if self._selected_hours_per_timestep is None:
-            return self._full_hours_per_timestep
-        return self._selected_hours_per_timestep
-
-    @property
-    def scenarios(self) -> Optional[pd.Index]:
-        """Get the current active scenarios."""
-        if self._selected_scenarios is None:
-            return self._full_scenarios
-        return self._selected_scenarios
-
-    def _propagate_selection_to_time_series(self) -> None:
-        """Apply the current selection to all TimeSeries objects."""
-        for ts_name, ts in self._time_series.items():
-            if ts.has_time_dim:
-                timesteps = self.timesteps_extra if ts_name in self._has_extra_timestep else self.timesteps
-            else:
-                timesteps = None
+            # Unsupported data type
+            supported_types = [
+                'int',
+                'float',
+                'bool',
+                'np.integer',
+                'np.floating',
+                'np.bool_',
+                'np.ndarray',
+                'pd.Series',
+                'pd.DataFrame',
+                'xr.DataArray',
+            ]
+            raise ConversionError(
+                f'Unsupported data type: {type(data).__name__}. Supported types: {", ".join(supported_types)}'
+            )
 
-            ts.set_selection(timesteps=timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None)
+        # Broadcast intermediate result to target specification
+        return cls._broadcast_dataarray_to_target_specification(intermediate, validated_coords, target_dims)
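The dispatch above always funnels into one final broadcast against the validated target coordinates, so scalars and partial data end up with the same dimensions as full arrays. Assuming the surrounding method keeps a coords-style signature (the exact public signature lies outside this hunk), a call might look like:

    import pandas as pd

    coords = {
        'time': pd.date_range('2020-01-01', periods=3, freq='h', name='time'),
        'scenario': pd.Index(['base', 'high'], name='scenario'),
    }
    # Hypothetical call shape: a scalar broadcast to a (time, scenario) DataArray
    arr = DataConverter.as_dataarray(42.0, coords=coords)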
 
-    def __getitem__(self, name: str) -> TimeSeries:
+    @staticmethod
+    def _validate_and_prepare_target_coordinates(
+        coords: dict[str, pd.Index],
+    ) -> tuple[dict[str, pd.Index], tuple[str, ...]]:
         """
-        Get a stored time series by name.
-
-        Args:
-            name: Name of the time series
+        Validate and prepare target coordinate specification for DataArray creation.
 
-        Returns:
-            The TimeSeries object registered under the given name
-        """
-        # First check if this is a TimeSeries
-        if name in self._time_series:
-            # Return the TimeSeries object (it will handle selection internally)
-            return self._time_series[name]
-        raise ValueError(f'No TimeSeries named "{name}" found')
-
-    def __contains__(self, value) -> bool:
-        if isinstance(value, str):
-            return value in self._time_series
-        elif isinstance(value, TimeSeries):
-            return value.name in self._time_series
-        raise TypeError(f'Invalid type for __contains__ of {self.__class__.__name__}: {type(value)}')
-
-    def __iter__(self) -> Iterator[TimeSeries]:
-        """Iterate over TimeSeries objects."""
-        return iter(self._time_series.values())
-
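These three dunder methods gave the removed collection its dict-like surface; illustrative usage:

    if 'electricity_price' in collection:         # __contains__ accepts names or TimeSeries
        prices = collection['electricity_price']  # __getitem__ returns the stored TimeSeries

    for ts in collection:                         # __iter__ walks all stored TimeSeries
        print(ts.name)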
-    def update_time_series(self, name: str, data: TimestepData) -> TimeSeries:
-        """
-        Update an existing TimeSeries with new data.
+        Performs comprehensive validation of coordinate inputs and prepares them
+        for use in DataArray construction with appropriate naming and type checking.
 
         Args:
-            name: Name of the TimeSeries to update
-            data: New data to assign
+            coords: Raw coordinate specification {dimension_name: coordinate_index}
 
         Returns:
-            The updated TimeSeries
+            Tuple of (validated_coordinates_dict, dimension_names_tuple)
 
         Raises:
-            KeyError: If no TimeSeries with the given name exists
+            ConversionError: If any coordinates are invalid, improperly typed,
+                or have inconsistent naming
         """
-        if name not in self._time_series:
-            raise KeyError(f"No TimeSeries named '{name}' found")
-
-        # Get the TimeSeries
-        ts = self._time_series[name]
+        validated_coords = {}
+        dimension_names = []
 
-        # Determine which timesteps to use if the series has a time dimension
-        if ts.has_time_dim:
-            target_timesteps = self.timesteps_extra if name in self._has_extra_timestep else self.timesteps
-        else:
-            target_timesteps = None
-
-        # Convert data to proper format
-        data_array = DataConverter.as_dataarray(
-            data, timesteps=target_timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None
-        )
-
-        # Update the TimeSeries
-        ts.update_stored_data(data_array)
-
-        return ts
-
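Usage sketch for the removed update_time_series (values are illustrative):

    import numpy as np

    collection.update_time_series('electricity_price', np.array([29.0, 31.5, 27.0, 30.5]))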
-    def calculate_aggregation_weights(self) -> Dict[str, float]:
-        """Calculate and return aggregation weights for all time series."""
-        group_weights = self._calculate_group_weights()
-
-        weights = {}
-        for name, ts in self._time_series.items():
-            if ts.aggregation_group is not None:
-                # Use group weight
-                weights[name] = group_weights.get(ts.aggregation_group, 1)
-            else:
-                # Use individual weight or default to 1
-                weights[name] = ts.aggregation_weight or 1
-
-        if np.all(np.isclose(list(weights.values()), 1, atol=1e-6)):
-            logger.info('All aggregation weights were set to 1')
-
-        return weights
-
-    def _calculate_group_weights(self) -> Dict[str, float]:
-        """Calculate weights for aggregation groups."""
-        # Count series in each group
-        groups = [ts.aggregation_group for ts in self._time_series.values() if ts.aggregation_group is not None]
-        group_counts = Counter(groups)
-
-        # Calculate weight for each group (1/count)
-        return {group: 1 / count for group, count in group_counts.items()}
-
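The group weighting is plain 1/n per member, so every aggregation group contributes a total weight of 1 regardless of its size. The same arithmetic, self-contained:

    from collections import Counter

    groups = ['prices', 'prices', 'prices', 'demand', 'demand']
    group_weights = {group: 1 / count for group, count in Counter(groups).items()}
    print(group_weights)  # -> {'prices': 0.3333333333333333, 'demand': 0.5}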
-    @staticmethod
-    def _validate_timesteps(
-        timesteps: pd.DatetimeIndex, present_timesteps: Optional[pd.DatetimeIndex] = None
-    ) -> pd.DatetimeIndex:
-        """
-        Validate the timesteps format, renaming the index if needed.
-
-        Args:
-            timesteps: The timesteps to validate
-            present_timesteps: The timesteps that are present in the dataset
-
-        Raises:
-            TypeError: If timesteps is not a pandas DatetimeIndex
-            ValueError: If timesteps does not contain at least 2 timestamps
-            ValueError: If timesteps is not sorted
-            ValueError: If timesteps contains duplicates
-            ValueError: If timesteps is not a subset of present_timesteps
-        """
-        if not isinstance(timesteps, pd.DatetimeIndex):
-            raise TypeError('timesteps must be a pandas DatetimeIndex')
-
-        if len(timesteps) < 2:
-            raise ValueError('timesteps must contain at least 2 timestamps')
-
-        # Ensure timesteps has the required name
-        if timesteps.name != 'time':
-            logger.debug('Renamed timesteps to "time" (was "%s")', timesteps.name)
-            timesteps.name = 'time'
-
-        # Ensure timesteps is sorted
-        if not timesteps.is_monotonic_increasing:
-            raise ValueError('timesteps must be sorted')
-
-        # Ensure timesteps has no duplicates
-        if len(timesteps) != len(timesteps.drop_duplicates()):
-            raise ValueError('timesteps must not contain duplicates')
-
-        # Ensure timesteps is a subset of present_timesteps
-        if present_timesteps is not None and not set(timesteps).issubset(set(present_timesteps)):
-            raise ValueError('timesteps must be a subset of present_timesteps')
-
-        return timesteps
-
-    @staticmethod
-    def _validate_scenarios(scenarios: pd.Index, present_scenarios: Optional[pd.Index] = None) -> Optional[pd.Index]:
-        """
-        Validate the scenarios format, renaming the index if needed.
-
-        Args:
-            scenarios: The scenarios to validate
-            present_scenarios: The scenarios that are present in the dataset
-
-        Raises:
-            ValueError: If scenarios is not a subset of present_scenarios
-        """
-        if scenarios is None:
-            return None
-
-        if not isinstance(scenarios, pd.Index):
-            logger.warning('Converting scenarios to pandas.Index')
-            scenarios = pd.Index(scenarios, name='scenario')
-
-        # Ensure scenarios has the required name
-        if scenarios.name != 'scenario':
-            logger.debug('Renamed scenarios to "scenario" (was "%s")', scenarios.name)
-            scenarios.name = 'scenario'
-
-        # Ensure scenarios is a subset of present_scenarios
-        if present_scenarios is not None and not set(scenarios).issubset(set(present_scenarios)):
-            raise ValueError('scenarios must be a subset of present_scenarios')
-
-        return scenarios
-
-    @staticmethod
-    def _create_timesteps_with_extra(timesteps: pd.DatetimeIndex, hours_of_last_timestep: float) -> pd.DatetimeIndex:
-        """Create timesteps with an extra step appended at the end."""
-        last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
-        return pd.DatetimeIndex(timesteps.append(last_date), name='time')
-
-    @staticmethod
-    def _calculate_hours_of_previous_timesteps(
-        timesteps: pd.DatetimeIndex, hours_of_previous_timesteps: Optional[Union[float, np.ndarray]]
-    ) -> Union[float, np.ndarray]:
-        """Calculate the duration of the timesteps preceding the first timestep."""
-        if hours_of_previous_timesteps is not None:
-            return hours_of_previous_timesteps
-
-        # Calculate from the first interval
-        first_interval = timesteps[1] - timesteps[0]
-        return first_interval.total_seconds() / 3600  # Convert to hours
-
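Both helpers derive durations straight from the index; what they compute, in plain pandas:

    import pandas as pd

    timesteps = pd.date_range('2020-01-01', periods=3, freq='h', name='time')

    # Fallback of _calculate_hours_of_previous_timesteps: the first interval, in hours
    print((timesteps[1] - timesteps[0]).total_seconds() / 3600)  # -> 1.0

    # _create_timesteps_with_extra appends one step past the end (here +1 hour)
    extra = timesteps.append(pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=1)], name='time'))
    print(extra[-1])  # -> 2020-01-01 03:00:00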
-    @staticmethod
-    def _calculate_hours_of_final_timestep(
-        timesteps: pd.DatetimeIndex,
-        timesteps_superset: Optional[pd.DatetimeIndex] = None,
-        hours_of_final_timestep: Optional[float] = None,
-    ) -> float:
-        """
-        Calculate the duration of the final timestep.
-        If timesteps_superset is provided, the duration of the final timestep is derived from it.
-        hours_of_final_timestep is only used if the duration can't be determined from the timesteps alone.
-
-        Args:
-            timesteps: The timesteps whose final duration is calculated
-            timesteps_superset: A superset of timesteps that contains the provided timesteps
-            hours_of_final_timestep: The duration of the final timestep, if already known
-
-        Returns:
-            The duration of the final timestep in hours
+        for dim_name, coord_index in coords.items():
+            # Type validation
+            if not isinstance(coord_index, pd.Index):
+                raise ConversionError(
+                    f'Coordinate for dimension "{dim_name}" must be a pandas.Index, got {type(coord_index).__name__}'
+                )
 
-        Raises:
-            ValueError: If the provided timesteps end after the provided timesteps_superset
-        """
-        if timesteps_superset is None:
-            if hours_of_final_timestep is not None:
-                return hours_of_final_timestep
-            return (timesteps[-1] - timesteps[-2]) / pd.Timedelta(hours=1)
+            # Non-empty validation
+            if len(coord_index) == 0:
+                raise ConversionError(f'Coordinate for dimension "{dim_name}" cannot be empty')
 
-        final_timestep = timesteps[-1]
+            # Ensure coordinate index has consistent naming
+            if coord_index.name != dim_name:
+                coord_index = coord_index.rename(dim_name)
 
-        if timesteps_superset[-1] == final_timestep:
-            if hours_of_final_timestep is not None:
-                return hours_of_final_timestep
-            return (timesteps_superset[-1] - timesteps_superset[-2]) / pd.Timedelta(hours=1)
+            # Special validation for time dimensions (common pattern)
+            if dim_name == 'time' and not isinstance(coord_index, pd.DatetimeIndex):
+                raise ConversionError(
+                    f'Dimension named "time" should use DatetimeIndex for proper '
+                    f'time-series functionality, got {type(coord_index).__name__}'
+                )
 
-        elif timesteps_superset[-1] <= final_timestep:
-            raise ValueError(
-                f'The provided timesteps ({timesteps}) end after the provided timesteps_superset ({timesteps_superset})'
+            validated_coords[dim_name] = coord_index
+            dimension_names.append(dim_name)
+
+        return validated_coords, tuple(dimension_names)
+
+
+def get_dataarray_stats(arr: xr.DataArray) -> dict:
+    """Generate statistical summary of a DataArray."""
+    stats = {}
+    if arr.dtype.kind in 'biufc':  # bool, int, uint, float, complex
+        try:
+            stats.update(
+                {
+                    'min': float(arr.min().values),
+                    'max': float(arr.max().values),
+                    'mean': float(arr.mean().values),
+                    'median': float(arr.median().values),
+                    'std': float(arr.std().values),
+                    'count': int(arr.count().values),  # non-null count
+                }
             )
-        else:
-            # Get the first timestep in the superset that is after the final timestep of the subset
-            extra_timestep = timesteps_superset[timesteps_superset > final_timestep].min()
-            return (extra_timestep - final_timestep) / pd.Timedelta(hours=1)
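The removed branching reduces to three cases: no superset (use the last regular interval), the timesteps ending with the superset (use the superset's last interval), or the timesteps ending early (use the gap to the next superset timestep). Worked with plain pandas:

    import pandas as pd

    full = pd.date_range('2020-01-01', periods=5, freq='h', name='time')
    subset = full[:3]  # ends at 02:00, before the superset's end

    # No superset: fall back to the last regular interval -> 1.0
    print((full[-1] - full[-2]) / pd.Timedelta(hours=1))

    # Subset ends early: duration is the gap to the next superset timestep -> 1.0
    next_step = full[full > subset[-1]].min()
    print((next_step - subset[-1]) / pd.Timedelta(hours=1))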
-
-    @staticmethod
-    def calculate_hours_per_timestep(
-        timesteps_extra: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None
-    ) -> xr.DataArray:
-        """Calculate duration of each timestep."""
-        # Calculate differences between consecutive timestamps
-        hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1)
+            # Add null count only if there are nulls
+            null_count = int(arr.isnull().sum().values)
+            if null_count > 0:
+                stats['nulls'] = null_count
 
-        return DataConverter.as_dataarray(
-            hours_per_step,
-            timesteps=timesteps_extra[:-1],
-            scenarios=scenarios,
-        ).rename('hours_per_step')
+        except Exception:
+            pass
 
+    return stats
 
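Usage sketch for the new get_dataarray_stats helper (values are illustrative; the function is defined in this module):

    import numpy as np
    import xarray as xr

    arr = xr.DataArray(np.array([1.0, 2.0, np.nan, 4.0]), dims='time')
    print(get_dataarray_stats(arr))
    # -> {'min': 1.0, 'max': 4.0, 'mean': ..., 'median': ..., 'std': ..., 'count': 3, 'nulls': 1}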
-def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10, by_scenario: bool = False) -> str:
-    """
-    Calculates the mean, median, min, max, and standard deviation of a numeric DataArray.
+
+def drop_constant_arrays(ds: xr.Dataset, dim: str = 'time', drop_arrays_without_dim: bool = True) -> xr.Dataset:
+    """Drop variables with constant values along a dimension.
 
     Args:
-        data: The DataArray to analyze
-        decimals: Number of decimal places to show
-        padd: Padding for alignment
-        by_scenario: Whether to break down stats by scenario
+        ds: Input dataset to filter.
+        dim: Dimension along which to check for constant values.
+        drop_arrays_without_dim: If True, also drop variables that don't have the specified dimension.
 
     Returns:
-        String representation of data statistics
+        Dataset with constant variables removed.
     """
-    format_spec = f'>{padd}.{decimals}f' if padd else f'.{decimals}f'
-
-    # If by_scenario is True and there's a scenario dimension with multiple values
-    if by_scenario and 'scenario' in data.dims and data.sizes['scenario'] > 1:
-        results = []
-        for scenario in data.coords['scenario'].values:
-            scenario_data = data.sel(scenario=scenario)
-            if np.unique(scenario_data).size == 1:
-                results.append(f' {scenario}: {scenario_data.max().item():{format_spec}} (constant)')
-            else:
-                mean = scenario_data.mean().item()
-                median = scenario_data.median().item()
-                min_val = scenario_data.min().item()
-                max_val = scenario_data.max().item()
-                std = scenario_data.std().item()
-                results.append(
-                    f' {scenario}: {mean:{format_spec}} (mean), {median:{format_spec}} (median), '
-                    f'{min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
-                )
-        return '\n'.join(['By scenario:'] + results)
-
-    # Standard logic for non-scenario data or aggregated stats
-    if np.unique(data).size == 1:
-        return f'{data.max().item():{format_spec}} (constant)'
-
-    mean = data.mean().item()
-    median = data.median().item()
-    min_val = data.min().item()
-    max_val = data.max().item()
-    std = data.std().item()
-
-    return f'{mean:{format_spec}} (mean), {median:{format_spec}} (median), {min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
-
+    drop_vars = []
+
+    for name, da in ds.data_vars.items():
+        # Skip variables without the dimension
+        if dim not in da.dims:
+            if drop_arrays_without_dim:
+                drop_vars.append(name)
+            continue
+
+        # Check if the variable is constant along the dimension
+        if (da.max(dim, skipna=True) == da.min(dim, skipna=True)).all().item():
+            drop_vars.append(name)
+
+    if drop_vars:
+        drop_vars = sorted(drop_vars)
+        logger.debug(
+            f'Dropping {len(drop_vars)} constant/dimension-less arrays: {drop_vars[:5]}{"..." if len(drop_vars) > 5 else ""}'
+        )
 
-def extract_data(
-    data: Optional[Union[int, float, xr.DataArray, TimeSeries]],
-    if_none: Any = None
-) -> Any:
-    """
-    Extract the underlying data from a TimeSeries, passing scalars and DataArrays through unchanged.
+    return ds.drop_vars(drop_vars)
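Usage sketch for the new drop_constant_arrays (dataset contents are illustrative):

    import numpy as np
    import pandas as pd
    import xarray as xr

    time = pd.date_range('2020-01-01', periods=4, freq='h', name='time')
    ds = xr.Dataset(
        {
            'varying': ('time', np.array([1.0, 2.0, 3.0, 4.0])),
            'constant': ('time', np.full(4, 5.0)),
        },
        coords={'time': time},
    )
    print(list(drop_constant_arrays(ds).data_vars))  # -> ['varying']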
 
-    Args:
-        data: The data to convert (scalar, array, or DataArray)
-        if_none: The value to return if data is None
 
-    Returns:
-        The extracted data (scalar or DataArray), or the value specified by if_none
-    """
-    if data is None:
-        return if_none
-    if isinstance(data, TimeSeries):
-        return data.selected_data
-    if isinstance(data, xr.DataArray):
-        return data
-    if isinstance(data, (int, float, np.integer, np.floating)):
-        return data
-    raise TypeError(f'Unsupported data type: {type(data).__name__}')
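Behavior sketch of the removed extract_data helper:

    print(extract_data(None, if_none=0.0))  # -> 0.0
    print(extract_data(3.5))                # -> 3.5 (scalars pass through)
    # TimeSeries inputs are unwrapped to their selected_data; DataArrays pass through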
+# Backward compatibility aliases
+# TODO: Needed?
+NonTemporalDataUser = PeriodicDataUser
+NonTemporalData = PeriodicData