flixopt 3.0.1__py3-none-any.whl → 6.0.0rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. flixopt/__init__.py +57 -49
  2. flixopt/carrier.py +159 -0
  3. flixopt/clustering/__init__.py +51 -0
  4. flixopt/clustering/base.py +1746 -0
  5. flixopt/clustering/intercluster_helpers.py +201 -0
  6. flixopt/color_processing.py +372 -0
  7. flixopt/comparison.py +819 -0
  8. flixopt/components.py +848 -270
  9. flixopt/config.py +853 -496
  10. flixopt/core.py +111 -98
  11. flixopt/effects.py +294 -284
  12. flixopt/elements.py +484 -223
  13. flixopt/features.py +220 -118
  14. flixopt/flow_system.py +2026 -389
  15. flixopt/interface.py +504 -286
  16. flixopt/io.py +1718 -55
  17. flixopt/linear_converters.py +291 -230
  18. flixopt/modeling.py +304 -181
  19. flixopt/network_app.py +2 -1
  20. flixopt/optimization.py +788 -0
  21. flixopt/optimize_accessor.py +373 -0
  22. flixopt/plot_result.py +143 -0
  23. flixopt/plotting.py +1177 -1034
  24. flixopt/results.py +1331 -372
  25. flixopt/solvers.py +12 -4
  26. flixopt/statistics_accessor.py +2412 -0
  27. flixopt/stats_accessor.py +75 -0
  28. flixopt/structure.py +954 -120
  29. flixopt/topology_accessor.py +676 -0
  30. flixopt/transform_accessor.py +2277 -0
  31. flixopt/types.py +120 -0
  32. flixopt-6.0.0rc7.dist-info/METADATA +290 -0
  33. flixopt-6.0.0rc7.dist-info/RECORD +36 -0
  34. {flixopt-3.0.1.dist-info → flixopt-6.0.0rc7.dist-info}/WHEEL +1 -1
  35. flixopt/aggregation.py +0 -382
  36. flixopt/calculation.py +0 -672
  37. flixopt/commons.py +0 -51
  38. flixopt/utils.py +0 -86
  39. flixopt-3.0.1.dist-info/METADATA +0 -209
  40. flixopt-3.0.1.dist-info/RECORD +0 -26
  41. {flixopt-3.0.1.dist-info → flixopt-6.0.0rc7.dist-info}/licenses/LICENSE +0 -0
  42. {flixopt-3.0.1.dist-info → flixopt-6.0.0rc7.dist-info}/top_level.txt +0 -0
flixopt/io.py CHANGED
@@ -1,19 +1,29 @@
1
1
  from __future__ import annotations
2
2
 
3
- import importlib.util
3
+ import inspect
4
4
  import json
5
5
  import logging
6
+ import os
6
7
  import pathlib
7
8
  import re
9
+ import sys
10
+ import warnings
11
+ from collections import defaultdict
12
+ from contextlib import contextmanager
8
13
  from dataclasses import dataclass
9
- from typing import TYPE_CHECKING, Literal
14
+ from typing import TYPE_CHECKING, Any
10
15
 
16
+ import numpy as np
17
+ import pandas as pd
11
18
  import xarray as xr
12
19
  import yaml
13
20
 
14
21
  if TYPE_CHECKING:
15
22
  import linopy
16
23
 
24
+ from .flow_system import FlowSystem
25
+ from .types import Numeric_TPS
26
+
17
27
  logger = logging.getLogger('flixopt')
18
28
 
19
29
 
@@ -34,7 +44,331 @@ def remove_none_and_empty(obj):
34
44
  return obj
35
45
 
36
46
 
37
- def _save_to_yaml(data, output_file='formatted_output.yaml'):
47
+ def round_nested_floats(obj: dict | list | float | int | Any, decimals: int = 2) -> dict | list | float | int | Any:
48
+ """Recursively round floating point numbers in nested data structures and convert it to python native types.
49
+
50
+ This function traverses nested data structures (dictionaries, lists) and rounds
51
+ any floating point numbers to the specified number of decimal places. It handles
52
+ various data types including NumPy arrays and xarray DataArrays by converting
53
+ them to lists with rounded values.
54
+
55
+ Args:
56
+ obj: The object to process. Can be a dict, list, float, int, numpy.ndarray,
57
+ xarray.DataArray, or any other type.
58
+ decimals (int, optional): Number of decimal places to round to. Defaults to 2.
59
+
60
+ Returns:
61
+ The processed object with the same structure as the input, but with all floating point numbers rounded to the specified precision. NumPy arrays and xarray DataArrays are converted to lists.
62
+
63
+ Examples:
64
+ >>> data = {'a': 3.14159, 'b': [1.234, 2.678]}
65
+ >>> round_nested_floats(data, decimals=2)
66
+ {'a': 3.14, 'b': [1.23, 2.68]}
67
+
68
+ >>> import numpy as np
69
+ >>> arr = np.array([1.234, 5.678])
70
+ >>> round_nested_floats(arr, decimals=1)
71
+ [1.2, 5.7]
72
+ """
73
+ if isinstance(obj, dict):
74
+ return {k: round_nested_floats(v, decimals) for k, v in obj.items()}
75
+ elif isinstance(obj, list):
76
+ return [round_nested_floats(v, decimals) for v in obj]
77
+ elif isinstance(obj, np.floating):
78
+ return round(float(obj), decimals)
79
+ elif isinstance(obj, np.integer):
80
+ return int(obj)
81
+ elif isinstance(obj, np.bool_):
82
+ return bool(obj)
83
+ elif isinstance(obj, float):
84
+ return round(obj, decimals)
85
+ elif isinstance(obj, int):
86
+ return obj
87
+ elif isinstance(obj, np.ndarray):
88
+ return np.round(obj, decimals).tolist()
89
+ elif isinstance(obj, xr.DataArray):
90
+ return obj.round(decimals).values.tolist()
91
+ return obj
92
+
93
+
94
+ # ============================================================================
95
+ # Centralized JSON and YAML I/O Functions
96
+ # ============================================================================
97
+
98
+
99
+ def load_json(path: str | pathlib.Path) -> dict | list:
100
+ """
101
+ Load data from a JSON file.
102
+
103
+ Args:
104
+ path: Path to the JSON file.
105
+
106
+ Returns:
107
+ Loaded data (typically dict or list).
108
+
109
+ Raises:
110
+ FileNotFoundError: If the file does not exist.
111
+ json.JSONDecodeError: If the file is not valid JSON.
112
+ """
113
+ path = pathlib.Path(path)
114
+ with open(path, encoding='utf-8') as f:
115
+ return json.load(f)
116
+
117
+
118
+ def save_json(
119
+ data: dict | list,
120
+ path: str | pathlib.Path,
121
+ indent: int = 4,
122
+ ensure_ascii: bool = False,
123
+ **kwargs: Any,
124
+ ) -> None:
125
+ """
126
+ Save data to a JSON file with consistent formatting.
127
+
128
+ Args:
129
+ data: Data to save (dict or list).
130
+ path: Path to save the JSON file.
131
+ indent: Number of spaces for indentation (default: 4).
132
+ ensure_ascii: If False, allow Unicode characters (default: False).
133
+ **kwargs: Additional arguments to pass to json.dump().
134
+ """
135
+ path = pathlib.Path(path)
136
+ with open(path, 'w', encoding='utf-8') as f:
137
+ json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)
138
+
139
+
140
+ def load_yaml(path: str | pathlib.Path) -> dict | list:
141
+ """
142
+ Load data from a YAML file.
143
+
144
+ Args:
145
+ path: Path to the YAML file.
146
+
147
+ Returns:
148
+ Loaded data (typically dict or list), or empty dict if file is empty.
149
+
150
+ Raises:
151
+ FileNotFoundError: If the file does not exist.
152
+ yaml.YAMLError: If the file is not valid YAML.
153
+ Note: Returns {} for empty YAML files instead of None.
154
+ """
155
+ path = pathlib.Path(path)
156
+ with open(path, encoding='utf-8') as f:
157
+ return yaml.safe_load(f) or {}
158
+
159
+
160
+ def _load_yaml_unsafe(path: str | pathlib.Path) -> dict | list:
161
+ """
162
+ INTERNAL: Load YAML allowing arbitrary tags. Do not use on untrusted input.
163
+
164
+ This function exists only for loading internally-generated files that may
165
+ contain custom YAML tags. Never use this on user-provided files.
166
+
167
+ Args:
168
+ path: Path to the YAML file.
169
+
170
+ Returns:
171
+ Loaded data (typically dict or list), or empty dict if file is empty.
172
+ """
173
+ path = pathlib.Path(path)
174
+ with open(path, encoding='utf-8') as f:
175
+ return yaml.unsafe_load(f) or {}
176
+
177
+
178
+ def _create_compact_dumper():
179
+ """
180
+ Create a YAML dumper class with custom representer for compact numeric lists.
181
+
182
+ Returns:
183
+ A yaml.SafeDumper subclass configured to format numeric lists inline.
184
+ """
185
+
186
+ def represent_list(dumper, data):
187
+ """
188
+ Custom representer for lists to format them inline (flow style)
189
+ but only if they contain only numbers or nested numeric lists.
190
+ """
191
+ if data and all(
192
+ isinstance(item, (int, float, np.integer, np.floating))
193
+ or (isinstance(item, list) and all(isinstance(x, (int, float, np.integer, np.floating)) for x in item))
194
+ for item in data
195
+ ):
196
+ return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=True)
197
+ return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=False)
198
+
199
+ # Create custom dumper with the representer
200
+ class CompactDumper(yaml.SafeDumper):
201
+ pass
202
+
203
+ CompactDumper.add_representer(list, represent_list)
204
+ return CompactDumper
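To make the compact dumper's effect concrete, here is a small illustrative sketch (the sample data is invented) comparing PyYAML's default block-style list output with the inline flow style this dumper applies to purely numeric lists:

```python
import yaml

data = {'weights': [1.0, 2.5, 3.75], 'labels': ['a', 'b']}

# Default SafeDumper: every list is written in block style.
print(yaml.safe_dump(data, sort_keys=False, default_flow_style=False))
# weights:
# - 1.0
# - 2.5
# - 3.75
# labels:
# - a
# - b

# Compact dumper (as returned by _create_compact_dumper above): numeric lists
# collapse to one line, while the list of strings keeps block style.
print(yaml.dump(data, Dumper=_create_compact_dumper(), sort_keys=False, default_flow_style=False))
# weights: [1.0, 2.5, 3.75]
# labels:
# - a
# - b
```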
205
+
206
+
207
+ def save_yaml(
208
+ data: dict | list,
209
+ path: str | pathlib.Path,
210
+ indent: int = 4,
211
+ width: int = 1000,
212
+ allow_unicode: bool = True,
213
+ sort_keys: bool = False,
214
+ compact_numeric_lists: bool = False,
215
+ **kwargs: Any,
216
+ ) -> None:
217
+ """
218
+ Save data to a YAML file with consistent formatting.
219
+
220
+ Args:
221
+ data: Data to save (dict or list).
222
+ path: Path to save the YAML file.
223
+ indent: Number of spaces for indentation (default: 4).
224
+ width: Maximum line width (default: 1000).
225
+ allow_unicode: If True, allow Unicode characters (default: True).
226
+ sort_keys: If True, sort dictionary keys (default: False).
227
+ compact_numeric_lists: If True, format numeric lists inline for better readability (default: False).
228
+ **kwargs: Additional arguments to pass to yaml.dump().
229
+ """
230
+ path = pathlib.Path(path)
231
+
232
+ if compact_numeric_lists:
233
+ with open(path, 'w', encoding='utf-8') as f:
234
+ yaml.dump(
235
+ data,
236
+ f,
237
+ Dumper=_create_compact_dumper(),
238
+ indent=indent,
239
+ width=width,
240
+ allow_unicode=allow_unicode,
241
+ sort_keys=sort_keys,
242
+ default_flow_style=False,
243
+ **kwargs,
244
+ )
245
+ else:
246
+ with open(path, 'w', encoding='utf-8') as f:
247
+ yaml.safe_dump(
248
+ data,
249
+ f,
250
+ indent=indent,
251
+ width=width,
252
+ allow_unicode=allow_unicode,
253
+ sort_keys=sort_keys,
254
+ default_flow_style=False,
255
+ **kwargs,
256
+ )
257
+
258
+
259
+ def format_yaml_string(
260
+ data: dict | list,
261
+ indent: int = 4,
262
+ width: int = 1000,
263
+ allow_unicode: bool = True,
264
+ sort_keys: bool = False,
265
+ compact_numeric_lists: bool = False,
266
+ **kwargs: Any,
267
+ ) -> str:
268
+ """
269
+ Format data as a YAML string with consistent formatting.
270
+
271
+ This function provides the same formatting as save_yaml() but returns a string
272
+ instead of writing to a file. Useful for logging or displaying YAML data.
273
+
274
+ Args:
275
+ data: Data to format (dict or list).
276
+ indent: Number of spaces for indentation (default: 4).
277
+ width: Maximum line width (default: 1000).
278
+ allow_unicode: If True, allow Unicode characters (default: True).
279
+ sort_keys: If True, sort dictionary keys (default: False).
280
+ compact_numeric_lists: If True, format numeric lists inline for better readability (default: False).
281
+ **kwargs: Additional arguments to pass to yaml.dump().
282
+
283
+ Returns:
284
+ Formatted YAML string.
285
+ """
286
+ if compact_numeric_lists:
287
+ return yaml.dump(
288
+ data,
289
+ Dumper=_create_compact_dumper(),
290
+ indent=indent,
291
+ width=width,
292
+ allow_unicode=allow_unicode,
293
+ sort_keys=sort_keys,
294
+ default_flow_style=False,
295
+ **kwargs,
296
+ )
297
+ else:
298
+ return yaml.safe_dump(
299
+ data,
300
+ indent=indent,
301
+ width=width,
302
+ allow_unicode=allow_unicode,
303
+ sort_keys=sort_keys,
304
+ default_flow_style=False,
305
+ **kwargs,
306
+ )
307
+
308
+
309
+ def load_config_file(path: str | pathlib.Path) -> dict:
310
+ """
311
+ Load a configuration file, automatically detecting JSON or YAML format.
312
+
313
+ This function intelligently tries to load the file based on its extension,
314
+ with fallback support if the primary format fails.
315
+
316
+ Supported extensions:
317
+ - .json: Tries JSON first, falls back to YAML
318
+ - .yaml, .yml: Tries YAML first, falls back to JSON
319
+ - Others: Tries YAML, then JSON
320
+
321
+ Args:
322
+ path: Path to the configuration file.
323
+
324
+ Returns:
325
+ Loaded configuration as a dictionary.
326
+
327
+ Raises:
328
+ FileNotFoundError: If the file does not exist.
329
+ ValueError: If neither JSON nor YAML parsing succeeds.
330
+ """
331
+ path = pathlib.Path(path)
332
+
333
+ if not path.exists():
334
+ raise FileNotFoundError(f'Configuration file not found: {path}')
335
+
336
+ # Try based on file extension
337
+ # Normalize extension to lowercase for case-insensitive matching
338
+ suffix = path.suffix.lower()
339
+
340
+ if suffix == '.json':
341
+ try:
342
+ return load_json(path)
343
+ except json.JSONDecodeError:
344
+ logger.warning(f'Failed to parse {path} as JSON, trying YAML')
345
+ try:
346
+ return load_yaml(path)
347
+ except yaml.YAMLError as e:
348
+ raise ValueError(f'Failed to parse {path} as JSON or YAML') from e
349
+
350
+ elif suffix in ['.yaml', '.yml']:
351
+ try:
352
+ return load_yaml(path)
353
+ except yaml.YAMLError:
354
+ logger.warning(f'Failed to parse {path} as YAML, trying JSON')
355
+ try:
356
+ return load_json(path)
357
+ except json.JSONDecodeError as e:
358
+ raise ValueError(f'Failed to parse {path} as YAML or JSON') from e
359
+
360
+ else:
361
+ # Unknown extension, try YAML first (more common for config)
362
+ try:
363
+ return load_yaml(path)
364
+ except yaml.YAMLError:
365
+ try:
366
+ return load_json(path)
367
+ except json.JSONDecodeError as e:
368
+ raise ValueError(f'Failed to parse {path} as YAML or JSON') from e
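A brief usage sketch of the detection and fallback behaviour described above; the file names are placeholders:

```python
from flixopt import io

settings = io.load_config_file('settings.yaml')  # parsed as YAML, falls back to JSON on error
solver_opts = io.load_config_file('solver.json')  # parsed as JSON, falls back to YAML on error
legacy = io.load_config_file('run.conf')          # unknown extension: YAML first, then JSON
```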
369
+
370
+
371
+ def _save_yaml_multiline(data, output_file='formatted_output.yaml'):
38
372
  """
39
373
  Save dictionary data to YAML with proper multi-line string formatting.
40
374
  Handles complex string patterns including backticks, special characters,
@@ -62,14 +396,14 @@ def _save_to_yaml(data, output_file='formatted_output.yaml'):
62
396
  # Use plain style for simple strings
63
397
  return dumper.represent_scalar('tag:yaml.org,2002:str', data)
64
398
 
65
- # Add the string representer to SafeDumper
66
- yaml.add_representer(str, represent_str, Dumper=yaml.SafeDumper)
67
-
68
399
  # Configure dumper options for better formatting
69
400
  class CustomDumper(yaml.SafeDumper):
70
401
  def increase_indent(self, flow=False, indentless=False):
71
402
  return super().increase_indent(flow, False)
72
403
 
404
+ # Bind representer locally to CustomDumper to avoid global side effects
405
+ CustomDumper.add_representer(str, represent_str)
406
+
73
407
  # Write to file with settings that ensure proper formatting
74
408
  with open(output_file, 'w', encoding='utf-8') as file:
75
409
  yaml.dump(
@@ -80,7 +414,7 @@ def _save_to_yaml(data, output_file='formatted_output.yaml'):
80
414
  default_flow_style=False, # Use block style for mappings
81
415
  width=1000, # Set a reasonable line width
82
416
  allow_unicode=True, # Support Unicode characters
83
- indent=2, # Set consistent indentation
417
+ indent=4, # Set consistent indentation
84
418
  )
85
419
 
86
420
 
@@ -169,7 +503,7 @@ def document_linopy_model(model: linopy.Model, path: pathlib.Path | None = None)
169
503
  }
170
504
 
171
505
  if model.status == 'warning':
172
- logger.critical(f'The model has a warning status {model.status=}. Trying to extract infeasibilities')
506
+ logger.warning(f'The model has a warning status {model.status=}. Trying to extract infeasibilities')
173
507
  try:
174
508
  import io
175
509
  from contextlib import redirect_stdout
@@ -182,7 +516,7 @@ def document_linopy_model(model: linopy.Model, path: pathlib.Path | None = None)
182
516
 
183
517
  documentation['infeasible_constraints'] = f.getvalue()
184
518
  except NotImplementedError:
185
- logger.critical(
519
+ logger.warning(
186
520
  'Infeasible constraints could not be retrieved. This functionality is only available with gurobi'
187
521
  )
188
522
  documentation['infeasible_constraints'] = 'Not possible to retrieve infeasible constraints'
@@ -190,7 +524,7 @@ def document_linopy_model(model: linopy.Model, path: pathlib.Path | None = None)
190
524
  if path is not None:
191
525
  if path.suffix not in ['.yaml', '.yml']:
192
526
  raise ValueError(f'Invalid file extension for path {path}. Only .yaml and .yml are supported')
193
- _save_to_yaml(documentation, str(path))
527
+ _save_yaml_multiline(documentation, str(path))
194
528
 
195
529
  return documentation
196
530
 
@@ -199,7 +533,7 @@ def save_dataset_to_netcdf(
199
533
  ds: xr.Dataset,
200
534
  path: str | pathlib.Path,
201
535
  compression: int = 0,
202
- engine: Literal['netcdf4', 'scipy', 'h5netcdf'] = 'h5netcdf',
536
+ stack_vars: bool = True,
203
537
  ) -> None:
204
538
  """
205
539
  Save a dataset to a netcdf file. Store all attrs as JSON strings in 'attrs' attributes.
@@ -208,6 +542,8 @@ def save_dataset_to_netcdf(
208
542
  ds: Dataset to save.
209
543
  path: Path to save the dataset to.
210
544
  compression: Compression level for the dataset (0-9). 0 means no compression. 5 is a good default.
545
+ stack_vars: If True (default), stack variables with equal dims for faster I/O.
546
+ Variables are automatically unstacked when loading with load_dataset_from_netcdf.
211
547
 
212
548
  Raises:
213
549
  ValueError: If the path has an invalid file extension.
@@ -216,70 +552,502 @@ def save_dataset_to_netcdf(
216
552
  if path.suffix not in ['.nc', '.nc4']:
217
553
  raise ValueError(f'Invalid file extension for path {path}. Only .nc and .nc4 are supported')
218
554
 
219
- apply_encoding = False
220
- if compression != 0:
221
- if importlib.util.find_spec(engine) is not None:
222
- apply_encoding = True
223
- else:
224
- logger.warning(
225
- f'Dataset was exported without compression due to missing dependency "{engine}".'
226
- f'Install {engine} via `pip install {engine}`.'
227
- )
228
-
229
555
  ds = ds.copy(deep=True)
556
+
557
+ # Stack variables with equal dims for faster I/O
558
+ if stack_vars:
559
+ ds = _stack_equal_vars(ds)
560
+
230
561
  ds.attrs = {'attrs': json.dumps(ds.attrs)}
231
562
 
232
563
  # Convert all DataArray attrs to JSON strings
233
- for var_name, data_var in ds.data_vars.items():
234
- if data_var.attrs: # Only if there are attrs
235
- ds[var_name].attrs = {'attrs': json.dumps(data_var.attrs)}
564
+ # Use ds.variables to avoid slow _construct_dataarray calls
565
+ variables = ds.variables
566
+ coord_names = set(ds.coords)
567
+ for var_name in variables:
568
+ if var_name in coord_names:
569
+ continue
570
+ var = variables[var_name]
571
+ if var.attrs: # Only if there are attrs
572
+ var.attrs = {'attrs': json.dumps(var.attrs)}
236
573
 
237
574
  # Also handle coordinate attrs if they exist
238
- for coord_name, coord_var in ds.coords.items():
239
- if hasattr(coord_var, 'attrs') and coord_var.attrs:
240
- ds[coord_name].attrs = {'attrs': json.dumps(coord_var.attrs)}
575
+ for coord_name in ds.coords:
576
+ var = variables[coord_name]
577
+ if var.attrs:
578
+ var.attrs = {'attrs': json.dumps(var.attrs)}
579
+
580
+ # Suppress numpy binary compatibility warnings from netCDF4 (numpy 1->2 transition)
581
+ with warnings.catch_warnings():
582
+ warnings.filterwarnings('ignore', category=RuntimeWarning, message='numpy.ndarray size changed')
583
+ ds.to_netcdf(
584
+ path,
585
+ encoding=None
586
+ if compression == 0
587
+ else {name: {'zlib': True, 'complevel': compression} for name in variables if name not in coord_names},
588
+ engine='netcdf4',
589
+ )
590
+
591
+
592
+ def _reduce_constant_arrays(ds: xr.Dataset) -> xr.Dataset:
593
+ """
594
+ Reduce constant dimensions in arrays for more efficient storage.
595
+
596
+ For each array, checks each dimension and removes it if values are constant
597
+ along that dimension. This handles cases like:
598
+ - Shape (8760,) all identical → scalar
599
+ - Shape (8760, 2) constant along time → shape (2,)
600
+ - Shape (8760, 2, 3) constant along time → shape (2, 3)
601
+
602
+ This is useful for datasets saved with older versions where data was
603
+ broadcast to full dimensions.
604
+
605
+ Args:
606
+ ds: Dataset with potentially constant arrays.
607
+
608
+ Returns:
609
+ Dataset with constant dimensions reduced.
610
+ """
611
+ new_data_vars = {}
612
+ variables = ds.variables
613
+ coord_names = set(ds.coords)
241
614
 
242
- ds.to_netcdf(
243
- path,
244
- encoding=None
245
- if not apply_encoding
246
- else {data_var: {'zlib': True, 'complevel': compression} for data_var in ds.data_vars},
247
- engine=engine,
248
- )
615
+ for name in variables:
616
+ if name in coord_names:
617
+ continue
618
+ var = variables[name]
619
+ dims = var.dims
620
+ data = var.values
621
+
622
+ if not dims or data.size == 0:
623
+ new_data_vars[name] = var
624
+ continue
625
+
626
+ # Try to reduce each dimension using numpy operations
627
+ reduced_data = data
628
+ reduced_dims = list(dims)
629
+
630
+ for _axis, dim in enumerate(dims):
631
+ if dim not in reduced_dims:
632
+ continue # Already removed
633
+
634
+ current_axis = reduced_dims.index(dim)
635
+ # Check if constant along this axis using numpy
636
+ first_slice = np.take(reduced_data, 0, axis=current_axis)
637
+ # Broadcast first_slice to compare
638
+ expanded = np.expand_dims(first_slice, axis=current_axis)
639
+ is_constant = np.allclose(reduced_data, expanded, equal_nan=True)
640
+
641
+ if is_constant:
642
+ # Remove this dimension by taking first slice
643
+ reduced_data = first_slice
644
+ reduced_dims.pop(current_axis)
645
+
646
+ new_data_vars[name] = xr.Variable(tuple(reduced_dims), reduced_data, attrs=var.attrs)
647
+
648
+ return xr.Dataset(new_data_vars, coords=ds.coords, attrs=ds.attrs)
649
+
650
+
651
+ def _stack_equal_vars(ds: xr.Dataset, stacked_dim: str = '__stacked__') -> xr.Dataset:
652
+ """
653
+ Stack data_vars with equal dims into single DataArrays with a stacked dimension.
654
+
655
+ This reduces the number of data_vars in a dataset by grouping variables that
656
+ share the same dimensions. Each group is concatenated along a new stacked
657
+ dimension, with the original variable names stored as coordinates.
658
+
659
+ This can significantly improve I/O performance for datasets with many
660
+ variables that share the same shape.
661
+
662
+ Args:
663
+ ds: Input dataset
664
+ stacked_dim: Base name for the stacking dimensions (default: '__stacked__')
665
+
666
+ Returns:
667
+ Dataset with fewer variables (equal-dim vars stacked together).
668
+ Stacked variables are named 'stacked_{dims}' and have a coordinate
669
+ '{stacked_dim}_{dims}' containing the original variable names.
670
+ """
671
+ # Use ds.variables to avoid slow _construct_dataarray calls
672
+ variables = ds.variables
673
+ coord_names = set(ds.coords)
674
+
675
+ # Group data variables by their dimensions (preserve insertion order for deterministic stacking)
676
+ groups = defaultdict(list)
677
+ for name in variables:
678
+ if name not in coord_names:
679
+ groups[variables[name].dims].append(name)
680
+
681
+ new_data_vars = {}
682
+ for dims, var_names in groups.items():
683
+ if len(var_names) == 1:
684
+ # Single variable - use Variable directly
685
+ new_data_vars[var_names[0]] = variables[var_names[0]]
686
+ else:
687
+ dim_suffix = '_'.join(dims) if dims else 'scalar'
688
+ group_stacked_dim = f'{stacked_dim}_{dim_suffix}'
689
+
690
+ # Stack using numpy directly - much faster than xr.concat
691
+ # All variables in this group have the same dims/shape
692
+ arrays = [variables[name].values for name in var_names]
693
+ stacked_data = np.stack(arrays, axis=0)
694
+
695
+ # Capture per-variable attrs before stacking
696
+ per_variable_attrs = {name: dict(variables[name].attrs) for name in var_names}
697
+
698
+ # Create new Variable with stacked dimension first
699
+ stacked_var = xr.Variable(
700
+ dims=(group_stacked_dim,) + dims,
701
+ data=stacked_data,
702
+ attrs={'__per_variable_attrs__': per_variable_attrs},
703
+ )
704
+ new_data_vars[f'stacked_{dim_suffix}'] = stacked_var
705
+
706
+ # Build result dataset preserving coordinates
707
+ result = xr.Dataset(new_data_vars, coords=ds.coords, attrs=ds.attrs)
708
+
709
+ # Add the stacking coordinates (variable names)
710
+ for dims, var_names in groups.items():
711
+ if len(var_names) > 1:
712
+ dim_suffix = '_'.join(dims) if dims else 'scalar'
713
+ group_stacked_dim = f'{stacked_dim}_{dim_suffix}'
714
+ result = result.assign_coords({group_stacked_dim: var_names})
715
+
716
+ return result
717
+
718
+
719
+ def _unstack_vars(ds: xr.Dataset, stacked_prefix: str = '__stacked__') -> xr.Dataset:
720
+ """
721
+ Reverse of _stack_equal_vars - unstack back to individual variables.
722
+
723
+ Args:
724
+ ds: Dataset with stacked variables (from _stack_equal_vars)
725
+ stacked_prefix: Prefix used for stacking dimensions (default: '__stacked__')
726
+
727
+ Returns:
728
+ Dataset with individual variables restored from stacked arrays.
729
+ """
730
+ new_data_vars = {}
731
+ variables = ds.variables
732
+ coord_names = set(ds.coords)
733
+
734
+ for name in variables:
735
+ if name in coord_names:
736
+ continue
737
+ var = variables[name]
738
+ # Find stacked dimension (if any)
739
+ stacked_dim = None
740
+ stacked_dim_idx = None
741
+ for i, d in enumerate(var.dims):
742
+ if d.startswith(stacked_prefix):
743
+ stacked_dim = d
744
+ stacked_dim_idx = i
745
+ break
746
+
747
+ if stacked_dim is not None:
748
+ # Get labels from the stacked coordinate
749
+ labels = ds.coords[stacked_dim].values
750
+ # Get remaining dims (everything except stacked dim)
751
+ remaining_dims = var.dims[:stacked_dim_idx] + var.dims[stacked_dim_idx + 1 :]
752
+ # Get per-variable attrs if available
753
+ per_variable_attrs = var.attrs.get('__per_variable_attrs__', {})
754
+ # Extract each slice using numpy indexing (much faster than .sel())
755
+ data = var.values
756
+ for idx, label in enumerate(labels):
757
+ # Use numpy indexing to get the slice
758
+ sliced_data = np.take(data, idx, axis=stacked_dim_idx)
759
+ # Restore original attrs if available
760
+ restored_attrs = per_variable_attrs.get(str(label), {})
761
+ new_data_vars[str(label)] = xr.Variable(remaining_dims, sliced_data, attrs=restored_attrs)
762
+ else:
763
+ new_data_vars[name] = var
764
+
765
+ # Preserve non-dimension coordinates (filter out stacked dim coords)
766
+ preserved_coords = {k: v for k, v in ds.coords.items() if not k.startswith(stacked_prefix)}
767
+ return xr.Dataset(new_data_vars, coords=preserved_coords, attrs=ds.attrs)
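A minimal round-trip sketch of the two helpers above, assuming both are in scope; the variable names and values are invented. Three variables that share the time dimension collapse into one stacked variable, and unstacking restores them:

```python
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2024-01-01', periods=4, freq='h')
ds = xr.Dataset(
    {name: ('time', np.random.rand(4)) for name in ['Boiler|flow', 'CHP|flow', 'Grid|flow']},
    coords={'time': time},
)

stacked = _stack_equal_vars(ds)
print(list(stacked.data_vars))  # ['stacked_time'] - one stacked variable instead of three
print(list(stacked.coords))     # 'time' plus '__stacked___time' holding the original variable names

restored = _unstack_vars(stacked)
assert set(restored.data_vars) == set(ds.data_vars)  # original variables are back
```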
249
768
 
250
769
 
251
770
  def load_dataset_from_netcdf(path: str | pathlib.Path) -> xr.Dataset:
252
771
  """
253
772
  Load a dataset from a netcdf file. Load all attrs from 'attrs' attributes.
254
773
 
774
+ Automatically unstacks variables that were stacked during saving with
775
+ save_dataset_to_netcdf(stack_vars=True).
776
+
255
777
  Args:
256
778
  path: Path to load the dataset from.
257
779
 
258
780
  Returns:
259
- Dataset: Loaded dataset with restored attrs.
781
+ Dataset: Loaded dataset with restored attrs and unstacked variables.
260
782
  """
261
- ds = xr.load_dataset(str(path), engine='h5netcdf')
783
+ # Suppress numpy binary compatibility warnings from netCDF4 (numpy 1->2 transition)
784
+ with warnings.catch_warnings():
785
+ warnings.filterwarnings('ignore', category=RuntimeWarning, message='numpy.ndarray size changed')
786
+ ds = xr.load_dataset(str(path), engine='netcdf4')
262
787
 
263
788
  # Restore Dataset attrs
264
789
  if 'attrs' in ds.attrs:
265
790
  ds.attrs = json.loads(ds.attrs['attrs'])
266
791
 
267
- # Restore DataArray attrs
268
- for var_name, data_var in ds.data_vars.items():
269
- if 'attrs' in data_var.attrs:
270
- ds[var_name].attrs = json.loads(data_var.attrs['attrs'])
792
+ # Restore DataArray attrs (before unstacking, as stacked vars have no individual attrs)
793
+ # Use ds.variables to avoid slow _construct_dataarray calls
794
+ variables = ds.variables
795
+ for var_name in variables:
796
+ var = variables[var_name]
797
+ if 'attrs' in var.attrs:
798
+ var.attrs = json.loads(var.attrs['attrs'])
799
+
800
+ # Unstack variables if they were stacked during saving
801
+ # Detection: check if any dataset dimension starts with '__stacked__'
802
+ if any(dim.startswith('__stacked__') for dim in ds.dims):
803
+ ds = _unstack_vars(ds)
804
+
805
+ return ds
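For completeness, a small end-to-end sketch of the save/load pair, assuming both functions are imported from this module; the file name and attrs are placeholders, and the netCDF4 backend must be installed:

```python
import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset(
    {'flow_rate': ('time', np.arange(3.0))},
    coords={'time': pd.date_range('2024-01-01', periods=3, freq='h')},
    attrs={'meta': {'version': 7, 'note': 'demo'}},
)
ds['flow_rate'].attrs = {'unit': 'MW'}

save_dataset_to_netcdf(ds, 'demo.nc', compression=5)  # attrs stored as JSON, equal-dim vars stacked
restored = load_dataset_from_netcdf('demo.nc')        # attrs restored, variables unstacked

assert restored.attrs == ds.attrs
assert restored['flow_rate'].attrs == {'unit': 'MW'}
```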
806
+
807
+
808
+ # Parameter rename mappings for backwards compatibility conversion
809
+ # Format: {old_name: new_name}
810
+ PARAMETER_RENAMES = {
811
+ # Effect parameters
812
+ 'minimum_operation': 'minimum_temporal',
813
+ 'maximum_operation': 'maximum_temporal',
814
+ 'minimum_invest': 'minimum_periodic',
815
+ 'maximum_invest': 'maximum_periodic',
816
+ 'minimum_investment': 'minimum_periodic',
817
+ 'maximum_investment': 'maximum_periodic',
818
+ 'minimum_operation_per_hour': 'minimum_per_hour',
819
+ 'maximum_operation_per_hour': 'maximum_per_hour',
820
+ # InvestParameters
821
+ 'fix_effects': 'effects_of_investment',
822
+ 'specific_effects': 'effects_of_investment_per_size',
823
+ 'divest_effects': 'effects_of_retirement',
824
+ 'piecewise_effects': 'piecewise_effects_of_investment',
825
+ # Flow/OnOffParameters
826
+ 'flow_hours_total_max': 'flow_hours_max',
827
+ 'flow_hours_total_min': 'flow_hours_min',
828
+ 'on_hours_total_max': 'on_hours_max',
829
+ 'on_hours_total_min': 'on_hours_min',
830
+ 'switch_on_total_max': 'switch_on_max',
831
+ # Bus
832
+ 'excess_penalty_per_flow_hour': 'imbalance_penalty_per_flow_hour',
833
+ # Component parameters (Source/Sink)
834
+ 'source': 'outputs',
835
+ 'sink': 'inputs',
836
+ 'prevent_simultaneous_sink_and_source': 'prevent_simultaneous_flow_rates',
837
+ # LinearConverter flow/efficiency parameters (pre-v4 files)
838
+ # These are needed for very old files that use short flow names
839
+ 'Q_fu': 'fuel_flow',
840
+ 'P_el': 'electrical_flow',
841
+ 'Q_th': 'thermal_flow',
842
+ 'Q_ab': 'heat_source_flow',
843
+ 'eta': 'thermal_efficiency',
844
+ 'eta_th': 'thermal_efficiency',
845
+ 'eta_el': 'electrical_efficiency',
846
+ 'COP': 'cop',
847
+ # Storage
848
+ # Note: 'lastValueOfSim' → 'equals_final' is a value change, not a key change
849
+ # Class renames (v4.2.0)
850
+ 'FullCalculation': 'Optimization',
851
+ 'AggregatedCalculation': 'ClusteredOptimization',
852
+ 'SegmentedCalculation': 'SegmentedOptimization',
853
+ 'CalculationResults': 'Results',
854
+ 'SegmentedCalculationResults': 'SegmentedResults',
855
+ 'Aggregation': 'Clustering',
856
+ 'AggregationParameters': 'ClusteringParameters',
857
+ 'AggregationModel': 'ClusteringModel',
858
+ # OnOffParameters → StatusParameters (class and attribute names)
859
+ 'OnOffParameters': 'StatusParameters',
860
+ 'on_off_parameters': 'status_parameters',
861
+ # StatusParameters attribute renames (applies to both Flow-level and Component-level)
862
+ 'effects_per_switch_on': 'effects_per_startup',
863
+ 'effects_per_running_hour': 'effects_per_active_hour',
864
+ 'consecutive_on_hours_min': 'min_uptime',
865
+ 'consecutive_on_hours_max': 'max_uptime',
866
+ 'consecutive_off_hours_min': 'min_downtime',
867
+ 'consecutive_off_hours_max': 'max_downtime',
868
+ 'force_switch_on': 'force_startup_tracking',
869
+ 'on_hours_min': 'active_hours_min',
870
+ 'on_hours_max': 'active_hours_max',
871
+ 'switch_on_max': 'startup_limit',
872
+ # TimeSeriesData
873
+ 'agg_group': 'aggregation_group',
874
+ 'agg_weight': 'aggregation_weight',
875
+ }
876
+
877
+ # Value renames (for specific parameter values that changed)
878
+ VALUE_RENAMES = {
879
+ 'initial_charge_state': {'lastValueOfSim': 'equals_final'},
880
+ }
881
+
882
+
883
+ # Keys that should NOT have their child keys renamed (they reference flow labels)
884
+ _FLOW_LABEL_REFERENCE_KEYS = {'piecewises', 'conversion_factors'}
885
+
886
+ # Keys that ARE flow parameters on components (should be renamed)
887
+ _FLOW_PARAMETER_KEYS = {'Q_fu', 'P_el', 'Q_th', 'Q_ab', 'eta', 'eta_th', 'eta_el', 'COP'}
888
+
889
+
890
+ def _rename_keys_recursive(
891
+ obj: Any,
892
+ key_renames: dict[str, str],
893
+ value_renames: dict[str, dict],
894
+ skip_flow_renames: bool = False,
895
+ ) -> Any:
896
+ """Recursively rename keys and values in nested data structures.
897
+
898
+ Args:
899
+ obj: The object to process (dict, list, or scalar)
900
+ key_renames: Mapping of old key names to new key names
901
+ value_renames: Mapping of key names to {old_value: new_value} dicts
902
+ skip_flow_renames: If True, skip renaming flow-parameter keys (used inside piecewises and conversion_factors, where child keys are flow labels)
903
+
904
+ Returns:
905
+ The processed object with renamed keys and values
906
+ """
907
+ if isinstance(obj, dict):
908
+ new_dict = {}
909
+ for key, value in obj.items():
910
+ # Determine if we should skip flow renames for children
911
+ child_skip_flow_renames = skip_flow_renames or key in _FLOW_LABEL_REFERENCE_KEYS
912
+
913
+ # Rename the key if needed (skip flow params if in reference context)
914
+ if skip_flow_renames and key in _FLOW_PARAMETER_KEYS:
915
+ new_key = key # Don't rename flow labels inside piecewises etc.
916
+ else:
917
+ new_key = key_renames.get(key, key)
918
+
919
+ # Process the value recursively
920
+ new_value = _rename_keys_recursive(value, key_renames, value_renames, child_skip_flow_renames)
921
+
922
+ # Check if this key has value renames (lookup by renamed key, fallback to old key)
923
+ vr_key = new_key if new_key in value_renames else key
924
+ if vr_key in value_renames and isinstance(new_value, str):
925
+ new_value = value_renames[vr_key].get(new_value, new_value)
926
+
927
+ # Handle __class__ values - rename class names
928
+ if key == '__class__' and isinstance(new_value, str):
929
+ new_value = key_renames.get(new_value, new_value)
930
+
931
+ new_dict[new_key] = new_value
932
+ return new_dict
933
+
934
+ elif isinstance(obj, list):
935
+ return [_rename_keys_recursive(item, key_renames, value_renames, skip_flow_renames) for item in obj]
936
+
937
+ else:
938
+ return obj
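A short sketch of how the rename pass handles key, value, and class renames plus the flow-label exception, assuming the mappings above are in scope; the nested structure is illustrative only, not a real serialized FlowSystem:

```python
old = {
    '__class__': 'FullCalculation',            # class name rename via PARAMETER_RENAMES
    'eta': 0.9,                                # flow-parameter key at component level: renamed
    'conversion_factors': [{'eta': 1.0}],      # 'eta' here references a flow label: left untouched
    'initial_charge_state': 'lastValueOfSim',  # value rename via VALUE_RENAMES
}

new = _rename_keys_recursive(old, PARAMETER_RENAMES, VALUE_RENAMES)
# {'__class__': 'Optimization',
#  'thermal_efficiency': 0.9,
#  'conversion_factors': [{'eta': 1.0}],
#  'initial_charge_state': 'equals_final'}
```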
939
+
940
+
941
+ def convert_old_dataset(
942
+ ds: xr.Dataset,
943
+ key_renames: dict[str, str] | None = None,
944
+ value_renames: dict[str, dict] | None = None,
945
+ reduce_constants: bool = True,
946
+ ) -> xr.Dataset:
947
+ """Convert an old FlowSystem dataset to the current format.
948
+
949
+ This function performs two conversions:
950
+ 1. Renames parameters in the reference structure to current naming conventions
951
+ 2. Reduces constant arrays to minimal dimensions (e.g., broadcasted scalars back to scalars)
952
+
953
+ This is useful for loading FlowSystem files saved with older versions of flixopt.
954
+
955
+ Args:
956
+ ds: The dataset to convert
957
+ key_renames: Custom key renames to apply. If None, uses PARAMETER_RENAMES.
958
+ value_renames: Custom value renames to apply. If None, uses VALUE_RENAMES.
959
+ reduce_constants: If True (default), reduce constant arrays to minimal dimensions.
960
+ Old files may have scalars broadcasted to full (time, period, scenario) shape.
961
+
962
+ Returns:
963
+ The converted dataset
964
+
965
+ Examples:
966
+ Convert an old netCDF file to new format:
967
+
968
+ ```python
969
+ from flixopt import io
970
+
971
+ # Load old file
972
+ ds = io.load_dataset_from_netcdf('old_flow_system.nc4')
973
+
974
+ # Convert to current format
975
+ ds = io.convert_old_dataset(ds)
976
+
977
+ # Now load as FlowSystem
978
+ from flixopt import FlowSystem
979
+
980
+ fs = FlowSystem.from_dataset(ds)
981
+ ```
982
+ """
983
+ if key_renames is None:
984
+ key_renames = PARAMETER_RENAMES
985
+ if value_renames is None:
986
+ value_renames = VALUE_RENAMES
987
+
988
+ # Convert the attrs (reference_structure)
989
+ ds.attrs = _rename_keys_recursive(ds.attrs, key_renames, value_renames)
990
+
991
+ # Reduce constant arrays to minimal dimensions
992
+ if reduce_constants:
993
+ ds = _reduce_constant_arrays(ds)
994
+
995
+ return ds
996
+
997
+
998
+ def convert_old_netcdf(
999
+ input_path: str | pathlib.Path,
1000
+ output_path: str | pathlib.Path | None = None,
1001
+ compression: int = 0,
1002
+ ) -> xr.Dataset:
1003
+ """Load an old FlowSystem netCDF file and convert to new parameter names.
1004
+
1005
+ This is a convenience function that combines loading, conversion, and
1006
+ optionally saving the converted dataset.
1007
+
1008
+ Args:
1009
+ input_path: Path to the old netCDF file
1010
+ output_path: If provided, save the converted dataset to this path.
1011
+ If None, only returns the converted dataset without saving.
1012
+ compression: Compression level (0-9) for saving. Only used if output_path is provided.
1013
+
1014
+ Returns:
1015
+ The converted dataset
1016
+
1017
+ Examples:
1018
+ Convert and save to new file:
1019
+
1020
+ ```python
1021
+ from flixopt import io
1022
+
1023
+ # Convert old file to new format
1024
+ ds = io.convert_old_netcdf('old_system.nc4', 'new_system.nc')
1025
+ ```
1026
+
1027
+ Convert and load as FlowSystem:
271
1028
 
272
- # Restore coordinate attrs
273
- for coord_name, coord_var in ds.coords.items():
274
- if hasattr(coord_var, 'attrs') and 'attrs' in coord_var.attrs:
275
- ds[coord_name].attrs = json.loads(coord_var.attrs['attrs'])
1029
+ ```python
1030
+ from flixopt import FlowSystem, io
1031
+
1032
+ ds = io.convert_old_netcdf('old_system.nc4')
1033
+ fs = FlowSystem.from_dataset(ds)
1034
+ ```
1035
+ """
1036
+ # Load and convert
1037
+ ds = load_dataset_from_netcdf(input_path)
1038
+ ds = convert_old_dataset(ds)
1039
+
1040
+ # Optionally save
1041
+ if output_path is not None:
1042
+ save_dataset_to_netcdf(ds, output_path, compression=compression)
1043
+ logger.info(f'Converted {input_path} -> {output_path}')
276
1044
 
277
1045
  return ds
278
1046
 
279
1047
 
280
1048
  @dataclass
281
- class CalculationResultsPaths:
282
- """Container for all paths related to saving CalculationResults."""
1049
+ class ResultsPaths:
1050
+ """Container for all paths related to saving Results."""
283
1051
 
284
1052
  folder: pathlib.Path
285
1053
  name: str
@@ -308,18 +1076,24 @@ class CalculationResultsPaths:
308
1076
  'model_documentation': self.model_documentation,
309
1077
  }
310
1078
 
311
- def create_folders(self, parents: bool = False) -> None:
1079
+ def create_folders(self, parents: bool = False, exist_ok: bool = True) -> None:
312
1080
  """Ensure the folder exists.
1081
+
313
1082
  Args:
314
- parents: Whether to create the parent folders if they do not exist.
1083
+ parents: If True, create parent directories as needed. If False, parent must exist.
1084
+ exist_ok: If True, do not raise error if folder already exists. If False, raise FileExistsError.
1085
+
1086
+ Raises:
1087
+ FileNotFoundError: If parents=False and parent directory doesn't exist.
1088
+ FileExistsError: If exist_ok=False and folder already exists.
315
1089
  """
316
- if not self.folder.exists():
317
- try:
318
- self.folder.mkdir(parents=parents)
319
- except FileNotFoundError as e:
320
- raise FileNotFoundError(
321
- f'Folder {self.folder} and its parent do not exist. Please create them first.'
322
- ) from e
1090
+ try:
1091
+ self.folder.mkdir(parents=parents, exist_ok=exist_ok)
1092
+ except FileNotFoundError as e:
1093
+ raise FileNotFoundError(
1094
+ f'Cannot create folder {self.folder}: parent directory does not exist. '
1095
+ f'Use parents=True to create parent directories.'
1096
+ ) from e
323
1097
 
324
1098
  def update(self, new_name: str | None = None, new_folder: pathlib.Path | None = None) -> None:
325
1099
  """Update name and/or folder and refresh all paths."""
@@ -330,3 +1104,892 @@ class CalculationResultsPaths:
330
1104
  raise FileNotFoundError(f'Folder {new_folder} does not exist or is not a directory.')
331
1105
  self.folder = new_folder
332
1106
  self._update_paths()
1107
+
1108
+
1109
+ def numeric_to_str_for_repr(
1110
+ value: Numeric_TPS,
1111
+ precision: int = 1,
1112
+ atol: float = 1e-10,
1113
+ ) -> str:
1114
+ """Format value for display in repr methods.
1115
+
1116
+ For single values or uniform arrays, returns the formatted value.
1117
+ For arrays with variation, returns a range showing min-max.
1118
+
1119
+ Args:
1120
+ value: Numeric value or container (DataArray, array, Series, DataFrame)
1121
+ precision: Number of decimal places (default: 1)
1122
+ atol: Absolute tolerance for considering values equal (default: 1e-10)
1123
+
1124
+ Returns:
1125
+ Formatted string representation:
1126
+ - Single/uniform values: "100.0"
1127
+ - Nearly uniform values: "~100.0" (values differ slightly but display similarly)
1128
+ - Varying values: "50.0-150.0" (shows range from min to max)
1129
+
1130
+ Raises:
1131
+ TypeError: If value cannot be converted to numeric format
1132
+ """
1133
+ # Handle simple scalar types
1134
+ if isinstance(value, (int, float, np.integer, np.floating)):
1135
+ return f'{float(value):.{precision}f}'
1136
+
1137
+ # Extract array data for variation checking
1138
+ arr = None
1139
+ if isinstance(value, xr.DataArray):
1140
+ arr = value.values.flatten()
1141
+ elif isinstance(value, (np.ndarray, pd.Series)):
1142
+ arr = np.asarray(value).flatten()
1143
+ elif isinstance(value, pd.DataFrame):
1144
+ arr = value.values.flatten()
1145
+ else:
1146
+ # Fallback for unknown types
1147
+ try:
1148
+ return f'{float(value):.{precision}f}'
1149
+ except (TypeError, ValueError) as e:
1150
+ raise TypeError(f'Cannot format value of type {type(value).__name__} for repr') from e
1151
+
1152
+ # Normalize dtype and handle empties
1153
+ arr = arr.astype(float, copy=False)
1154
+ if arr.size == 0:
1155
+ return '?'
1156
+
1157
+ # Filter non-finite values
1158
+ finite = arr[np.isfinite(arr)]
1159
+ if finite.size == 0:
1160
+ return 'nan'
1161
+
1162
+ # Check for single value
1163
+ if finite.size == 1:
1164
+ return f'{float(finite[0]):.{precision}f}'
1165
+
1166
+ # Check if all values are the same or very close
1167
+ min_val = float(np.nanmin(finite))
1168
+ max_val = float(np.nanmax(finite))
1169
+
1170
+ # First check: values are essentially identical
1171
+ if np.allclose(min_val, max_val, atol=atol):
1172
+ return f'{float(np.mean(finite)):.{precision}f}'
1173
+
1174
+ # Second check: display values are the same but actual values differ slightly
1175
+ min_str = f'{min_val:.{precision}f}'
1176
+ max_str = f'{max_val:.{precision}f}'
1177
+ if min_str == max_str:
1178
+ return f'~{min_str}'
1179
+
1180
+ # Values vary significantly - show range
1181
+ return f'{min_str}-{max_str}'
1182
+
1183
+
1184
+ def _format_value_for_repr(value) -> str:
1185
+ """Format a single value for display in repr.
1186
+
1187
+ Args:
1188
+ value: The value to format
1189
+
1190
+ Returns:
1191
+ Formatted string representation of the value
1192
+ """
1193
+ # Format numeric types using specialized formatter
1194
+ if isinstance(value, (int, float, np.integer, np.floating, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray)):
1195
+ try:
1196
+ return numeric_to_str_for_repr(value)
1197
+ except Exception:
1198
+ value_repr = repr(value)
1199
+ if len(value_repr) > 50:
1200
+ value_repr = value_repr[:47] + '...'
1201
+ return value_repr
1202
+
1203
+ # Format dicts with numeric/array values nicely
1204
+ elif isinstance(value, dict):
1205
+ try:
1206
+ formatted_items = []
1207
+ for k, v in value.items():
1208
+ if isinstance(
1209
+ v, (int, float, np.integer, np.floating, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray)
1210
+ ):
1211
+ v_str = numeric_to_str_for_repr(v)
1212
+ else:
1213
+ v_str = repr(v)
1214
+ if len(v_str) > 30:
1215
+ v_str = v_str[:27] + '...'
1216
+ formatted_items.append(f'{repr(k)}: {v_str}')
1217
+ value_repr = '{' + ', '.join(formatted_items) + '}'
1218
+ if len(value_repr) > 50:
1219
+ value_repr = value_repr[:47] + '...'
1220
+ return value_repr
1221
+ except Exception:
1222
+ value_repr = repr(value)
1223
+ if len(value_repr) > 50:
1224
+ value_repr = value_repr[:47] + '...'
1225
+ return value_repr
1226
+
1227
+ # Default repr with truncation
1228
+ else:
1229
+ value_repr = repr(value)
1230
+ if len(value_repr) > 50:
1231
+ value_repr = value_repr[:47] + '...'
1232
+ return value_repr
1233
+
1234
+
1235
+ def build_repr_from_init(
1236
+ obj: object,
1237
+ excluded_params: set[str] | None = None,
1238
+ label_as_positional: bool = True,
1239
+ skip_default_size: bool = False,
1240
+ ) -> str:
1241
+ """Build a repr string from __init__ signature, showing non-default parameter values.
1242
+
1243
+ This utility function extracts common repr logic used across flixopt classes.
1244
+ It introspects the __init__ method to build a constructor-style repr showing
1245
+ only parameters that differ from their defaults.
1246
+
1247
+ Args:
1248
+ obj: The object to create repr for
1249
+ excluded_params: Set of parameter names to exclude (e.g., {'self', 'inputs', 'outputs'})
1250
+ Default excludes 'self', 'label', and 'kwargs'
1251
+ label_as_positional: If True and 'label' param exists, show it as first positional arg
1252
+ skip_default_size: Deprecated. Previously skipped size=CONFIG.Modeling.big, now size=None is default.
1253
+
1254
+ Returns:
1255
+ Formatted repr string like: ClassName("label", param=value)
1256
+ """
1257
+ if excluded_params is None:
1258
+ excluded_params = {'self', 'label', 'kwargs'}
1259
+ else:
1260
+ # Always exclude 'self'
1261
+ excluded_params = excluded_params | {'self'}
1262
+
1263
+ try:
1264
+ # Get the constructor arguments and their current values
1265
+ init_signature = inspect.signature(obj.__init__)
1266
+ init_params = init_signature.parameters
1267
+
1268
+ # Check if this has a 'label' parameter - if so, show it first as positional
1269
+ has_label = 'label' in init_params and label_as_positional
1270
+
1271
+ # Build kwargs for non-default parameters
1272
+ kwargs_parts = []
1273
+ label_value = None
1274
+
1275
+ for param_name, param in init_params.items():
1276
+ # Skip *args and **kwargs
1277
+ if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
1278
+ continue
1279
+
1280
+ # Handle label separately if showing as positional (check BEFORE excluded_params)
1281
+ if param_name == 'label' and has_label:
1282
+ label_value = getattr(obj, param_name, None)
1283
+ continue
1284
+
1285
+ # Now check if parameter should be excluded
1286
+ if param_name in excluded_params:
1287
+ continue
1288
+
1289
+ # Get current value
1290
+ value = getattr(obj, param_name, None)
1291
+
1292
+ # Skip if value matches default
1293
+ if param.default != inspect.Parameter.empty:
1294
+ # Special handling for empty containers (even if default was None)
1295
+ if isinstance(value, (dict, list, tuple, set)) and len(value) == 0:
1296
+ if param.default is None or (
1297
+ isinstance(param.default, (dict, list, tuple, set)) and len(param.default) == 0
1298
+ ):
1299
+ continue
1300
+
1301
+ # Handle array comparisons (xarray, numpy)
1302
+ elif isinstance(value, (xr.DataArray, np.ndarray)):
1303
+ try:
1304
+ if isinstance(param.default, (xr.DataArray, np.ndarray)):
1305
+ # Compare arrays element-wise
1306
+ if isinstance(value, xr.DataArray) and isinstance(param.default, xr.DataArray):
1307
+ if value.equals(param.default):
1308
+ continue
1309
+ elif np.array_equal(value, param.default):
1310
+ continue
1311
+ elif isinstance(param.default, (int, float, np.integer, np.floating)):
1312
+ # Compare array to scalar (e.g., after transform_data converts scalar to DataArray)
1313
+ if isinstance(value, xr.DataArray):
1314
+ if np.all(value.values == float(param.default)):
1315
+ continue
1316
+ elif isinstance(value, np.ndarray):
1317
+ if np.all(value == float(param.default)):
1318
+ continue
1319
+ except Exception:
1320
+ pass # If comparison fails, include in repr
1321
+
1322
+ # Handle numeric comparisons (deals with 0 vs 0.0, int vs float)
1323
+ elif isinstance(value, (int, float, np.integer, np.floating)) and isinstance(
1324
+ param.default, (int, float, np.integer, np.floating)
1325
+ ):
1326
+ try:
1327
+ if float(value) == float(param.default):
1328
+ continue
1329
+ except (ValueError, TypeError):
1330
+ pass
1331
+
1332
+ elif value == param.default:
1333
+ continue
1334
+
1335
+ # Skip None values if default is None
1336
+ if value is None and param.default is None:
1337
+ continue
1338
+
1339
+ # Special case: hide CONFIG.Modeling.big for size parameter
1340
+ if skip_default_size and param_name == 'size':
1341
+ from .config import CONFIG
1342
+
1343
+ try:
1344
+ if isinstance(value, (int, float, np.integer, np.floating)):
1345
+ if float(value) == CONFIG.Modeling.big:
1346
+ continue
1347
+ except Exception:
1348
+ pass
1349
+
1350
+ # Format value using helper function
1351
+ value_repr = _format_value_for_repr(value)
1352
+ kwargs_parts.append(f'{param_name}={value_repr}')
1353
+
1354
+ # Build args string with label first as positional if present
1355
+ if has_label and label_value is not None:
1356
+ # Use label_full if available, otherwise label
1357
+ if hasattr(obj, 'label_full'):
1358
+ label_repr = repr(obj.label_full)
1359
+ else:
1360
+ label_repr = repr(label_value)
1361
+
1362
+ if len(label_repr) > 50:
1363
+ label_repr = label_repr[:47] + '...'
1364
+ args_str = label_repr
1365
+ if kwargs_parts:
1366
+ args_str += ', ' + ', '.join(kwargs_parts)
1367
+ else:
1368
+ args_str = ', '.join(kwargs_parts)
1369
+
1370
+ # Build final repr
1371
+ class_name = obj.__class__.__name__
1372
+
1373
+ return f'{class_name}({args_str})'
1374
+
1375
+ except Exception:
1376
+ # Fallback if introspection fails
1377
+ return f'{obj.__class__.__name__}(<repr_failed>)'
1378
+
1379
+
1380
+ def format_flow_details(obj: Any, has_inputs: bool = True, has_outputs: bool = True) -> str:
1381
+ """Format inputs and outputs as indented bullet list.
1382
+
1383
+ Args:
1384
+ obj: Object with 'inputs' and/or 'outputs' attributes
1385
+ has_inputs: Whether to check for inputs
1386
+ has_outputs: Whether to check for outputs
1387
+
1388
+ Returns:
1389
+ Formatted string with flow details (including leading newline), or empty string if no flows
1390
+ """
1391
+ flow_lines = []
1392
+
1393
+ if has_inputs and hasattr(obj, 'inputs') and obj.inputs:
1394
+ flow_lines.append(' inputs:')
1395
+ for flow in obj.inputs:
1396
+ flow_lines.append(f' * {repr(flow)}')
1397
+
1398
+ if has_outputs and hasattr(obj, 'outputs') and obj.outputs:
1399
+ flow_lines.append(' outputs:')
1400
+ for flow in obj.outputs:
1401
+ flow_lines.append(f' * {repr(flow)}')
1402
+
1403
+ return '\n' + '\n'.join(flow_lines) if flow_lines else ''
1404
+
1405
+
1406
+ def format_title_with_underline(title: str, underline_char: str = '-') -> str:
1407
+ """Format a title with underline of matching length.
1408
+
1409
+ Args:
1410
+ title: The title text
1411
+ underline_char: Character to use for underline (default: '-')
1412
+
1413
+ Returns:
1414
+ Formatted string: "Title\\n-----\\n"
1415
+ """
1416
+ return f'{title}\n{underline_char * len(title)}\n'
1417
+
1418
+
1419
+ def format_sections_with_headers(sections: dict[str, str], underline_char: str = '-') -> list[str]:
1420
+ """Format sections with underlined headers.
1421
+
1422
+ Args:
1423
+ sections: Dict mapping section headers to content
1424
+ underline_char: Character for underlining headers
1425
+
1426
+ Returns:
1427
+ List of formatted section strings
1428
+ """
1429
+ formatted_sections = []
1430
+ for section_header, section_content in sections.items():
1431
+ underline = underline_char * len(section_header)
1432
+ formatted_sections.append(f'{section_header}\n{underline}\n{section_content}')
1433
+ return formatted_sections
1434
+
1435
+
1436
+ def build_metadata_info(parts: list[str], prefix: str = ' | ') -> str:
1437
+ """Build metadata info string from parts.
1438
+
1439
+ Args:
1440
+ parts: List of metadata strings (empty strings are filtered out)
1441
+ prefix: Prefix to add if parts is non-empty
1442
+
1443
+ Returns:
1444
+ Formatted info string or empty string
1445
+ """
1446
+ # Filter out empty strings
1447
+ parts = [p for p in parts if p]
1448
+ if not parts:
1449
+ return ''
1450
+ info = ' | '.join(parts)
1451
+ return prefix + info if prefix else info
1452
+
1453
+
1454
+ @contextmanager
1455
+ def suppress_output():
1456
+ """
1457
+ Suppress all console output including C-level output from solvers.
1458
+
1459
+ WARNING: Not thread-safe. Modifies global file descriptors.
1460
+ Use only with sequential execution or multiprocessing.
1461
+ """
1462
+ # Save original file descriptors
1463
+ old_stdout_fd = os.dup(1)
1464
+ old_stderr_fd = os.dup(2)
1465
+ devnull_fd = None
1466
+
1467
+ try:
1468
+ # Open devnull
1469
+ devnull_fd = os.open(os.devnull, os.O_WRONLY)
1470
+
1471
+ # Flush Python buffers before redirecting
1472
+ sys.stdout.flush()
1473
+ sys.stderr.flush()
1474
+
1475
+ # Redirect file descriptors to devnull
1476
+ os.dup2(devnull_fd, 1)
1477
+ os.dup2(devnull_fd, 2)
1478
+
1479
+ yield
1480
+
1481
+ finally:
1482
+ # Restore original file descriptors with nested try blocks
1483
+ # to ensure all cleanup happens even if one step fails
1484
+ try:
1485
+ # Flush any buffered output in the redirected streams
1486
+ sys.stdout.flush()
1487
+ sys.stderr.flush()
1488
+ except (OSError, ValueError):
1489
+ pass # Stream might be closed or invalid
1490
+
1491
+ try:
1492
+ os.dup2(old_stdout_fd, 1)
1493
+ except OSError:
1494
+ pass # Failed to restore stdout, continue cleanup
1495
+
1496
+ try:
1497
+ os.dup2(old_stderr_fd, 2)
1498
+ except OSError:
1499
+ pass # Failed to restore stderr, continue cleanup
1500
+
1501
+ # Close all file descriptors
1502
+ for fd in [devnull_fd, old_stdout_fd, old_stderr_fd]:
1503
+ if fd is not None:
1504
+ try:
1505
+ os.close(fd)
1506
+ except OSError:
1507
+ pass # FD already closed or invalid
1508
+
1509
+
1510
+ # ============================================================================
1511
+ # FlowSystem Dataset I/O
1512
+ # ============================================================================
1513
+
1514
+
1515
+ class FlowSystemDatasetIO:
+     """Unified I/O handler for FlowSystem dataset serialization and deserialization.
+
+     This class provides optimized methods for converting FlowSystem objects to/from
+     xarray Datasets. It uses shared constants for variable prefixes and implements
+     fast DataArray construction to avoid xarray's slow _construct_dataarray method.
+
+     Constants:
+         SOLUTION_PREFIX: Prefix for solution variables ('solution|')
+         CLUSTERING_PREFIX: Prefix for clustering variables ('clustering|')
+
+     Example:
+         # Serialization (FlowSystem -> Dataset)
+         ds = FlowSystemDatasetIO.to_dataset(flow_system, base_ds)
+
+         # Deserialization (Dataset -> FlowSystem)
+         fs = FlowSystemDatasetIO.from_dataset(ds)
+     """
+
+     # Shared prefixes for variable namespacing
+     SOLUTION_PREFIX = 'solution|'
+     CLUSTERING_PREFIX = 'clustering|'
+
+     # --- Deserialization (Dataset -> FlowSystem) ---
+
+     @classmethod
+     def from_dataset(cls, ds: xr.Dataset) -> FlowSystem:
+         """Create FlowSystem from dataset.
+
+         This is the main entry point for dataset restoration.
+         Called by FlowSystem.from_dataset().
+
+         If the dataset contains solution data (variables prefixed with 'solution|'),
+         the solution will be restored to the FlowSystem. Solution time coordinates
+         are renamed back from 'solution_time' to 'time'.
+
+         Supports clustered datasets with (cluster, time) dimensions. When detected,
+         creates a synthetic DatetimeIndex for compatibility and stores the clustered
+         data structure for later use.
+
+         Args:
+             ds: Dataset containing the FlowSystem data
+
+         Returns:
+             FlowSystem instance with all components, buses, effects, and solution restored
+         """
+         from .flow_system import FlowSystem
+
+         # Parse dataset structure
+         reference_structure = dict(ds.attrs)
+         solution_var_names, config_var_names = cls._separate_variables(ds)
+         coord_cache = {k: ds.coords[k] for k in ds.coords}
+         arrays_dict = {name: cls._fast_get_dataarray(ds, name, coord_cache) for name in config_var_names}
+
+         # Create and populate FlowSystem
+         flow_system = cls._create_flow_system(ds, reference_structure, arrays_dict, FlowSystem)
+         cls._restore_elements(flow_system, reference_structure, arrays_dict, FlowSystem)
+         cls._restore_solution(flow_system, ds, reference_structure, solution_var_names)
+         cls._restore_clustering(flow_system, ds, reference_structure, config_var_names, arrays_dict, FlowSystem)
+         cls._restore_metadata(flow_system, reference_structure, FlowSystem)
+         flow_system.connect_and_transform()
+         return flow_system
+
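# A minimal restoration sketch (not from the package source): it assumes the
# combined dataset was written earlier with xarray's to_netcdf, and
# 'flow_system.nc' is a hypothetical path.
import xarray as xr

from flixopt.io import FlowSystemDatasetIO  # class introduced in this diff

ds = xr.open_dataset('flow_system.nc')
fs = FlowSystemDatasetIO.from_dataset(ds)  # rebuilds components, buses, effects
if ds.attrs.get('has_solution'):
    print(fs.solution)  # restored with the 'time' dimension reinstated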
+     @classmethod
+     def _separate_variables(cls, ds: xr.Dataset) -> tuple[dict[str, str], list[str]]:
+         """Separate solution variables from config variables.
+
+         Args:
+             ds: Source dataset
+
+         Returns:
+             Tuple of (solution_var_names, config_var_names): solution_var_names maps each
+             original variable name to its prefixed dataset name, and config_var_names
+             lists all remaining non-coordinate variables.
+         """
+         solution_var_names: dict[str, str] = {}  # Maps original_name -> ds_name
+         config_var_names: list[str] = []
+         coord_names = set(ds.coords)
+
+         for name in ds.variables:
+             if name in coord_names:
+                 continue
+             if name.startswith(cls.SOLUTION_PREFIX):
+                 solution_var_names[name[len(cls.SOLUTION_PREFIX) :]] = name
+             else:
+                 config_var_names.append(name)
+
+         return solution_var_names, config_var_names
+
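# A toy sketch of the 'solution|' namespacing that _separate_variables undoes;
# the variable name below is made up for illustration.
import numpy as np
import xarray as xr

combined = xr.Dataset(
    {
        'Boiler(Q_th)|flow_rate': ('time', np.zeros(3)),          # config variable
        'solution|Boiler(Q_th)|flow_rate': ('time', np.ones(3)),  # solved values
    },
    coords={'time': np.arange(3)},
)
solution_names = {n[len('solution|'):]: n for n in combined.data_vars if n.startswith('solution|')}
config_names = [n for n in combined.data_vars if not n.startswith('solution|')]
# solution_names == {'Boiler(Q_th)|flow_rate': 'solution|Boiler(Q_th)|flow_rate'}
# config_names == ['Boiler(Q_th)|flow_rate']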
+     @staticmethod
+     def _fast_get_dataarray(ds: xr.Dataset, name: str, coord_cache: dict[str, xr.DataArray]) -> xr.DataArray:
+         """Construct DataArray from Variable without slow coordinate inference.
+
+         This bypasses the slow _construct_dataarray method (~1.5ms -> ~0.1ms per var).
+
+         Args:
+             ds: Source dataset
+             name: Variable name
+             coord_cache: Pre-cached coordinate DataArrays
+
+         Returns:
+             Constructed DataArray
+         """
+         variable = ds.variables[name]
+         var_dims = set(variable.dims)
+         # Include coordinates whose dims are a subset of the variable's dims
+         # This preserves both dimension coordinates and auxiliary coordinates
+         coords = {k: v for k, v in coord_cache.items() if set(v.dims).issubset(var_dims)}
+         return xr.DataArray(variable, coords=coords, name=name)
+
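# A small self-contained sketch of the fast-construction idea: build the
# DataArray from ds.variables plus cached coords instead of ds[name]. The
# timing figures quoted in the docstring are the diff author's, not re-measured.
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'var_a': (('time',), np.arange(4.0))},
    coords={'time': np.arange(4), 'scenario': 's1'},  # 'scenario' is a scalar coord
)
coord_cache = {k: ds.coords[k] for k in ds.coords}

slow = ds['var_a']  # goes through xarray's _construct_dataarray
variable = ds.variables['var_a']
fast = xr.DataArray(
    variable,
    coords={k: v for k, v in coord_cache.items() if set(v.dims).issubset(set(variable.dims))},
    name='var_a',
)
assert slow.dims == fast.dims and (slow.values == fast.values).all()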
+     @staticmethod
+     def _create_flow_system(
+         ds: xr.Dataset,
+         reference_structure: dict[str, Any],
+         arrays_dict: dict[str, xr.DataArray],
+         cls: type[FlowSystem],
+     ) -> FlowSystem:
+         """Create FlowSystem instance with constructor parameters."""
+         # Extract cluster index if present (clustered FlowSystem)
+         clusters = ds.indexes.get('cluster')
+
+         # For clustered datasets, cluster_weight is (cluster,) shaped - set separately
+         if clusters is not None:
+             cluster_weight_for_constructor = None
+         else:
+             cluster_weight_for_constructor = (
+                 cls._resolve_dataarray_reference(reference_structure['cluster_weight'], arrays_dict)
+                 if 'cluster_weight' in reference_structure
+                 else None
+             )
+
+         # Resolve scenario_weights only if scenario dimension exists
+         scenario_weights = None
+         if ds.indexes.get('scenario') is not None and 'scenario_weights' in reference_structure:
+             scenario_weights = cls._resolve_dataarray_reference(reference_structure['scenario_weights'], arrays_dict)
+
+         # Resolve timestep_duration if present
+         # For segmented systems, it's stored as a data_var; for others it's computed from timesteps_extra
+         timestep_duration = None
+         if 'timestep_duration' in arrays_dict:
+             # Segmented systems store timestep_duration as a data_var
+             timestep_duration = arrays_dict['timestep_duration']
+         elif 'timestep_duration' in reference_structure:
+             ref_value = reference_structure['timestep_duration']
+             if isinstance(ref_value, str) and ref_value.startswith(':::'):
+                 timestep_duration = cls._resolve_dataarray_reference(ref_value, arrays_dict)
+             else:
+                 # Concrete value (e.g., list from expand())
+                 timestep_duration = ref_value
+
+         # Get timesteps - convert integer index to RangeIndex for segmented systems
+         time_index = ds.indexes['time']
+         if not isinstance(time_index, pd.DatetimeIndex):
+             time_index = pd.RangeIndex(len(time_index), name='time')
+
+         return cls(
+             timesteps=time_index,
+             periods=ds.indexes.get('period'),
+             scenarios=ds.indexes.get('scenario'),
+             clusters=clusters,
+             hours_of_last_timestep=reference_structure.get('hours_of_last_timestep'),
+             hours_of_previous_timesteps=reference_structure.get('hours_of_previous_timesteps'),
+             weight_of_last_period=reference_structure.get('weight_of_last_period'),
+             scenario_weights=scenario_weights,
+             cluster_weight=cluster_weight_for_constructor,
+             scenario_independent_sizes=reference_structure.get('scenario_independent_sizes', True),
+             scenario_independent_flow_rates=reference_structure.get('scenario_independent_flow_rates', False),
+             name=reference_structure.get('name'),
+             timestep_duration=timestep_duration,
+         )
+
+     @staticmethod
+     def _restore_elements(
+         flow_system: FlowSystem,
+         reference_structure: dict[str, Any],
+         arrays_dict: dict[str, xr.DataArray],
+         cls: type[FlowSystem],
+     ) -> None:
+         """Restore components, buses, and effects to FlowSystem."""
+         from .effects import Effect
+         from .elements import Bus, Component
+
+         # Restore components
+         for comp_label, comp_data in reference_structure.get('components', {}).items():
+             component = cls._resolve_reference_structure(comp_data, arrays_dict)
+             if not isinstance(component, Component):
+                 logger.critical(f'Restoring component {comp_label} failed.')
+             flow_system._add_components(component)
+
+         # Restore buses
+         for bus_label, bus_data in reference_structure.get('buses', {}).items():
+             bus = cls._resolve_reference_structure(bus_data, arrays_dict)
+             if not isinstance(bus, Bus):
+                 logger.critical(f'Restoring bus {bus_label} failed.')
+             flow_system._add_buses(bus)
+
+         # Restore effects
+         for effect_label, effect_data in reference_structure.get('effects', {}).items():
+             effect = cls._resolve_reference_structure(effect_data, arrays_dict)
+             if not isinstance(effect, Effect):
+                 logger.critical(f'Restoring effect {effect_label} failed.')
+             flow_system._add_effects(effect)
+
+     @classmethod
+     def _restore_solution(
+         cls,
+         flow_system: FlowSystem,
+         ds: xr.Dataset,
+         reference_structure: dict[str, Any],
+         solution_var_names: dict[str, str],
+     ) -> None:
+         """Restore solution dataset if present."""
+         if not reference_structure.get('has_solution', False) or not solution_var_names:
+             return
+
+         # Use dataset subsetting (faster than individual ds[name] access)
+         solution_ds_names = list(solution_var_names.values())
+         solution_ds = ds[solution_ds_names]
+         # Rename variables to remove 'solution|' prefix
+         rename_map = {ds_name: orig_name for orig_name, ds_name in solution_var_names.items()}
+         solution_ds = solution_ds.rename(rename_map)
+         # Rename 'solution_time' back to 'time' if present
+         if 'solution_time' in solution_ds.dims:
+             solution_ds = solution_ds.rename({'solution_time': 'time'})
+         flow_system.solution = solution_ds
+
+     @classmethod
+     def _restore_clustering(
+         cls,
+         flow_system: FlowSystem,
+         ds: xr.Dataset,
+         reference_structure: dict[str, Any],
+         config_var_names: list[str],
+         arrays_dict: dict[str, xr.DataArray],
+         fs_cls: type[FlowSystem],
+     ) -> None:
+         """Restore Clustering object if present."""
+         if 'clustering' not in reference_structure:
+             return
+
+         clustering_structure = json.loads(reference_structure['clustering'])
+
+         # Collect clustering arrays (prefixed with 'clustering|')
+         clustering_arrays: dict[str, xr.DataArray] = {}
+         main_var_names: list[str] = []
+
+         for name in config_var_names:
+             if name.startswith(cls.CLUSTERING_PREFIX):
+                 arr = ds[name]
+                 arr_name = name[len(cls.CLUSTERING_PREFIX) :]
+                 clustering_arrays[arr_name] = arr.rename(arr_name)
+             else:
+                 main_var_names.append(name)
+
+         clustering = fs_cls._resolve_reference_structure(clustering_structure, clustering_arrays)
+         flow_system.clustering = clustering
+
+         # Reconstruct aggregated_data from FlowSystem's main data arrays
+         if clustering.aggregated_data is None and main_var_names:
+             from .core import drop_constant_arrays
+
+             main_vars = {name: arrays_dict[name] for name in main_var_names}
+             clustering.aggregated_data = drop_constant_arrays(xr.Dataset(main_vars), dim='time')
+
+         # Restore cluster_weight from clustering's representative_weights
+         if hasattr(clustering, 'representative_weights'):
+             flow_system.cluster_weight = clustering.representative_weights
+
+     @staticmethod
+     def _restore_metadata(
+         flow_system: FlowSystem,
+         reference_structure: dict[str, Any],
+         cls: type[FlowSystem],
+     ) -> None:
+         """Restore carriers and variable categories."""
+         from .structure import VariableCategory
+
+         # Restore carriers if present
+         if 'carriers' in reference_structure:
+             carriers_structure = json.loads(reference_structure['carriers'])
+             for carrier_data in carriers_structure.values():
+                 carrier = cls._resolve_reference_structure(carrier_data, {})
+                 flow_system._carriers.add(carrier)
+
+         # Restore variable categories if present
+         if 'variable_categories' in reference_structure:
+             categories_dict = json.loads(reference_structure['variable_categories'])
+             restored_categories: dict[str, VariableCategory] = {}
+             for name, value in categories_dict.items():
+                 try:
+                     restored_categories[name] = VariableCategory(value)
+                 except ValueError:
+                     logger.warning(f'Unknown VariableCategory value "{value}" for "{name}", skipping')
+             flow_system._variable_categories = restored_categories
+
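# A sketch of the JSON-in-attrs round trip used for carriers and variable
# categories above; Category is an illustrative stand-in for flixopt's
# VariableCategory enum, and the key name is made up.
import json
from enum import Enum


class Category(Enum):
    FLOW_RATE = 'flow_rate'
    SIZE = 'size'


categories = {'Boiler(Q_th)|flow_rate': Category.FLOW_RATE}
attrs_payload = json.dumps({name: cat.value for name, cat in categories.items()})

restored = {}
for name, value in json.loads(attrs_payload).items():
    try:
        restored[name] = Category(value)
    except ValueError:
        pass  # unknown values are skipped, mirroring _restore_metadata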
+     # --- Serialization (FlowSystem -> Dataset) ---
+
+     @classmethod
+     def to_dataset(
+         cls,
+         flow_system: FlowSystem,
+         base_dataset: xr.Dataset,
+         include_solution: bool = True,
+         include_original_data: bool = True,
+     ) -> xr.Dataset:
+         """Convert FlowSystem-specific data to dataset.
+
+         This method adds FlowSystem-specific data (solution, clustering, metadata)
+         to a base dataset created by the parent class's to_dataset() method.
+
+         Args:
+             flow_system: The FlowSystem to serialize
+             base_dataset: Dataset from parent class with basic structure
+             include_solution: Whether to include optimization solution
+             include_original_data: Whether to include clustering.original_data
+
+         Returns:
+             Complete dataset with all FlowSystem data
+         """
+         from . import __version__
+
+         ds = base_dataset
+
+         # Add solution data
+         ds = cls._add_solution_to_dataset(ds, flow_system.solution, include_solution)
+
+         # Add carriers
+         ds = cls._add_carriers_to_dataset(ds, flow_system._carriers)
+
+         # Add clustering
+         ds = cls._add_clustering_to_dataset(ds, flow_system.clustering, include_original_data)
+
+         # Add variable categories
+         ds = cls._add_variable_categories_to_dataset(ds, flow_system._variable_categories)
+
+         # Add version info
+         ds.attrs['flixopt_version'] = __version__
+
+         # Ensure model coordinates are present
+         ds = cls._add_model_coords(ds, flow_system)
+
+         return ds
+
+     @classmethod
+     def _add_solution_to_dataset(
+         cls,
+         ds: xr.Dataset,
+         solution: xr.Dataset | None,
+         include_solution: bool,
+     ) -> xr.Dataset:
+         """Add solution variables to dataset.
+
+         Uses ds.variables directly for fast serialization (avoids _construct_dataarray).
+         """
+         if include_solution and solution is not None:
+             # Rename 'time' to 'solution_time' to preserve full solution
+             solution_renamed = solution.rename({'time': 'solution_time'}) if 'time' in solution.dims else solution
+
+             # Use ds.variables directly to avoid slow _construct_dataarray calls
+             # Only include data variables (not coordinates)
+             data_var_names = set(solution_renamed.data_vars)
+             solution_vars = {
+                 f'{cls.SOLUTION_PREFIX}{name}': var
+                 for name, var in solution_renamed.variables.items()
+                 if name in data_var_names
+             }
+             ds = ds.assign(solution_vars)
+
+             # Add solution_time coordinate if it exists
+             if 'solution_time' in solution_renamed.coords:
+                 ds = ds.assign_coords(solution_time=solution_renamed.coords['solution_time'])
+
+             ds.attrs['has_solution'] = True
+         else:
+             ds.attrs['has_solution'] = False
+
+         return ds
+
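# A toy sketch of the rename-and-prefix step performed above; the variable
# name is illustrative and 'base' stands in for the structural base dataset.
import numpy as np
import xarray as xr

base = xr.Dataset(coords={'time': np.arange(3)})
solution = xr.Dataset({'Boiler(Q_th)|flow_rate': ('time', np.ones(3))}, coords={'time': np.arange(3)})

renamed = solution.rename({'time': 'solution_time'})
combined = base.assign({f'solution|{name}': renamed.variables[name] for name in renamed.data_vars})
combined.attrs['has_solution'] = True
# combined now carries 'solution|Boiler(Q_th)|flow_rate' on the 'solution_time' dimension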
+     @staticmethod
+     def _add_carriers_to_dataset(ds: xr.Dataset, carriers: Any) -> xr.Dataset:
+         """Add carrier definitions to dataset attributes."""
+         if carriers:
+             carriers_structure = {}
+             for name, carrier in carriers.items():
+                 carrier_ref, _ = carrier._create_reference_structure()
+                 carriers_structure[name] = carrier_ref
+             ds.attrs['carriers'] = json.dumps(carriers_structure)
+
+         return ds
+
+     @classmethod
+     def _add_clustering_to_dataset(
+         cls,
+         ds: xr.Dataset,
+         clustering: Any,
+         include_original_data: bool,
+     ) -> xr.Dataset:
+         """Add clustering object to dataset."""
+         if clustering is not None:
+             clustering_ref, clustering_arrays = clustering._create_reference_structure(
+                 include_original_data=include_original_data
+             )
+             # Add clustering arrays with prefix using batch assignment
+             # (individual ds[name] = arr assignments are slow)
+             prefixed_arrays = {f'{cls.CLUSTERING_PREFIX}{name}': arr for name, arr in clustering_arrays.items()}
+             ds = ds.assign(prefixed_arrays)
+             ds.attrs['clustering'] = json.dumps(clustering_ref)
+
+         return ds
+
+     @staticmethod
+     def _add_variable_categories_to_dataset(
+         ds: xr.Dataset,
+         variable_categories: dict,
+     ) -> xr.Dataset:
+         """Add variable categories to dataset attributes."""
+         if variable_categories:
+             categories_dict = {name: cat.value for name, cat in variable_categories.items()}
+             ds.attrs['variable_categories'] = json.dumps(categories_dict)
+
+         return ds
+
+     @staticmethod
+     def _add_model_coords(ds: xr.Dataset, flow_system: FlowSystem) -> xr.Dataset:
+         """Ensure model coordinates are present in dataset."""
+         model_coords = {'time': flow_system.timesteps}
+         if flow_system.periods is not None:
+             model_coords['period'] = flow_system.periods
+         if flow_system.scenarios is not None:
+             model_coords['scenario'] = flow_system.scenarios
+         if flow_system.clusters is not None:
+             model_coords['cluster'] = flow_system.clusters
+
+         return ds.assign_coords(model_coords)
+
+
+ # ============================================================================
+ # Public API Functions (delegate to FlowSystemDatasetIO class)
+ # ============================================================================
+
+
+ def restore_flow_system_from_dataset(ds: xr.Dataset) -> FlowSystem:
+     """Create FlowSystem from dataset.
+
+     This is the main entry point for dataset restoration.
+     Called by FlowSystem.from_dataset().
+
+     Args:
+         ds: Dataset containing the FlowSystem data
+
+     Returns:
+         FlowSystem instance with all components, buses, effects, and solution restored
+
+     See Also:
+         FlowSystemDatasetIO: Class containing the implementation
+     """
+     return FlowSystemDatasetIO.from_dataset(ds)
+
+
+ def flow_system_to_dataset(
+     flow_system: FlowSystem,
+     base_dataset: xr.Dataset,
+     include_solution: bool = True,
+     include_original_data: bool = True,
+ ) -> xr.Dataset:
+     """Convert FlowSystem-specific data to dataset.
+
+     This function adds FlowSystem-specific data (solution, clustering, metadata)
+     to a base dataset created by the parent class's to_dataset() method.
+
+     Args:
+         flow_system: The FlowSystem to serialize
+         base_dataset: Dataset from parent class with basic structure
+         include_solution: Whether to include optimization solution
+         include_original_data: Whether to include clustering.original_data
+
+     Returns:
+         Complete dataset with all FlowSystem data
+
+     See Also:
+         FlowSystemDatasetIO: Class containing the implementation
+     """
+     return FlowSystemDatasetIO.to_dataset(flow_system, base_dataset, include_solution, include_original_data)
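# A hedged end-to-end sketch for the two delegating helpers above. The names
# 'flow_system' and 'base_ds' are placeholders: base_ds stands for the dataset
# produced by the parent class's to_dataset(), and flow_system for an already
# built FlowSystem; neither is constructed here, so this is not runnable as-is.
import xarray as xr

ds = flow_system_to_dataset(flow_system, base_ds, include_solution=True)
ds.to_netcdf('flow_system.nc')  # hypothetical output path

restored = restore_flow_system_from_dataset(xr.open_dataset('flow_system.nc'))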