power-grid-model 1.11.48__py3-none-win_amd64.whl → 1.12.72__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (60)
  1. power_grid_model/__init__.py +54 -54
  2. power_grid_model/_core/__init__.py +3 -3
  3. power_grid_model/_core/buffer_handling.py +493 -487
  4. power_grid_model/_core/data_handling.py +195 -141
  5. power_grid_model/_core/data_types.py +143 -132
  6. power_grid_model/_core/dataset_definitions.py +109 -109
  7. power_grid_model/_core/enum.py +226 -226
  8. power_grid_model/_core/error_handling.py +206 -206
  9. power_grid_model/_core/errors.py +130 -130
  10. power_grid_model/_core/index_integer.py +17 -17
  11. power_grid_model/_core/options.py +71 -71
  12. power_grid_model/_core/power_grid_core.py +563 -563
  13. power_grid_model/_core/power_grid_dataset.py +535 -535
  14. power_grid_model/_core/power_grid_meta.py +257 -243
  15. power_grid_model/_core/power_grid_model.py +969 -686
  16. power_grid_model/_core/power_grid_model_c/__init__.py +3 -3
  17. power_grid_model/_core/power_grid_model_c/bin/power_grid_model_c.dll +0 -0
  18. power_grid_model/_core/power_grid_model_c/get_pgm_dll_path.py +63 -63
  19. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/basics.h +255 -255
  20. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/buffer.h +108 -108
  21. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset.h +316 -316
  22. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset_definitions.h +1052 -1052
  23. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/handle.h +99 -99
  24. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/meta_data.h +189 -189
  25. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/model.h +125 -125
  26. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/options.h +142 -142
  27. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/serialization.h +118 -118
  28. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c.h +36 -36
  29. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/basics.hpp +65 -65
  30. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/buffer.hpp +61 -61
  31. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/dataset.hpp +220 -220
  32. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/handle.hpp +108 -108
  33. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/meta_data.hpp +84 -84
  34. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/model.hpp +63 -63
  35. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/options.hpp +52 -52
  36. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/serialization.hpp +124 -124
  37. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/utils.hpp +81 -81
  38. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp.hpp +19 -19
  39. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfigVersion.cmake +3 -3
  40. power_grid_model/_core/power_grid_model_c/lib/power_grid_model_c.lib +0 -0
  41. power_grid_model/_core/serialization.py +317 -317
  42. power_grid_model/_core/typing.py +20 -20
  43. power_grid_model/_core/utils.py +798 -793
  44. power_grid_model/data_types.py +321 -321
  45. power_grid_model/enum.py +27 -27
  46. power_grid_model/errors.py +37 -37
  47. power_grid_model/typing.py +43 -43
  48. power_grid_model/utils.py +473 -469
  49. power_grid_model/validation/__init__.py +25 -25
  50. power_grid_model/validation/_rules.py +1171 -1176
  51. power_grid_model/validation/_validation.py +1172 -1172
  52. power_grid_model/validation/assertions.py +93 -93
  53. power_grid_model/validation/errors.py +602 -602
  54. power_grid_model/validation/utils.py +313 -314
  55. {power_grid_model-1.11.48.dist-info → power_grid_model-1.12.72.dist-info}/METADATA +4 -2
  56. power_grid_model-1.12.72.dist-info/RECORD +65 -0
  57. {power_grid_model-1.11.48.dist-info → power_grid_model-1.12.72.dist-info}/WHEEL +1 -1
  58. power_grid_model-1.11.48.dist-info/RECORD +0 -65
  59. {power_grid_model-1.11.48.dist-info → power_grid_model-1.12.72.dist-info}/entry_points.txt +0 -0
  60. {power_grid_model-1.11.48.dist-info → power_grid_model-1.12.72.dist-info}/licenses/LICENSE +0 -0
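In the portion of the power_grid_model/_core/utils.py hunk shown below, the visible import change is that the private alias _ComponentAttributeMappingDict is replaced by the public ComponentAttributeMappingDict. Several helpers in this file (is_sparse, split_sparse_batch_data_in_batches, check_indptr_consistency) operate on the indptr/data layout for sparse batch data; a minimal sketch of that layout is given here for orientation. The attribute names and dtype are placeholders chosen for illustration, not taken from the package:

    import numpy as np

    # Hypothetical sparse batch for a single component, following the indptr/data
    # convention validated by utils.py: indptr starts at 0, ends at the total
    # number of rows in "data", and is monotonically non-decreasing.
    # Scenario i owns the slice data[indptr[i]:indptr[i + 1]]; scenario 1 here is empty.
    sparse_batch = {
        "indptr": np.array([0, 2, 2, 3]),
        "data": np.zeros(3, dtype=[("id", "i4"), ("value", "f8")]),  # placeholder dtype
    }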
@@ -1,793 +1,798 @@
1
- # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
-
5
- """
6
- This file contains helper functions for library-internal use only.
7
-
8
- Disclaimer!
9
-
10
- We do not officially support this functionality and may remove features in this library at any given time!
11
- """
12
-
13
- from collections.abc import Sequence
14
- from copy import deepcopy
15
- from typing import cast
16
-
17
- import numpy as np
18
-
19
- from power_grid_model._core.data_types import (
20
- BatchColumn,
21
- BatchComponentData,
22
- BatchDataset,
23
- BatchList,
24
- ComponentData,
25
- Dataset,
26
- DenseBatchArray,
27
- DenseBatchColumnarData,
28
- DenseBatchData,
29
- IndexPointer,
30
- PythonDataset,
31
- SingleArray,
32
- SingleColumn,
33
- SingleColumnarData,
34
- SingleComponentData,
35
- SingleDataset,
36
- SinglePythonDataset,
37
- SparseBatchData,
38
- )
39
- from power_grid_model._core.dataset_definitions import ComponentType, DatasetType
40
- from power_grid_model._core.enum import ComponentAttributeFilterOptions
41
- from power_grid_model._core.error_handling import VALIDATOR_MSG
42
- from power_grid_model._core.errors import PowerGridError
43
- from power_grid_model._core.power_grid_meta import initialize_array, power_grid_meta_data
44
- from power_grid_model._core.typing import ComponentAttributeMapping, _ComponentAttributeMappingDict
45
-
46
- SINGLE_DATASET_NDIM = 1
47
- BATCH_DATASET_NDIM = 2
48
-
49
-
50
- def is_nan(data) -> bool:
51
- """
52
- Determine if the data point is invalid (NaN)
53
- Args:
54
- data: a single scalar or numpy array
55
-
56
- Returns:
57
- True if all the data points are invalid
58
- False otherwise
59
- """
60
- nan_func = {
61
- np.dtype("f8"): lambda x: np.all(np.isnan(x)),
62
- np.dtype("i4"): lambda x: np.all(x == np.iinfo("i4").min),
63
- np.dtype("i1"): lambda x: np.all(x == np.iinfo("i1").min),
64
- }
65
- return bool(nan_func[data.dtype](data))
66
-
67
-
68
- def convert_batch_dataset_to_batch_list(batch_data: BatchDataset, dataset_type: DatasetType | None = None) -> BatchList:
69
- """
70
- Convert batch datasets to a list of individual batches
71
-
72
- Args:
73
- batch_data: a batch dataset for power-grid-model
74
- dataset_type: type of dataset
75
-
76
- Returns:
77
- A list of individual batches
78
- """
79
-
80
- # If the batch data is empty, return an empty list
81
- if len(batch_data) == 0:
82
- return []
83
-
84
- n_batches = get_and_verify_batch_sizes(batch_data=batch_data, dataset_type=dataset_type)
85
-
86
- # Initialize an empty list with dictionaries
87
- # Note that [{}] * n_batches would result in n copies of the same dict.
88
- list_data: BatchList = [{} for _ in range(n_batches)]
89
-
90
- # While the number of batches must be the same for each component, the structure (2d numpy array or indptr/data)
91
- # doesn't have to be. Therefore, we'll check the structure for each component and copy the data accordingly.
92
- for component, data in batch_data.items():
93
- component_data_checks(data, component)
94
- component_batches: Sequence[SingleComponentData]
95
- if is_sparse(data):
96
- component_batches = split_sparse_batch_data_in_batches(cast(SparseBatchData, data), component)
97
- else:
98
- component_batches = split_dense_batch_data_in_batches(cast(SingleComponentData, data), batch_size=n_batches)
99
- for i, batch in enumerate(component_batches):
100
- if (isinstance(batch, dict) and batch) or (isinstance(batch, np.ndarray) and batch.size > 0):
101
- list_data[i][component] = batch
102
- return list_data
103
-
104
-
105
- def get_and_verify_batch_sizes(batch_data: BatchDataset, dataset_type: DatasetType | None = None) -> int:
106
- """
107
- Determine the number of batches for each component and verify that each component has the same number of batches
108
-
109
- Args:
110
- batch_data: a batch dataset for power-grid-model
111
- dataset_type: type of dataset
112
-
113
- Returns:
114
- The number of batches
115
- """
116
-
117
- if dataset_type is None and any(is_columnar(v) and not is_sparse(v) for v in batch_data.values()):
118
- dataset_type = get_dataset_type(batch_data)
119
-
120
- n_batch_size = 0
121
- checked_components: list[ComponentType] = []
122
- for component, data in batch_data.items():
123
- n_component_batch_size = get_batch_size(data, dataset_type, component)
124
- if checked_components and n_component_batch_size != n_batch_size:
125
- if len(checked_components) == 1:
126
- checked_components_str = f"'{checked_components.pop()}'"
127
- else:
128
- str_checked_components = [str(component) for component in checked_components]
129
- checked_components_str = "/".join(sorted(str_checked_components))
130
- raise ValueError(
131
- f"Inconsistent number of batches in batch data. "
132
- f"Component '{component}' contains {n_component_batch_size} batches, "
133
- f"while {checked_components_str} contained {n_batch_size} batches."
134
- )
135
- n_batch_size = n_component_batch_size
136
- checked_components.append(component)
137
- return n_batch_size
138
-
139
-
140
- def get_batch_size(
141
- batch_data: BatchComponentData, dataset_type: DatasetType | None = None, component: ComponentType | None = None
142
- ) -> int:
143
- """
144
- Determine the number of batches and verify the data structure while we're at it. Note only batch data is supported.
145
- Note: SingleColumnarData would get treated as batch by this function.
146
-
147
- Args:
148
- batch_data: a batch array for power-grid-model
149
- dataset_type: type of dataset
150
- component: name of component
151
-
152
- Raises:
153
- ValueError: when the type for data_filter is incorrect
154
-
155
- Returns:
156
- The number of batches
157
- """
158
- component_data_checks(batch_data)
159
- if is_sparse(batch_data):
160
- indptr = batch_data["indptr"]
161
- return indptr.size - 1
162
-
163
- if not is_columnar(batch_data):
164
- sym_array = batch_data
165
- else:
166
- batch_data = cast(DenseBatchColumnarData, batch_data)
167
- if component is None or dataset_type is None:
168
- raise ValueError("Cannot deduce batch size for given columnar data without a dataset type or component")
169
- sym_attributes, _ = _get_sym_or_asym_attributes(dataset_type, component)
170
- for attribute, array in batch_data.items():
171
- if attribute in sym_attributes:
172
- break
173
- if array.ndim == SINGLE_DATASET_NDIM:
174
- raise TypeError("Incorrect dimension present in batch data.")
175
- if array.ndim == BATCH_DATASET_NDIM:
176
- return 1
177
- return array.shape[0]
178
- sym_array = next(iter(batch_data.values()))
179
-
180
- sym_array = cast(DenseBatchArray | BatchColumn, sym_array)
181
- if sym_array.ndim not in (SINGLE_DATASET_NDIM, BATCH_DATASET_NDIM):
182
- raise TypeError("Incorrect dimension present in batch data.")
183
- if sym_array.ndim == SINGLE_DATASET_NDIM:
184
- return 1
185
- return sym_array.shape[0]
186
-
187
-
188
- def _get_sym_or_asym_attributes(dataset_type: DatasetType, component: ComponentType):
189
- """Segregate into symmetric of asymmetric attribute.
190
- The asymmetric attribute is a per-phase value and has an extra dimension.
191
-
192
- Args:
193
- dataset_type (DatasetType): dataset type
194
- component (ComponentType): component name
195
-
196
- Returns:
197
- symmetrical and asymmetrical attributes
198
- """
199
- asym_attributes = set()
200
- sym_attributes = set()
201
- for meta_dataset_type, dataset_meta in power_grid_meta_data.items():
202
- if dataset_type != meta_dataset_type:
203
- continue
204
- for component_name_meta, component_meta in dataset_meta.items():
205
- if component != component_name_meta:
206
- continue
207
- if component_meta.dtype.names is None:
208
- raise ValueError("No attributes available in meta")
209
- for attribute in component_meta.dtype.names:
210
- if component_meta.dtype[attribute].shape == (3,):
211
- asym_attributes.add(attribute)
212
- if component_meta.dtype[attribute].shape == ():
213
- sym_attributes.add(attribute)
214
- return sym_attributes, asym_attributes
215
-
216
-
217
- def _split_numpy_array_in_batches(
218
- data: DenseBatchArray | SingleArray | SingleColumn | BatchColumn,
219
- ) -> list[SingleArray] | list[SingleColumn]:
220
- """
221
- Split a single dense numpy array into one or more batches
222
-
223
- Args:
224
- data: A 1D or 2D Numpy structured array. A 1D array is a single table / batch, a 2D array is a batch per table.
225
-
226
- Returns:
227
- A list with a single numpy structured array per batch
228
- """
229
- if data.ndim == SINGLE_DATASET_NDIM:
230
- return [data]
231
- if data.ndim in [2, 3]:
232
- return [data[i, ...] for i in range(data.shape[0])]
233
- raise ValueError("Dimension of the component data is invalid.")
234
-
235
-
236
- def split_dense_batch_data_in_batches(
237
- data: SingleComponentData | DenseBatchData, batch_size: int
238
- ) -> list[SingleComponentData]:
239
- """
240
- Split a single dense numpy array into one or more batches
241
-
242
- Args:
243
- data: A 1D or 2D Numpy structured array. A 1D array is a single table / batch, a 2D array is a batch per table.
244
- batch_size: size of batch
245
-
246
- Returns:
247
- A list with a single component data per scenario
248
- """
249
- if isinstance(data, np.ndarray):
250
- return cast(list[SingleComponentData], _split_numpy_array_in_batches(data))
251
-
252
- scenarios_per_attribute = {
253
- attribute: _split_numpy_array_in_batches(attribute_data) for attribute, attribute_data in data.items()
254
- }
255
-
256
- return [
257
- {attribute: scenarios_per_attribute[attribute][scenario] for attribute, attribute_data in data.items()}
258
- for scenario in range(batch_size)
259
- ]
260
-
261
-
262
- def split_sparse_batch_data_in_batches(
263
- batch_data: SparseBatchData, component: ComponentType
264
- ) -> list[SingleComponentData]:
265
- """
266
- Split a single numpy array representing a compressed sparse structure into one or more batches
267
-
268
- Args:
269
- batch_data: Sparse batch data
270
- component: The name of the component to which the data belongs, only used for errors.
271
-
272
- Returns:
273
- A list with a single numpy structured array per batch
274
- """
275
- for key in ["indptr", "data"]:
276
- if key not in batch_data:
277
- raise KeyError(
278
- f"Missing '{key}' in sparse batch data for '{component}' "
279
- "(expected a python dictionary containing two keys: 'indptr' and 'data')."
280
- )
281
-
282
- data = batch_data["data"]
283
- indptr = batch_data["indptr"]
284
-
285
- def _split_buffer(buffer: np.ndarray, scenario: int) -> SingleArray:
286
- if not isinstance(buffer, np.ndarray) or buffer.ndim != 1:
287
- raise TypeError(
288
- f"Invalid data type {type(buffer).__name__} in sparse batch data for '{component}' "
289
- "(should be a 1D Numpy structured array (i.e. a single 'table'))."
290
- )
291
-
292
- if not isinstance(indptr, np.ndarray) or indptr.ndim != 1 or not np.issubdtype(indptr.dtype, np.integer):
293
- raise TypeError(
294
- f"Invalid indptr data type {type(indptr).__name__} in batch data for '{component}' "
295
- "(should be a 1D Numpy array (i.e. a single 'list'), "
296
- "containing indices (i.e. integers))."
297
- )
298
-
299
- if indptr[0] != 0 or indptr[-1] != len(buffer) or indptr[scenario] > indptr[scenario + 1]:
300
- raise TypeError(
301
- f"Invalid indptr in batch data for '{component}' "
302
- f"(should start with 0, end with the number of objects ({len(buffer)}) "
303
- "and be monotonic increasing)."
304
- )
305
-
306
- return buffer[indptr[scenario] : indptr[scenario + 1]]
307
-
308
- def _get_scenario(scenario: int) -> SingleComponentData:
309
- if isinstance(data, dict):
310
- return {attribute: _split_buffer(attribute_data, scenario) for attribute, attribute_data in data.items()}
311
- return _split_buffer(data, scenario)
312
-
313
- return [_get_scenario(i) for i in range(len(indptr) - 1)]
314
-
315
-
316
- def convert_dataset_to_python_dataset(data: Dataset) -> PythonDataset:
317
- """
318
- Convert internal numpy arrays to native python data
319
- If an attribute is not available (NaN value), it will not be exported.
320
-
321
- Args:
322
- data: A single or batch dataset for power-grid-model
323
- Returns:
324
- A python dict for single dataset
325
- A python list for batch dataset
326
- """
327
-
328
- # Check if the dataset is a single dataset or batch dataset
329
- # It is batch dataset if it is 2D array or a indptr/data structure
330
- is_batch: bool | None = None
331
- for component, array in data.items():
332
- is_dense_batch = isinstance(array, np.ndarray) and array.ndim == BATCH_DATASET_NDIM
333
- is_sparse_batch = isinstance(array, dict) and "indptr" in array and "data" in array
334
- if is_batch is not None and is_batch != (is_dense_batch or is_sparse_batch):
335
- raise ValueError(
336
- f"Mixed {'' if is_batch else 'non-'}batch data "
337
- f"with {'non-' if is_batch else ''}batch data ({component})."
338
- )
339
- is_batch = is_dense_batch or is_sparse_batch
340
-
341
- # If it is a batch, convert the batch data to a list of batches, then convert each batch individually.
342
- if is_batch:
343
- # We have established that this is batch data, so let's tell the type checker that this is a BatchDataset
344
- data = cast(BatchDataset, data)
345
- list_data = convert_batch_dataset_to_batch_list(data)
346
- return [convert_single_dataset_to_python_single_dataset(data=x) for x in list_data]
347
-
348
- # We have established that this is not batch data, so let's tell the type checker that this is a SingleDataset
349
- data = cast(SingleDataset, data)
350
- return convert_single_dataset_to_python_single_dataset(data=data)
351
-
352
-
353
- def convert_single_dataset_to_python_single_dataset(
354
- data: SingleDataset,
355
- ) -> SinglePythonDataset:
356
- """
357
- Convert internal numpy arrays to native python data
358
- If an attribute is not available (NaN value), it will not be exported.
359
-
360
- Args:
361
- data: A single dataset for power-grid-model
362
-
363
- Returns:
364
- A python dict for single dataset
365
- """
366
-
367
- # Convert each numpy array to a list of objects, which contains only the non-NaN attributes:
368
- # For example: {"node": [{"id": 0, ...}, {"id": 1, ...}], "line": [{"id": 2, ...}]}
369
- def _convert_component(objects: SingleComponentData):
370
- # This should be a single data set
371
- if not isinstance(objects, np.ndarray) or objects.ndim != 1 or objects.dtype.names is None:
372
- raise ValueError("Invalid data format")
373
-
374
- return [
375
- {attribute: obj[attribute].tolist() for attribute in objects.dtype.names if not is_nan(obj[attribute])}
376
- for obj in objects
377
- ]
378
-
379
- return {component: _convert_component(objects) for component, objects in data.items()}
380
-
381
-
382
- def compatibility_convert_row_columnar_dataset(
383
- data: Dataset,
384
- data_filter: ComponentAttributeMapping,
385
- dataset_type: DatasetType,
386
- available_components: list[ComponentType] | None = None,
387
- ) -> Dataset:
388
- """Temporary function to transform row, column or mixed based datasets to a full row or column based dataset as per
389
- the data_filter. The purpose of this function is to mimic columnar data and transform back to row data without any
390
- memory footprint benefits.
391
- Note: Copies are made on a per-component basis; if a component is row based in both the input and the requested
392
- output, that component is returned without a copy.
393
-
394
- Args:
395
- data (Dataset): dataset to convert
396
- data_filter (ComponentAttributeMapping): desired component and attribute mapping
397
- dataset_type (DatasetType): type of dataset (e.g., input, update or [sym | asym | sc]_output)
398
- available_components (list[ComponentType] | None): available components in model
399
-
400
- Returns:
401
- Dataset: converted dataset
402
- """
403
- if available_components is None:
404
- available_components = list(data.keys())
405
-
406
- processed_data_filter = process_data_filter(dataset_type, data_filter, available_components)
407
-
408
- result_data: Dataset = {}
409
- for comp_name, attrs in processed_data_filter.items():
410
- if comp_name not in data:
411
- continue
412
-
413
- sub_data = _extract_data_from_component_data(data[comp_name])
414
- converted_sub_data = _convert_data_to_row_or_columnar(
415
- data=sub_data,
416
- comp_name=comp_name,
417
- dataset_type=dataset_type,
418
- attrs=attrs,
419
- )
420
-
421
- if is_sparse(data[comp_name]):
422
- result_data[comp_name] = {"indptr": _extract_indptr(data[comp_name]), "data": converted_sub_data}
423
- else:
424
- result_data[comp_name] = converted_sub_data
425
- return result_data
426
-
427
-
428
- def _convert_data_to_row_or_columnar(
429
- data: SingleComponentData,
430
- comp_name: ComponentType,
431
- dataset_type: DatasetType,
432
- attrs: set[str] | list[str] | None | ComponentAttributeFilterOptions,
433
- ) -> SingleComponentData:
434
- """Converts row or columnar component data to row or columnar component data as requested in `attrs`."""
435
- if attrs is None:
436
- if not is_columnar(data):
437
- return data
438
- data = cast(SingleColumnarData, data)
439
- output_array = initialize_array(dataset_type, comp_name, next(iter(data.values())).shape)
440
- for k in data:
441
- output_array[k] = data[k]
442
- return output_array
443
-
444
- if isinstance(attrs, (list, set)) and len(attrs) == 0:
445
- return {}
446
- if isinstance(attrs, ComponentAttributeFilterOptions):
447
- names = cast(SingleArray, data).dtype.names if not is_columnar(data) else cast(SingleColumnarData, data).keys()
448
- if names is None:
449
- raise ValueError("No attributes available in meta")
450
- return {attr: deepcopy(data[attr]) for attr in names}
451
- return {attr: deepcopy(data[attr]) for attr in attrs}
452
-
453
-
454
- def process_data_filter(
455
- dataset_type: DatasetType,
456
- data_filter: ComponentAttributeMapping,
457
- available_components: list[ComponentType],
458
- ) -> _ComponentAttributeMappingDict:
459
- """Checks valid type for data_filter. Also checks for any invalid component names and attribute names.
460
-
461
- Args:
462
- dataset_type (DatasetType): the type of output that the user will see (as per the calculation options)
463
- data_filter (ComponentAttributeMapping): data_filter provided by user
464
- available_components (list[ComponentType]): all components available in model instance or data
465
-
466
- Returns:
467
- _ComponentAttributeMappingDict: processed data_filter in a dictionary
468
- """
469
- if data_filter is None:
470
- processed_data_filter: _ComponentAttributeMappingDict = {ComponentType[k]: None for k in available_components}
471
- elif isinstance(data_filter, ComponentAttributeFilterOptions):
472
- processed_data_filter = {ComponentType[k]: data_filter for k in available_components}
473
- elif isinstance(data_filter, (list, set)):
474
- processed_data_filter = {ComponentType[k]: None for k in data_filter}
475
- elif isinstance(data_filter, dict) and all(
476
- attrs is None or isinstance(attrs, (set, list, ComponentAttributeFilterOptions))
477
- for attrs in data_filter.values()
478
- ):
479
- processed_data_filter = data_filter
480
- else:
481
- raise ValueError(f"Invalid filter provided: {data_filter}")
482
-
483
- validate_data_filter(processed_data_filter, dataset_type, available_components)
484
- return processed_data_filter
485
-
486
-
487
- def validate_data_filter(
488
- data_filter: _ComponentAttributeMappingDict,
489
- dataset_type: DatasetType,
490
- available_components: list[ComponentType],
491
- ) -> None:
492
- """Raise error if some specified components or attributes are unknown.
493
-
494
- Args:
495
- data_filter (_ComponentAttributeMappingDict): Processed component to attribute dictionary
496
- dataset_type (DatasetType): Type of dataset
497
- available_components (list[ComponentType]): all components available in model instance or data
498
-
499
- Raises:
500
- ValueError: when the type for data_filter is incorrect
501
- KeyError: with "unknown component types" for any unknown components
502
- KeyError: with "unknown attributes" for unknown attribute(s) for a known component
503
- """
504
- dataset_meta = power_grid_meta_data[dataset_type]
505
-
506
- for source, components in {
507
- "data_filter": data_filter.keys(),
508
- "data": available_components,
509
- }.items():
510
- unknown_components = [x for x in components if x not in dataset_meta]
511
- if unknown_components:
512
- raise KeyError(f"The following specified component types are unknown:{unknown_components} in {source}")
513
-
514
- unknown_attributes = {}
515
- for comp_name, attrs in data_filter.items():
516
- if attrs is None or isinstance(attrs, ComponentAttributeFilterOptions):
517
- continue
518
-
519
- attr_names = dataset_meta[comp_name].dtype.names
520
- diff = set(cast(set[str] | list[str], attrs))
521
- if attr_names is not None:
522
- diff = diff.difference(attr_names)
523
- if diff != set():
524
- unknown_attributes[comp_name] = diff
525
-
526
- if unknown_attributes:
527
- raise KeyError(f"The following specified attributes are unknown: {unknown_attributes} in data_filter")
528
-
529
-
530
- def is_sparse(component_data: ComponentData) -> bool:
531
- """Check if component_data is sparse or dense. Only batch data can be sparse."""
532
- return isinstance(component_data, dict) and set(component_data.keys()) == {
533
- "indptr",
534
- "data",
535
- }
536
-
537
-
538
- def is_columnar(component_data: ComponentData) -> bool:
539
- """Check if component_data is columnar or row based"""
540
- if is_sparse(component_data):
541
- return not isinstance(component_data["data"], np.ndarray)
542
- return not isinstance(component_data, np.ndarray)
543
-
544
-
545
- def is_nan_or_default(x: np.ndarray) -> np.ndarray:
546
- """
547
- Check if elements in the array are NaN or equal to the min of its dtype.
548
-
549
- Args:
550
- x: A NumPy array to check.
551
-
552
- Returns:
553
- A boolean NumPy array where each element is True if the corresponding element in x is NaN
554
- or min of its dtype, and False otherwise.
555
- """
556
- if x.dtype == np.float64:
557
- return np.isnan(x)
558
- if x.dtype in (np.int32, np.int8):
559
- return x == np.iinfo(x.dtype).min
560
- raise TypeError(f"Unsupported data type: {x.dtype}")
561
-
562
-
563
- def is_nan_or_equivalent(array) -> bool:
564
- """
565
- Check if the array contains only nan values or equivalent nan values for specific data types.
566
- This is the aggregated version of `is_nan_or_default` for the whole array.
567
-
568
- Args:
569
- array: The array to check.
570
-
571
- Returns:
572
- bool: True if the array contains only nan or equivalent nan values, False otherwise.
573
- """
574
- return isinstance(array, np.ndarray) and bool(
575
- (array.dtype == np.float64 and np.isnan(array).all())
576
- or (array.dtype in (np.int32, np.int8) and np.all(array == np.iinfo(array.dtype).min))
577
- )
578
-
579
-
580
- def _check_sparse_dense(component_data: ComponentData, err_msg_suffixed: str) -> ComponentData:
581
- if is_sparse(component_data):
582
- indptr = component_data["indptr"]
583
- if not isinstance(indptr, np.ndarray):
584
- raise TypeError(err_msg_suffixed.format(f"Invalid indptr type {type(indptr).__name__}. "))
585
- sub_data = component_data["data"]
586
- elif isinstance(component_data, dict) and ("indptr" in component_data or "data" in component_data):
587
- missing_element = "indptr" if "indptr" not in component_data else "data"
588
- raise KeyError(err_msg_suffixed.format(f"Missing '{missing_element}' in sparse batch data. "))
589
- else:
590
- sub_data = component_data
591
- return sub_data
592
-
593
-
594
- def _check_columnar_row(sub_data: ComponentData, err_msg_suffixed: str) -> None:
595
- if is_columnar(sub_data):
596
- if not isinstance(sub_data, dict):
597
- raise TypeError(err_msg_suffixed.format(""))
598
- for attribute, attribute_array in sub_data.items():
599
- if not isinstance(attribute_array, np.ndarray):
600
- raise TypeError(err_msg_suffixed.format(f"'{attribute}' attribute. "))
601
- if attribute_array.ndim not in [1, 2, 3]:
602
- raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {attribute_array.ndim}"))
603
- elif not isinstance(sub_data, np.ndarray):
604
- raise TypeError(err_msg_suffixed.format(f"Invalid data type {type(sub_data).__name__} "))
605
- elif isinstance(sub_data, np.ndarray) and sub_data.ndim not in [1, 2]:
606
- raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {sub_data.ndim}. "))
607
-
608
-
609
- def component_data_checks(component_data: ComponentData, component=None) -> None:
610
- """Checks if component_data is of ComponentData and raises ValueError if its not"""
611
- component_name = f"'{component}'" if component is not None else ""
612
- err_msg = f"Invalid data for {component_name} component. {{0}}"
613
- err_msg_suffixed = err_msg + "Expecting a 1D/2D Numpy structured array or a dictionary of such."
614
-
615
- sub_data = _check_sparse_dense(component_data, err_msg_suffixed)
616
- _check_columnar_row(sub_data, err_msg_suffixed)
617
-
618
-
619
- def _extract_indptr(data: ComponentData) -> IndexPointer: # pragma: no cover
620
- """returns indptr and checks if its valid
621
-
622
- Args:
623
- data (ComponentData): The component data
624
-
625
- Raises:
626
- TypeError: if indptr is invalid or is not available
627
-
628
- Returns:
629
- IndexPointer: indptr if present
630
- """
631
- if not is_sparse(data):
632
- raise TypeError("Not sparse data")
633
- indptr = data["indptr"]
634
- if not isinstance(indptr, np.ndarray):
635
- raise TypeError("indptr is not a 1D numpy array")
636
- if indptr.ndim != 1:
637
- raise TypeError("indptr is not a 1D numpy array")
638
- return indptr
639
-
640
-
641
- def _extract_columnar_data(
642
- data: ComponentData, is_batch: bool | None = None
643
- ) -> SingleColumnarData | DenseBatchColumnarData: # pragma: no cover
644
- """returns the contents of the columnar data.
645
-
646
- Args:
647
- data (ComponentData): component data
648
- is_batch (bool | None, optional): If given data is batch. Skips batch check if provided None.
649
-
650
- Raises:
651
- TypeError: if data is not columnar or invalid data
652
-
653
- Returns:
654
- SingleColumnarData | DenseBatchColumnarData: the contents of columnar data
655
- """
656
- not_columnar_data_message = "Expected columnar data"
657
-
658
- if is_batch is None:
659
- allowed_dims = [1, 2, 3]
660
- elif is_batch:
661
- allowed_dims = [2, 3]
662
- else:
663
- allowed_dims = [1, 2]
664
-
665
- sub_data = data["data"] if is_sparse(data) else data
666
-
667
- if not isinstance(sub_data, dict):
668
- raise TypeError(not_columnar_data_message)
669
- for attribute, attribute_array in sub_data.items():
670
- if not isinstance(attribute_array, np.ndarray) or not isinstance(attribute, str):
671
- raise TypeError(not_columnar_data_message)
672
- if attribute_array.ndim not in allowed_dims:
673
- raise TypeError(not_columnar_data_message)
674
- return cast(SingleColumnarData | DenseBatchColumnarData, sub_data)
675
-
676
-
677
- def _extract_row_based_data(
678
- data: ComponentData, is_batch: bool | None = None
679
- ) -> SingleArray | DenseBatchArray: # pragma: no cover
680
- """returns the contents of the row based data
681
-
682
- Args:
683
- data (ComponentData): component data
684
- is_batch (bool | None, optional): If given data is batch. Skips batch check if provided None.
685
-
686
- Raises:
687
- TypeError: if data is not row based or invalid data
688
-
689
- Returns:
690
- SingleArray | DenseBatchArray: the contents of row based data
691
- """
692
- if is_batch is None:
693
- allowed_dims = [1, 2]
694
- elif is_batch:
695
- allowed_dims = [2]
696
- else:
697
- allowed_dims = [1]
698
-
699
- sub_data = data["data"] if is_sparse(data) else data
700
-
701
- if not isinstance(sub_data, np.ndarray):
702
- raise TypeError("Expected row based data")
703
- if sub_data.ndim not in allowed_dims:
704
- raise TypeError("Expected row based data")
705
- return sub_data
706
-
707
-
708
- def _extract_data_from_component_data(data: ComponentData, is_batch: bool | None = None):
709
- return _extract_columnar_data(data, is_batch) if is_columnar(data) else _extract_row_based_data(data, is_batch)
710
-
711
-
712
- def _extract_contents_from_data(data: ComponentData):
713
- return data["data"] if is_sparse(data) else data
714
-
715
-
716
- def check_indptr_consistency(indptr: IndexPointer, batch_size: int | None, contents_size: int):
717
- """checks if an indptr is valid. Batch size check is optional.
718
-
719
- Args:
720
- indptr (IndexPointer): The indptr array
721
- batch_size (int | None): number of scenarios
722
- contents_size (int): total number of elements in all scenarios
723
-
724
- Raises:
725
- ValueError: If indptr is invalid
726
- """
727
- if indptr[0] != 0 or indptr[-1] != contents_size:
728
- raise ValueError(f"indptr should start from zero and end at size of data array. {VALIDATOR_MSG}")
729
- if np.any(np.diff(indptr) < 0):
730
- raise ValueError(f"indptr should be increasing. {VALIDATOR_MSG}")
731
- if batch_size is not None and batch_size != indptr.size - 1:
732
- raise ValueError(f"Provided batch size must be equal to actual batch size. {VALIDATOR_MSG}")
733
-
734
-
735
- def get_dataset_type(data: Dataset) -> DatasetType:
736
- """
737
- Deduce the dataset type from the provided dataset.
738
-
739
- Args:
740
- data: the dataset
741
-
742
- Raises:
743
- ValueError
744
- if the dataset type cannot be deduced because multiple dataset types match the format
745
- (probably because the data contained no supported components, e.g. was empty)
746
- PowerGridError
747
- if no dataset type matches the format of the data
748
- (probably because the data contained conflicting data formats)
749
-
750
- Returns:
751
- The dataset type.
752
- """
753
- candidates = set(power_grid_meta_data.keys())
754
-
755
- if all(is_columnar(v) for v in data.values()):
756
- raise ValueError("The dataset type could not be deduced. At least one component should have row based data.")
757
-
758
- for dataset_type, dataset_metadatas in power_grid_meta_data.items():
759
- for component, dataset_metadata in dataset_metadatas.items():
760
- if component not in data or is_columnar(data[component]):
761
- continue
762
- component_data = data[component]
763
-
764
- component_dtype = component_data["data"].dtype if is_sparse(component_data) else component_data.dtype
765
- if component_dtype is not dataset_metadata.dtype:
766
- candidates.discard(dataset_type)
767
- break
768
-
769
- if not candidates:
770
- raise PowerGridError(
771
- "The dataset type could not be deduced because no type matches the data. "
772
- "This usually means inconsistent data was provided."
773
- )
774
- if len(candidates) > 1:
775
- raise ValueError("The dataset type could not be deduced because multiple dataset types match the data.")
776
-
777
- return next(iter(candidates))
778
-
779
-
780
- def get_comp_size(comp_data: SingleColumnarData | SingleArray) -> int:
781
- """
782
- Get the number of elements in the comp_data of a single dataset.
783
-
784
- Args:
785
- comp_data: Columnar or row based data of a single batch
786
-
787
- Returns:
788
- Number of elements in the component
789
- """
790
- if not is_columnar(comp_data):
791
- return len(comp_data)
792
- comp_data = cast(SingleColumnarData, comp_data)
793
- return len(next(iter(comp_data.values())))
1
+ # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+
5
+ """
6
+ This file contains helper functions for library-internal use only.
7
+
8
+ Disclaimer!
9
+
10
+ We do not officially support this functionality and may remove features in this library at any given time!
11
+ """
12
+
13
+ from collections.abc import Sequence
14
+ from copy import deepcopy
15
+ from typing import cast
16
+
17
+ import numpy as np
18
+
19
+ from power_grid_model._core.data_types import (
20
+ BatchColumn,
21
+ BatchComponentData,
22
+ BatchDataset,
23
+ BatchList,
24
+ ComponentData,
25
+ Dataset,
26
+ DenseBatchArray,
27
+ DenseBatchColumnarData,
28
+ DenseBatchData,
29
+ IndexPointer,
30
+ PythonDataset,
31
+ SingleArray,
32
+ SingleColumn,
33
+ SingleColumnarData,
34
+ SingleComponentData,
35
+ SingleDataset,
36
+ SinglePythonDataset,
37
+ SparseBatchData,
38
+ )
39
+ from power_grid_model._core.dataset_definitions import ComponentType, DatasetType
40
+ from power_grid_model._core.enum import ComponentAttributeFilterOptions
41
+ from power_grid_model._core.error_handling import VALIDATOR_MSG
42
+ from power_grid_model._core.errors import PowerGridError
43
+ from power_grid_model._core.power_grid_meta import initialize_array, power_grid_meta_data
44
+ from power_grid_model._core.typing import ComponentAttributeMapping, ComponentAttributeMappingDict
45
+
46
+ SINGLE_DATASET_NDIM = 1
47
+ BATCH_DATASET_NDIM = 2
48
+
49
+
50
+ def is_nan(data) -> bool:
51
+ """
52
+ Determine if the data point is invalid (NaN)
53
+ Args:
54
+ data: a single scalar or numpy array
55
+
56
+ Returns:
57
+ True if all the data points are invalid
58
+ False otherwise
59
+ """
60
+ nan_func = {
61
+ np.dtype("f8"): lambda x: np.all(np.isnan(x)),
62
+ np.dtype("i4"): lambda x: np.all(x == np.iinfo("i4").min),
63
+ np.dtype("i1"): lambda x: np.all(x == np.iinfo("i1").min),
64
+ }
65
+ return bool(nan_func[data.dtype](data))
66
+
67
+
68
+ def convert_batch_dataset_to_batch_list(batch_data: BatchDataset, dataset_type: DatasetType | None = None) -> BatchList:
69
+ """
70
+ Convert batch datasets to a list of individual batches
71
+
72
+ Args:
73
+ batch_data: a batch dataset for power-grid-model
74
+ dataset_type: type of dataset
75
+
76
+ Returns:
77
+ A list of individual batches
78
+ """
79
+
80
+ # If the batch data is empty, return an empty list
81
+ if len(batch_data) == 0:
82
+ return []
83
+
84
+ n_batches = get_and_verify_batch_sizes(batch_data=batch_data, dataset_type=dataset_type)
85
+
86
+ # Initialize an empty list with dictionaries
87
+ # Note that [{}] * n_batches would result in n copies of the same dict.
88
+ list_data: BatchList = [{} for _ in range(n_batches)]
89
+
90
+ # While the number of batches must be the same for each component, the structure (2d numpy array or indptr/data)
91
+ # doesn't have to be. Therefore, we'll check the structure for each component and copy the data accordingly.
92
+ for component, data in batch_data.items():
93
+ component_data_checks(data, component)
94
+ component_batches: Sequence[SingleComponentData]
95
+ if is_sparse(data):
96
+ component_batches = split_sparse_batch_data_in_batches(cast(SparseBatchData, data), component)
97
+ else:
98
+ component_batches = split_dense_batch_data_in_batches(cast(SingleComponentData, data), batch_size=n_batches)
99
+ for i, batch in enumerate(component_batches):
100
+ if (isinstance(batch, dict) and batch) or (isinstance(batch, np.ndarray) and batch.size > 0):
101
+ list_data[i][component] = batch
102
+ return list_data
103
+
104
+
105
+ def get_and_verify_batch_sizes(batch_data: BatchDataset, dataset_type: DatasetType | None = None) -> int:
106
+ """
107
+ Determine the number of batches for each component and verify that each component has the same number of batches
108
+
109
+ Args:
110
+ batch_data: a batch dataset for power-grid-model
111
+ dataset_type: type of dataset
112
+
113
+ Returns:
114
+ The number of batches
115
+ """
116
+
117
+ if dataset_type is None and any(is_columnar(v) and not is_sparse(v) for v in batch_data.values()):
118
+ dataset_type = get_dataset_type(batch_data)
119
+
120
+ n_batch_size = 0
121
+ checked_components: list[ComponentType] = []
122
+ for component, data in batch_data.items():
123
+ n_component_batch_size = get_batch_size(data, dataset_type, component)
124
+ if checked_components and n_component_batch_size != n_batch_size:
125
+ if len(checked_components) == 1:
126
+ checked_components_str = f"'{checked_components.pop()}'"
127
+ else:
128
+ str_checked_components = [str(component) for component in checked_components]
129
+ checked_components_str = "/".join(sorted(str_checked_components))
130
+ raise ValueError(
131
+ f"Inconsistent number of batches in batch data. "
132
+ f"Component '{component}' contains {n_component_batch_size} batches, "
133
+ f"while {checked_components_str} contained {n_batch_size} batches."
134
+ )
135
+ n_batch_size = n_component_batch_size
136
+ checked_components.append(component)
137
+ return n_batch_size
138
+
139
+
140
+ def get_batch_size(
141
+ batch_data: BatchComponentData, dataset_type: DatasetType | None = None, component: ComponentType | None = None
142
+ ) -> int:
143
+ """
144
+ Determine the number of batches and verify the data structure while we're at it. Note only batch data is supported.
145
+ Note: SingleColumnarData would get treated as batch by this function.
146
+
147
+ Args:
148
+ batch_data: a batch array for power-grid-model
149
+ dataset_type: type of dataset
150
+ component: name of component
151
+
152
+ Raises:
153
+ ValueError: when the type for data_filter is incorrect
154
+
155
+ Returns:
156
+ The number of batches
157
+ """
158
+ component_data_checks(batch_data)
159
+ if is_sparse(batch_data):
160
+ indptr = batch_data["indptr"]
161
+ return indptr.size - 1
162
+
163
+ if not is_columnar(batch_data):
164
+ sym_array = batch_data
165
+ else:
166
+ batch_data = cast(DenseBatchColumnarData, batch_data)
167
+ if component is None or dataset_type is None:
168
+ raise ValueError("Cannot deduce batch size for given columnar data without a dataset type or component")
169
+ sym_attributes, _ = _get_sym_or_asym_attributes(dataset_type, component)
170
+ for attribute, array in batch_data.items():
171
+ if attribute in sym_attributes:
172
+ break
173
+ if array.ndim == SINGLE_DATASET_NDIM:
174
+ raise TypeError("Incorrect dimension present in batch data.")
175
+ if array.ndim == BATCH_DATASET_NDIM:
176
+ return 1
177
+ return array.shape[0]
178
+ sym_array = next(iter(batch_data.values()))
179
+
180
+ sym_array = cast(DenseBatchArray | BatchColumn, sym_array)
181
+ if sym_array.ndim not in (SINGLE_DATASET_NDIM, BATCH_DATASET_NDIM):
182
+ raise TypeError("Incorrect dimension present in batch data.")
183
+ if sym_array.ndim == SINGLE_DATASET_NDIM:
184
+ return 1
185
+ return sym_array.shape[0]
186
+
187
+
188
+ def _get_sym_or_asym_attributes(dataset_type: DatasetType, component: ComponentType):
189
+ """Segregate into symmetric of asymmetric attribute.
190
+ The asymmetric attribute is a per-phase value and has an extra dimension.
191
+
192
+ Args:
193
+ dataset_type (DatasetType): dataset type
194
+ component (ComponentType): component name
195
+
196
+ Returns:
197
+ symmetrical and asymmetrical attributes
198
+ """
199
+ asym_attributes = set()
200
+ sym_attributes = set()
201
+ for meta_dataset_type, dataset_meta in power_grid_meta_data.items():
202
+ if dataset_type != meta_dataset_type:
203
+ continue
204
+ for component_name_meta, component_meta in dataset_meta.items():
205
+ if component != component_name_meta:
206
+ continue
207
+ if component_meta.dtype.names is None:
208
+ raise ValueError("No attributes available in meta")
209
+ for attribute in component_meta.dtype.names:
210
+ if component_meta.dtype[attribute].shape == (3,):
211
+ asym_attributes.add(attribute)
212
+ if component_meta.dtype[attribute].shape == ():
213
+ sym_attributes.add(attribute)
214
+ return sym_attributes, asym_attributes
215
+
216
+
217
+ def _split_numpy_array_in_batches(
218
+ data: DenseBatchArray | SingleArray | SingleColumn | BatchColumn,
219
+ ) -> list[SingleArray] | list[SingleColumn]:
220
+ """
221
+ Split a single dense numpy array into one or more batches
222
+
223
+ Args:
224
+ data: A 1D or 2D Numpy structured array. A 1D array is a single table / batch, a 2D array is a batch per table.
225
+
226
+ Returns:
227
+ A list with a single numpy structured array per batch
228
+ """
229
+ if data.ndim == SINGLE_DATASET_NDIM:
230
+ return [data]
231
+ if data.ndim in [2, 3]:
232
+ return [data[i, ...] for i in range(data.shape[0])]
233
+ raise ValueError("Dimension of the component data is invalid.")
234
+
235
+
236
+ def split_dense_batch_data_in_batches(
237
+ data: SingleComponentData | DenseBatchData, batch_size: int
238
+ ) -> list[SingleComponentData]:
239
+ """
240
+ Split a single dense numpy array into one or more batches
241
+
242
+ Args:
243
+ data: A 1D or 2D Numpy structured array. A 1D array is a single table / batch, a 2D array is a batch per table.
244
+ batch_size: size of batch
245
+
246
+ Returns:
247
+ A list with a single component data per scenario
248
+ """
249
+ if isinstance(data, np.ndarray):
250
+ return cast(list[SingleComponentData], _split_numpy_array_in_batches(data))
251
+
252
+ scenarios_per_attribute = {
253
+ attribute: _split_numpy_array_in_batches(attribute_data) for attribute, attribute_data in data.items()
254
+ }
255
+
256
+ return [
257
+ {attribute: scenarios_per_attribute[attribute][scenario] for attribute, attribute_data in data.items()}
258
+ for scenario in range(batch_size)
259
+ ]
260
+
261
+
262
+ def split_sparse_batch_data_in_batches(
263
+ batch_data: SparseBatchData, component: ComponentType
264
+ ) -> list[SingleComponentData]:
265
+ """
266
+ Split a single numpy array representing a compressed sparse structure into one or more batches
267
+
268
+ Args:
269
+ batch_data: Sparse batch data
270
+ component: The name of the component to which the data belongs, only used for errors.
271
+
272
+ Returns:
273
+ A list with a single numpy structured array per batch
274
+ """
275
+ for key in ["indptr", "data"]:
276
+ if key not in batch_data:
277
+ raise KeyError(
278
+ f"Missing '{key}' in sparse batch data for '{component}' "
279
+ "(expected a python dictionary containing two keys: 'indptr' and 'data')."
280
+ )
281
+
282
+ data = batch_data["data"]
283
+ indptr = batch_data["indptr"]
284
+
285
+ def _split_buffer(buffer: np.ndarray, scenario: int) -> SingleArray:
286
+ if not isinstance(buffer, np.ndarray) or buffer.ndim != 1:
287
+ raise TypeError(
288
+ f"Invalid data type {type(buffer).__name__} in sparse batch data for '{component}' "
289
+ "(should be a 1D Numpy structured array (i.e. a single 'table'))."
290
+ )
291
+
292
+ if not isinstance(indptr, np.ndarray) or indptr.ndim != 1 or not np.issubdtype(indptr.dtype, np.integer):
293
+ raise TypeError(
294
+ f"Invalid indptr data type {type(indptr).__name__} in batch data for '{component}' "
295
+ "(should be a 1D Numpy array (i.e. a single 'list'), "
296
+ "containing indices (i.e. integers))."
297
+ )
298
+
299
+ if indptr[0] != 0 or indptr[-1] != len(buffer) or indptr[scenario] > indptr[scenario + 1]:
300
+ raise TypeError(
301
+ f"Invalid indptr in batch data for '{component}' "
302
+ f"(should start with 0, end with the number of objects ({len(buffer)}) "
303
+ "and be monotonic increasing)."
304
+ )
305
+
306
+ return buffer[indptr[scenario] : indptr[scenario + 1]]
307
+
308
+ def _get_scenario(scenario: int) -> SingleComponentData:
309
+ if isinstance(data, dict):
310
+ return {attribute: _split_buffer(attribute_data, scenario) for attribute, attribute_data in data.items()}
311
+ return _split_buffer(data, scenario)
312
+
313
+ return [_get_scenario(i) for i in range(len(indptr) - 1)]
314
+
315
+
316
+ def convert_dataset_to_python_dataset(data: Dataset) -> PythonDataset:
317
+ """
318
+ Convert internal numpy arrays to native python data
319
+ If an attribute is not available (NaN value), it will not be exported.
320
+
321
+ Args:
322
+ data: A single or batch dataset for power-grid-model
323
+ Returns:
324
+ A python dict for single dataset
325
+ A python list for batch dataset
326
+ """
327
+
328
+ # Check if the dataset is a single dataset or batch dataset
329
+ # It is batch dataset if it is 2D array or a indptr/data structure
330
+ is_batch: bool | None = None
331
+ for component, array in data.items():
332
+ is_dense_batch = isinstance(array, np.ndarray) and array.ndim == BATCH_DATASET_NDIM
333
+ is_sparse_batch = isinstance(array, dict) and "indptr" in array and "data" in array
334
+ if is_batch is not None and is_batch != (is_dense_batch or is_sparse_batch):
335
+ raise ValueError(
336
+ f"Mixed {'' if is_batch else 'non-'}batch data "
337
+ f"with {'non-' if is_batch else ''}batch data ({component})."
338
+ )
339
+ is_batch = is_dense_batch or is_sparse_batch
340
+
341
+ # If it is a batch, convert the batch data to a list of batches, then convert each batch individually.
342
+ if is_batch:
343
+ # We have established that this is batch data, so let's tell the type checker that this is a BatchDataset
344
+ data = cast(BatchDataset, data)
345
+ list_data = convert_batch_dataset_to_batch_list(data)
346
+ return [convert_single_dataset_to_python_single_dataset(data=x) for x in list_data]
347
+
348
+ # We have established that this is not batch data, so let's tell the type checker that this is a SingleDataset
349
+ data = cast(SingleDataset, data)
350
+ return convert_single_dataset_to_python_single_dataset(data=data)
351
+
352
+
353
+ def convert_single_dataset_to_python_single_dataset(
354
+ data: SingleDataset,
355
+ ) -> SinglePythonDataset:
356
+ """
357
+ Convert internal numpy arrays to native python data
358
+ If an attribute is not available (NaN value), it will not be exported.
359
+
360
+ Args:
361
+ data: A single dataset for power-grid-model
362
+
363
+ Returns:
364
+ A python dict for single dataset
365
+ """
366
+
367
+ # Convert each numpy array to a list of objects, which contains only the non-NaN attributes:
368
+ # For example: {"node": [{"id": 0, ...}, {"id": 1, ...}], "line": [{"id": 2, ...}]}
369
+ def _convert_component(objects: SingleComponentData):
370
+ # This should be a single data set
371
+ if not isinstance(objects, np.ndarray) or objects.ndim != 1 or objects.dtype.names is None:
372
+ raise ValueError("Invalid data format")
373
+
374
+ return [
375
+ {attribute: obj[attribute].tolist() for attribute in objects.dtype.names if not is_nan(obj[attribute])}
376
+ for obj in objects
377
+ ]
378
+
379
+ return {component: _convert_component(objects) for component, objects in data.items()}
380
+
381
+
382
+ def compatibility_convert_row_columnar_dataset(
383
+ data: Dataset,
384
+ data_filter: ComponentAttributeMapping,
385
+ dataset_type: DatasetType,
386
+ available_components: list[ComponentType] | None = None,
387
+ ) -> Dataset:
388
+ """Temporary function to transform row, column or mixed based datasets to a full row or column based dataset as per
389
+ the data_filter. The purpose of this function is to mimic columnar data and transform back to row data without any
390
+ memory footprint benefits.
391
+ Note: Copies are made on a per-component basis; if a component is row based in both the input and the requested
392
+ output, that component is returned without a copy.
393
+
394
+ Args:
395
+ data (Dataset): dataset to convert
396
+ data_filter (ComponentAttributeMapping): desired component and attribute mapping
397
+ dataset_type (DatasetType): type of dataset (e.g., input, update or [sym | asym | sc]_output)
398
+ available_components (list[ComponentType] | None): available components in model
399
+
400
+ Returns:
401
+ Dataset: converted dataset
402
+ """
403
+ if available_components is None:
404
+ available_components = list(data.keys())
405
+
406
+ processed_data_filter = process_data_filter(dataset_type, data_filter, available_components)
407
+
408
+ result_data: Dataset = {}
409
+ for comp_name, attrs in processed_data_filter.items():
410
+ if comp_name not in data:
411
+ continue
412
+
413
+ sub_data = _extract_data_from_component_data(data[comp_name])
414
+ converted_sub_data = _convert_data_to_row_or_columnar(
415
+ data=sub_data,
416
+ comp_name=comp_name,
417
+ dataset_type=dataset_type,
418
+ attrs=attrs,
419
+ )
420
+
421
+ if is_sparse(data[comp_name]):
422
+ result_data[comp_name] = {"indptr": _extract_indptr(data[comp_name]), "data": converted_sub_data}
423
+ else:
424
+ result_data[comp_name] = converted_sub_data
425
+ return result_data
426
+
427
+
428
+ def _convert_data_to_row_or_columnar(
429
+ data: SingleComponentData,
430
+ comp_name: ComponentType,
431
+ dataset_type: DatasetType,
432
+ attrs: set[str] | list[str] | None | ComponentAttributeFilterOptions,
433
+ ) -> SingleComponentData:
434
+ """Converts row or columnar component data to row or columnar component data as requested in `attrs`."""
435
+ if attrs is None:
436
+ if not is_columnar(data):
437
+ return data
438
+ data = cast(SingleColumnarData, data)
439
+ output_array = initialize_array(dataset_type, comp_name, next(iter(data.values())).shape)
440
+ for k in data:
441
+ output_array[k] = data[k]
442
+ return output_array
443
+
444
+ if isinstance(attrs, (list, set)) and len(attrs) == 0:
445
+ return {}
446
+ if isinstance(attrs, ComponentAttributeFilterOptions):
447
+ names = cast(SingleArray, data).dtype.names if not is_columnar(data) else cast(SingleColumnarData, data).keys()
448
+ if names is None:
449
+ raise ValueError("No attributes available in meta")
450
+ return {attr: deepcopy(data[attr]) for attr in names}
451
+ return {attr: deepcopy(data[attr]) for attr in attrs}
452
+
453
+
454
+ def process_data_filter(
455
+ dataset_type: DatasetType,
456
+ data_filter: ComponentAttributeMapping,
457
+ available_components: list[ComponentType],
458
+ ) -> ComponentAttributeMappingDict:
459
+ """Checks valid type for data_filter. Also checks for any invalid component names and attribute names.
460
+
461
+ Args:
462
+ dataset_type (DatasetType): the type of output that the user will see (as per the calculation options)
463
+ data_filter (ComponentAttributeMapping): data_filter provided by user
464
+ available_components (list[ComponentType]): all components available in model instance or data
465
+
466
+ Returns:
467
+ ComponentAttributeMappingDict: processed data_filter in a dictionary
468
+ """
469
+ if data_filter is None:
470
+ processed_data_filter: ComponentAttributeMappingDict = {ComponentType[k]: None for k in available_components}
471
+ elif isinstance(data_filter, ComponentAttributeFilterOptions):
472
+ processed_data_filter = {ComponentType[k]: data_filter for k in available_components}
473
+ elif isinstance(data_filter, (list, set)):
474
+ processed_data_filter = {ComponentType[k]: None for k in data_filter}
475
+ elif isinstance(data_filter, dict) and all(
476
+ attrs is None or isinstance(attrs, (set, list, ComponentAttributeFilterOptions))
477
+ for attrs in data_filter.values()
478
+ ):
479
+ processed_data_filter = data_filter
480
+ else:
481
+ raise ValueError(f"Invalid filter provided: {data_filter}")
482
+
483
+ validate_data_filter(processed_data_filter, dataset_type, available_components)
484
+ return processed_data_filter
485
+
486
+
487
+ def validate_data_filter(
488
+ data_filter: ComponentAttributeMappingDict,
489
+ dataset_type: DatasetType,
490
+ available_components: list[ComponentType],
491
+ ) -> None:
492
+ """Raise error if some specified components or attributes are unknown.
493
+
494
+ Args:
495
+ data_filter (ComponentAttributeMappingDict): Processed component to attribute dictionary
496
+ dataset_type (DatasetType): Type of dataset
497
+ available_components (list[ComponentType]): all components available in model instance or data
498
+
499
+ Raises:
500
+ ValueError: when the type for data_filter is incorrect
501
+ KeyError: with "unknown component types" for any unknown components
502
+ KeyError: with "unknown attributes" for unknown attribute(s) for a known component
503
+ """
504
+ dataset_meta = power_grid_meta_data[dataset_type]
505
+
506
+ for source, components in {
507
+ "data_filter": data_filter.keys(),
508
+ "data": available_components,
509
+ }.items():
510
+ unknown_components = [x for x in components if x not in dataset_meta]
511
+ if unknown_components:
512
+ raise KeyError(f"The following specified component types are unknown:{unknown_components} in {source}")
513
+
514
+ unknown_attributes = {}
515
+ for comp_name, attrs in data_filter.items():
516
+ if attrs is None or isinstance(attrs, ComponentAttributeFilterOptions):
517
+ continue
518
+
519
+ attr_names = dataset_meta[comp_name].dtype.names
520
+ diff = set(cast(set[str] | list[str], attrs))
521
+ if attr_names is not None:
522
+ diff = diff.difference(attr_names)
523
+ if diff != set():
524
+ unknown_attributes[comp_name] = diff
525
+
526
+ if unknown_attributes:
527
+ raise KeyError(f"The following specified attributes are unknown: {unknown_attributes} in data_filter")
528
+
529
+
530
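A sketch of the failure path documented above; the attribute name `u_rms` is deliberately invented so that the lookup fails.

    try:
        validate_data_filter(
            data_filter={ComponentType.node: {"u_rms"}},  # not an attribute of node in sym_output
            dataset_type=DatasetType.sym_output,
            available_components=[ComponentType.node],
        )
    except KeyError as error:
        print(error)  # reports {ComponentType.node: {'u_rms'}} as unknown attributes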
+ def is_sparse(component_data: ComponentData) -> bool:
+     """Check if component_data is sparse or dense. Only batch data can be sparse."""
+     return isinstance(component_data, dict) and set(component_data.keys()) == {
+         "indptr",
+         "data",
+     }
+
+
+ def is_columnar(component_data: ComponentData) -> bool:
+     """Check if component_data is columnar or row based."""
+     if is_sparse(component_data):
+         return not isinstance(component_data["data"], np.ndarray)
+     return not isinstance(component_data, np.ndarray)
+
+
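An illustrative sketch (not from the package source) of the three layouts these predicates distinguish: a row-based structured array, a columnar dict of attribute arrays, and a sparse batch dict. The attribute values are arbitrary.

    row_based = initialize_array("input", "node", 3)
    columnar = {"id": np.array([1, 2, 3])}
    sparse_batch = {"indptr": np.array([0, 1, 3]), "data": row_based}

    assert not is_sparse(row_based) and not is_columnar(row_based)
    assert not is_sparse(columnar) and is_columnar(columnar)
    assert is_sparse(sparse_batch) and not is_columnar(sparse_batch)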
+ def is_nan_or_default(x: np.ndarray) -> np.ndarray:
+     """
+     Check if elements in the array are NaN or equal to the min of its dtype.
+
+     Args:
+         x: A NumPy array to check.
+
+     Returns:
+         A boolean NumPy array where each element is True if the corresponding element in x is NaN
+         or min of its dtype, and False otherwise.
+     """
+     if x.dtype == np.float64:
+         return np.isnan(x)
+     if x.dtype in (np.int32, np.int8):
+         return x == np.iinfo(x.dtype).min
+     raise TypeError(f"Unsupported data type: {x.dtype}")
+
+
+ def is_nan_or_equivalent(array) -> bool:
+     """
+     Check if the array contains only nan values or equivalent nan values for specific data types.
+     This is the aggregated version of `is_nan_or_default` for the whole array.
+
+     Args:
+         array: The array to check.
+
+     Returns:
+         bool: True if the array contains only nan or equivalent nan values, False otherwise.
+     """
+     return isinstance(array, np.ndarray) and bool(
+         (array.dtype == np.float64 and np.isnan(array).all())
+         or (array.dtype in (np.int32, np.int8) and np.all(array == np.iinfo(array.dtype).min))
+     )
+
+
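An illustrative sketch: floats use NaN and integers use the dtype minimum as the "not provided" sentinel, so the two helpers above behave as follows on small arrays.

    floats = np.array([np.nan, 1.0, np.nan])
    ints = np.array([np.iinfo(np.int32).min, 5], dtype=np.int32)

    is_nan_or_default(floats)                 # array([ True, False,  True])
    is_nan_or_default(ints)                   # array([ True, False])
    is_nan_or_equivalent(floats)              # False: not every element is NaN
    is_nan_or_equivalent(np.full(3, np.nan))  # True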
+ def _check_sparse_dense(component_data: ComponentData, err_msg_suffixed: str) -> ComponentData:
+     if is_sparse(component_data):
+         indptr = component_data["indptr"]
+         if not isinstance(indptr, np.ndarray):
+             raise TypeError(err_msg_suffixed.format(f"Invalid indptr type {type(indptr).__name__}. "))
+         sub_data = component_data["data"]
+     elif isinstance(component_data, dict) and ("indptr" in component_data or "data" in component_data):
+         missing_element = "indptr" if "indptr" not in component_data else "data"
+         raise KeyError(err_msg_suffixed.format(f"Missing '{missing_element}' in sparse batch data. "))
+     else:
+         sub_data = component_data
+     return sub_data
+
+
+ def _check_columnar_row(sub_data: ComponentData, err_msg_suffixed: str) -> None:
+     if is_columnar(sub_data):
+         if not isinstance(sub_data, dict):
+             raise TypeError(err_msg_suffixed.format(""))
+         for attribute, attribute_array in sub_data.items():
+             if not isinstance(attribute_array, np.ndarray):
+                 raise TypeError(err_msg_suffixed.format(f"'{attribute}' attribute. "))
+             if attribute_array.ndim not in [1, 2, 3]:
+                 raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {attribute_array.ndim}"))
+     elif not isinstance(sub_data, np.ndarray):
+         raise TypeError(err_msg_suffixed.format(f"Invalid data type {type(sub_data).__name__} "))
+     elif isinstance(sub_data, np.ndarray) and sub_data.ndim not in [1, 2]:
+         raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {sub_data.ndim}. "))
+
+
+ def component_data_checks(component_data: ComponentData, component=None) -> None:
+     """Checks that component_data is valid ComponentData; raises a TypeError or KeyError if it is not."""
+     component_name = f"'{component}'" if component is not None else ""
+     err_msg = f"Invalid data for {component_name} component. {{0}}"
+     err_msg_suffixed = err_msg + "Expecting a 1D/2D Numpy structured array or a dictionary of such."
+
+     sub_data = _check_sparse_dense(component_data, err_msg_suffixed)
+     _check_columnar_row(sub_data, err_msg_suffixed)
+
+
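An illustrative sketch of the validation entry point above: a well-formed row array passes silently, while a would-be sparse dict missing its "data" key takes the KeyError path.

    component_data_checks(initialize_array("input", "node", 4), component="node")  # passes

    try:
        component_data_checks({"indptr": np.array([0, 2, 4])}, component="node")
    except KeyError as error:
        print(error)  # Missing 'data' in sparse batch data.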
+ def _extract_indptr(data: ComponentData) -> IndexPointer:  # pragma: no cover
+     """Returns indptr and checks whether it is valid.
+
+     Args:
+         data (ComponentData): The component data
+
+     Raises:
+         TypeError: if indptr is invalid or is not available
+
+     Returns:
+         IndexPointer: indptr if present
+     """
+     if not is_sparse(data):
+         raise TypeError("Not sparse data")
+     indptr = data["indptr"]
+     if not isinstance(indptr, np.ndarray):
+         raise TypeError("indptr is not a 1D numpy array")
+     if indptr.ndim != 1:
+         raise TypeError("indptr is not a 1D numpy array")
+     return indptr
+
+
+ def _extract_columnar_data(
+     data: ComponentData, is_batch: bool | None = None
+ ) -> SingleColumnarData | DenseBatchColumnarData:  # pragma: no cover
+     """Returns the contents of the columnar data.
+
+     Args:
+         data (ComponentData): component data
+         is_batch (bool | None, optional): whether the given data is batch data. The batch dimension check is
+             skipped when None is provided.
+
+     Raises:
+         TypeError: if data is not columnar or invalid data
+
+     Returns:
+         SingleColumnarData | DenseBatchColumnarData: the contents of columnar data
+     """
+     not_columnar_data_message = "Expected columnar data"
+
+     if is_batch is None:
+         allowed_dims = [1, 2, 3]
+     elif is_batch:
+         allowed_dims = [2, 3]
+     else:
+         allowed_dims = [1, 2]
+
+     sub_data = data["data"] if is_sparse(data) else data
+
+     if not isinstance(sub_data, dict):
+         raise TypeError(not_columnar_data_message)
+     for attribute, attribute_array in sub_data.items():
+         if not isinstance(attribute_array, np.ndarray) or not isinstance(attribute, str):
+             raise TypeError(not_columnar_data_message)
+         if attribute_array.ndim not in allowed_dims:
+             raise TypeError(not_columnar_data_message)
+     return cast(SingleColumnarData | DenseBatchColumnarData, sub_data)
+
+
+ def _extract_row_based_data(
+     data: ComponentData, is_batch: bool | None = None
+ ) -> SingleArray | DenseBatchArray:  # pragma: no cover
+     """Returns the contents of the row based data.
+
+     Args:
+         data (ComponentData): component data
+         is_batch (bool | None, optional): whether the given data is batch data. The batch dimension check is
+             skipped when None is provided.
+
+     Raises:
+         TypeError: if data is not row based or invalid data
+
+     Returns:
+         SingleArray | DenseBatchArray: the contents of row based data
+     """
+     if is_batch is None:
+         allowed_dims = [1, 2]
+     elif is_batch:
+         allowed_dims = [2]
+     else:
+         allowed_dims = [1]
+
+     sub_data = data["data"] if is_sparse(data) else data
+
+     if not isinstance(sub_data, np.ndarray):
+         raise TypeError("Expected row based data")
+     if sub_data.ndim not in allowed_dims:
+         raise TypeError("Expected row based data")
+     return sub_data
+
+
+ def _extract_data_from_component_data(data: ComponentData, is_batch: bool | None = None):
+     return _extract_columnar_data(data, is_batch) if is_columnar(data) else _extract_row_based_data(data, is_batch)
+
+
+ def _extract_contents_from_data(data: ComponentData):
+     return data["data"] if is_sparse(data) else data
+
+
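An illustrative sketch of the dispatch above on the two dense layouts; the `node` input component is assumed, and the columnar batch is shaped scenario x element.

    row = initialize_array("input", "node", 2)        # single scenario, row based
    columnar = {"id": np.array([[1, 2], [1, 2]])}     # dense batch, columnar

    assert _extract_data_from_component_data(row, is_batch=False) is row
    assert _extract_data_from_component_data(columnar, is_batch=True) is columnar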
+ def check_indptr_consistency(indptr: IndexPointer, batch_size: int | None, contents_size: int):
+     """Checks whether an indptr is valid. The batch size check is optional.
+
+     Args:
+         indptr (IndexPointer): The indptr array
+         batch_size (int | None): number of scenarios
+         contents_size (int): total number of elements in all scenarios
+
+     Raises:
+         ValueError: If indptr is invalid
+     """
+     if indptr[0] != 0 or indptr[-1] != contents_size:
+         raise ValueError(f"indptr should start from zero and end at size of data array. {VALIDATOR_MSG}")
+     if np.any(np.diff(indptr) < 0):
+         raise ValueError(f"indptr should be increasing. {VALIDATOR_MSG}")
+
+     actual_batch_size = indptr.size - 1
+     if batch_size is not None and batch_size != actual_batch_size:
+         raise ValueError(
+             f"Incorrect/inconsistent batch size provided: {actual_batch_size} scenarios provided "
+             f"but {batch_size} scenarios expected. {VALIDATOR_MSG}"
+         )
+
+
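An illustrative sketch of the indptr convention enforced above: three scenarios holding 2, 0 and 3 elements give the index pointer [0, 2, 2, 5] over a flattened data array of 5 elements.

    indptr = np.array([0, 2, 2, 5])
    check_indptr_consistency(indptr, batch_size=3, contents_size=5)  # passes

    try:
        check_indptr_consistency(indptr, batch_size=4, contents_size=5)
    except ValueError as error:
        print(error)  # 3 scenarios provided but 4 scenarios expected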
+ def get_dataset_type(data: Dataset) -> DatasetType:
+     """
+     Deduce the dataset type from the provided dataset.
+
+     Args:
+         data: the dataset
+
+     Raises:
+         ValueError
+             if the dataset type cannot be deduced because multiple dataset types match the format
+             (probably because the data contained no supported components, e.g. was empty)
+         PowerGridError
+             if no dataset type matches the format of the data
+             (probably because the data contained conflicting data formats)
+
+     Returns:
+         The dataset type.
+     """
+     candidates = set(power_grid_meta_data.keys())
+
+     if all(is_columnar(v) for v in data.values()):
+         raise ValueError("The dataset type could not be deduced. At least one component should have row based data.")
+
+     for dataset_type, dataset_metadatas in power_grid_meta_data.items():
+         for component, dataset_metadata in dataset_metadatas.items():
+             if component not in data or is_columnar(data[component]):
+                 continue
+             component_data = data[component]
+
+             component_dtype = component_data["data"].dtype if is_sparse(component_data) else component_data.dtype
+             if component_dtype is not dataset_metadata.dtype:
+                 candidates.discard(dataset_type)
+                 break
+
+     if not candidates:
+         raise PowerGridError(
+             "The dataset type could not be deduced because no type matches the data. "
+             "This usually means inconsistent data was provided."
+         )
+     if len(candidates) > 1:
+         raise ValueError("The dataset type could not be deduced because multiple dataset types match the data.")
+
+     return next(iter(candidates))
+
+
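An illustrative sketch of the expected behaviour (not verified against this exact release): a row-based `node` input array carries the dtype of the input dataset's `node` metadata, so only the input dataset type should remain as a candidate.

    node = initialize_array(DatasetType.input, ComponentType.node, 2)
    deduced = get_dataset_type({ComponentType.node: node})
    # expected: DatasetType.input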
+ def get_comp_size(comp_data: SingleColumnarData | SingleArray) -> int:
+     """
+     Get the number of elements in the comp_data of a single dataset.
+
+     Args:
+         comp_data: Columnar or row based data of a single batch
+
+     Returns:
+         Number of elements in the component
+     """
+     if not is_columnar(comp_data):
+         return len(comp_data)
+     comp_data = cast(SingleColumnarData, comp_data)
+     return len(next(iter(comp_data.values())))
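An illustrative sketch: both layouts of the same three-element component report the same size.

    assert get_comp_size(initialize_array("input", "node", 3)) == 3
    assert get_comp_size({"id": np.array([1, 2, 3])}) == 3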