power-grid-model 1.10.17__py3-none-win_amd64.whl → 1.12.119__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of power-grid-model might be problematic.

Files changed (67)
  1. power_grid_model/__init__.py +54 -29
  2. power_grid_model/_core/__init__.py +3 -3
  3. power_grid_model/_core/buffer_handling.py +507 -478
  4. power_grid_model/_core/data_handling.py +195 -141
  5. power_grid_model/_core/data_types.py +142 -0
  6. power_grid_model/_core/dataset_definitions.py +109 -109
  7. power_grid_model/_core/enum.py +226 -0
  8. power_grid_model/_core/error_handling.py +215 -198
  9. power_grid_model/_core/errors.py +134 -0
  10. power_grid_model/_core/index_integer.py +17 -17
  11. power_grid_model/_core/options.py +71 -69
  12. power_grid_model/_core/power_grid_core.py +577 -562
  13. power_grid_model/_core/power_grid_dataset.py +545 -490
  14. power_grid_model/_core/power_grid_meta.py +262 -244
  15. power_grid_model/_core/power_grid_model.py +1025 -687
  16. power_grid_model/_core/power_grid_model_c/__init__.py +3 -0
  17. power_grid_model/_core/power_grid_model_c/bin/power_grid_model_c.dll +0 -0
  18. power_grid_model/_core/power_grid_model_c/get_pgm_dll_path.py +63 -0
  19. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/basics.h +251 -0
  20. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/buffer.h +108 -0
  21. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset.h +332 -0
  22. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset_definitions.h +1060 -0
  23. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/handle.h +111 -0
  24. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/meta_data.h +189 -0
  25. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/model.h +130 -0
  26. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/options.h +142 -0
  27. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/serialization.h +118 -0
  28. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c.h +36 -0
  29. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/basics.hpp +65 -0
  30. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/buffer.hpp +61 -0
  31. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/dataset.hpp +224 -0
  32. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/handle.hpp +108 -0
  33. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/meta_data.hpp +84 -0
  34. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/model.hpp +63 -0
  35. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/options.hpp +52 -0
  36. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/serialization.hpp +124 -0
  37. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/utils.hpp +81 -0
  38. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp.hpp +19 -0
  39. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfig.cmake +37 -0
  40. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfigVersion.cmake +65 -0
  41. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets-release.cmake +19 -0
  42. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets.cmake +144 -0
  43. power_grid_model/_core/power_grid_model_c/lib/power_grid_model_c.lib +0 -0
  44. power_grid_model/_core/power_grid_model_c/share/LICENSE +292 -0
  45. power_grid_model/_core/power_grid_model_c/share/README.md +15 -0
  46. power_grid_model/_core/serialization.py +319 -317
  47. power_grid_model/_core/typing.py +20 -0
  48. power_grid_model/{_utils.py → _core/utils.py} +798 -783
  49. power_grid_model/data_types.py +321 -319
  50. power_grid_model/enum.py +27 -214
  51. power_grid_model/errors.py +37 -119
  52. power_grid_model/typing.py +43 -48
  53. power_grid_model/utils.py +529 -400
  54. power_grid_model/validation/__init__.py +25 -10
  55. power_grid_model/validation/{rules.py → _rules.py} +1167 -962
  56. power_grid_model/validation/{validation.py → _validation.py} +1172 -1015
  57. power_grid_model/validation/assertions.py +93 -92
  58. power_grid_model/validation/errors.py +602 -524
  59. power_grid_model/validation/utils.py +313 -318
  60. {power_grid_model-1.10.17.dist-info → power_grid_model-1.12.119.dist-info}/METADATA +162 -165
  61. power_grid_model-1.12.119.dist-info/RECORD +65 -0
  62. {power_grid_model-1.10.17.dist-info → power_grid_model-1.12.119.dist-info}/WHEEL +1 -1
  63. power_grid_model-1.12.119.dist-info/entry_points.txt +3 -0
  64. power_grid_model/_core/_power_grid_core.dll +0 -0
  65. power_grid_model-1.10.17.dist-info/RECORD +0 -32
  66. power_grid_model-1.10.17.dist-info/top_level.txt +0 -1
  67. {power_grid_model-1.10.17.dist-info → power_grid_model-1.12.119.dist-info/licenses}/LICENSE +0 -0
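Most of the churn in this release is a reorganization rather than new behaviour: the internal helper modules listed above move under the private power_grid_model._core package (_utils.py becomes _core/utils.py, and the bulk of enum.py and errors.py moves to _core/enum.py and _core/errors.py), while the public modules shrink and the validation internals become _rules.py and _validation.py. The sketch below is a minimal, hedged illustration of code that should be unaffected because it only touches the public namespace, plus the indptr/data sparse batch layout that the moved utility functions validate; it is based on the imports visible in the diff below and on general knowledge of the library, not on a guaranteed API contract of 1.12.119.

import numpy as np

# Public entry points; the pre-move helper file imported these same names from
# the public modules, so they are expected to keep resolving after the move to
# _core (assumption based on the file list above, not verified against 1.12.119).
from power_grid_model import initialize_array
from power_grid_model.enum import ComponentAttributeFilterOptions
from power_grid_model.errors import PowerGridError

# Sparse batch layout handled by the moved utils module: a dict with exactly the
# keys "indptr" and "data"; rows indptr[i]:indptr[i + 1] belong to scenario i.
update_line = initialize_array("update", "line", 3)  # 3 rows shared by 2 scenarios
sparse_batch = {
    "indptr": np.array([0, 1, 3]),  # scenario 0 -> row 0, scenario 1 -> rows 1 and 2
    "data": update_line,
}

Code that reached into the moved modules directly (for example power_grid_model._utils or power_grid_model.validation.rules) has to follow the renames shown in the file list.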
@@ -1,783 +1,798 @@
1
- # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
-
5
- """
6
- This file contains helper functions for library-internal use only.
7
-
8
- Disclaimer!
9
-
10
- We do not officially support this functionality and may remove features in this library at any given time!
11
- """
12
-
13
- from copy import deepcopy
14
- from typing import Sequence, cast
15
-
16
- import numpy as np
17
-
18
- from power_grid_model._core.dataset_definitions import ComponentType, DatasetType
19
- from power_grid_model._core.error_handling import VALIDATOR_MSG
20
- from power_grid_model._core.power_grid_meta import initialize_array, power_grid_meta_data
21
- from power_grid_model.data_types import (
22
- BatchColumn,
23
- BatchComponentData,
24
- BatchDataset,
25
- BatchList,
26
- ComponentData,
27
- Dataset,
28
- DenseBatchArray,
29
- DenseBatchColumnarData,
30
- DenseBatchData,
31
- IndexPointer,
32
- PythonDataset,
33
- SingleArray,
34
- SingleColumn,
35
- SingleColumnarData,
36
- SingleComponentData,
37
- SingleDataset,
38
- SinglePythonDataset,
39
- SparseBatchData,
40
- )
41
- from power_grid_model.enum import ComponentAttributeFilterOptions
42
- from power_grid_model.errors import PowerGridError
43
- from power_grid_model.typing import ComponentAttributeMapping, _ComponentAttributeMappingDict
44
-
45
-
46
- def is_nan(data) -> bool:
47
- """
48
- Determine if the data point is valid
49
- Args:
50
- data: a single scalar or numpy array
51
-
52
- Returns:
53
- True if all the data points are invalid
54
- False otherwise
55
- """
56
- nan_func = {
57
- np.dtype("f8"): lambda x: np.all(np.isnan(x)),
58
- np.dtype("i4"): lambda x: np.all(x == np.iinfo("i4").min),
59
- np.dtype("i1"): lambda x: np.all(x == np.iinfo("i1").min),
60
- }
61
- return bool(nan_func[data.dtype](data))
62
-
63
-
64
- def convert_batch_dataset_to_batch_list(batch_data: BatchDataset, dataset_type: DatasetType | None = None) -> BatchList:
65
- """
66
- Convert batch datasets to a list of individual batches
67
-
68
- Args:
69
- batch_data: a batch dataset for power-grid-model
70
- dataset_type: type of dataset
71
-
72
- Returns:
73
- A list of individual batches
74
- """
75
-
76
- # If the batch data is empty, return an empty list
77
- if len(batch_data) == 0:
78
- return []
79
-
80
- n_batches = get_and_verify_batch_sizes(batch_data=batch_data, dataset_type=dataset_type)
81
-
82
- # Initialize an empty list with dictionaries
83
- # Note that [{}] * n_batches would result in n copies of the same dict.
84
- list_data: BatchList = [{} for _ in range(n_batches)]
85
-
86
- # While the number of batches must be the same for each component, the structure (2d numpy array or indptr/data)
87
- # doesn't have to be. Therefore, we'll check the structure for each component and copy the data accordingly.
88
- for component, data in batch_data.items():
89
- component_data_checks(data, component)
90
- component_batches: Sequence[SingleComponentData]
91
- if is_sparse(data):
92
- component_batches = split_sparse_batch_data_in_batches(cast(SparseBatchData, data), component)
93
- else:
94
- component_batches = split_dense_batch_data_in_batches(cast(SingleComponentData, data), batch_size=n_batches)
95
- for i, batch in enumerate(component_batches):
96
- if (isinstance(batch, dict) and batch) or (isinstance(batch, np.ndarray) and batch.size > 0):
97
- list_data[i][component] = batch
98
- return list_data
99
-
100
-
101
- def get_and_verify_batch_sizes(batch_data: BatchDataset, dataset_type: DatasetType | None = None) -> int:
102
- """
103
- Determine the number of batches for each component and verify that each component has the same number of batches
104
-
105
- Args:
106
- batch_data: a batch dataset for power-grid-model
107
- dataset_type: type of dataset
108
-
109
- Returns:
110
- The number of batches
111
- """
112
-
113
- if dataset_type is None and any(is_columnar(v) and not is_sparse(v) for v in batch_data.values()):
114
- dataset_type = get_dataset_type(batch_data)
115
-
116
- n_batch_size = 0
117
- checked_components: list[ComponentType] = []
118
- for component, data in batch_data.items():
119
- n_component_batch_size = get_batch_size(data, dataset_type, component)
120
- if checked_components and n_component_batch_size != n_batch_size:
121
- if len(checked_components) == 1:
122
- checked_components_str = f"'{checked_components.pop()}'"
123
- else:
124
- str_checked_components = [str(component) for component in checked_components]
125
- checked_components_str = "/".join(sorted(str_checked_components))
126
- raise ValueError(
127
- f"Inconsistent number of batches in batch data. "
128
- f"Component '{component}' contains {n_component_batch_size} batches, "
129
- f"while {checked_components_str} contained {n_batch_size} batches."
130
- )
131
- n_batch_size = n_component_batch_size
132
- checked_components.append(component)
133
- return n_batch_size
134
-
135
-
136
- def get_batch_size(
137
- batch_data: BatchComponentData, dataset_type: DatasetType | None = None, component: ComponentType | None = None
138
- ) -> int:
139
- """
140
- Determine the number of batches and verify the data structure while we're at it. Note only batch data is supported.
141
- Note: SingleColumnarData would get treated as batch by this function.
142
-
143
- Args:
144
- batch_data: a batch array for power-grid-model
145
- dataset_type: type of dataset
146
- component: name of component
147
-
148
- Raises:
149
- ValueError: when the type for data_filter is incorrect
150
-
151
- Returns:
152
- The number of batches
153
- """
154
- component_data_checks(batch_data)
155
- if is_sparse(batch_data):
156
- indptr = batch_data["indptr"]
157
- return indptr.size - 1
158
-
159
- if not is_columnar(batch_data):
160
- sym_array = batch_data
161
- else:
162
- batch_data = cast(DenseBatchColumnarData, batch_data)
163
- if component is None or dataset_type is None:
164
- raise ValueError("Cannot deduce batch size for given columnar data without a dataset type or component")
165
- sym_attributes, _ = _get_sym_or_asym_attributes(dataset_type, component)
166
- for attribute, array in batch_data.items():
167
- if attribute in sym_attributes:
168
- break
169
- if array.ndim == 1:
170
- raise TypeError("Incorrect dimension present in batch data.")
171
- if array.ndim == 2:
172
- return 1
173
- return array.shape[0]
174
- sym_array = next(iter(batch_data.values()))
175
-
176
- sym_array = cast(DenseBatchArray | BatchColumn, sym_array)
177
- if sym_array.ndim == 3:
178
- raise TypeError("Incorrect dimension present in batch data.")
179
- if sym_array.ndim == 1:
180
- return 1
181
- return sym_array.shape[0]
182
-
183
-
184
- def _get_sym_or_asym_attributes(dataset_type: DatasetType, component: ComponentType):
185
- """Segregate into symmetric of asymmetric attribute.
186
- An asymmetric attribute holds per-phase values and has an extra dimension.
187
-
188
- Args:
189
- dataset_type (DatasetType): dataset type
190
- component (ComponentType): component name
191
-
192
- Returns:
193
- symmetrical and asymmetrical attributes
194
- """
195
- asym_attributes = set()
196
- sym_attributes = set()
197
- for meta_dataset_type, dataset_meta in power_grid_meta_data.items():
198
- if dataset_type != meta_dataset_type:
199
- continue
200
- for component_name_meta, component_meta in dataset_meta.items():
201
- if component != component_name_meta:
202
- continue
203
- if component_meta.dtype.names is None:
204
- raise ValueError("No attributes available in meta")
205
- for attribute in component_meta.dtype.names:
206
- if component_meta.dtype[attribute].shape == (3,):
207
- asym_attributes.add(attribute)
208
- if component_meta.dtype[attribute].shape == ():
209
- sym_attributes.add(attribute)
210
- return sym_attributes, asym_attributes
211
-
212
-
213
- def _split_numpy_array_in_batches(
214
- data: DenseBatchArray | SingleArray | SingleColumn | BatchColumn,
215
- ) -> list[SingleArray] | list[SingleColumn]:
216
- """
217
- Split a single dense numpy array into one or more batches
218
-
219
- Args:
220
- data: A 1D or 2D Numpy structured array. A 1D array is a single table / batch, a 2D array is a batch per table.
221
-
222
- Returns:
223
- A list with a single numpy structured array per batch
224
- """
225
- if data.ndim == 1:
226
- return [data]
227
- if data.ndim in [2, 3]:
228
- return [data[i, ...] for i in range(data.shape[0])]
229
- raise ValueError("Dimension of the component data is invalid.")
230
-
231
-
232
- def split_dense_batch_data_in_batches(
233
- data: SingleComponentData | DenseBatchData, batch_size: int
234
- ) -> list[SingleComponentData]:
235
- """
236
- Split a single dense numpy array into one or more batches
237
-
238
- Args:
239
- data: A 1D or 2D Numpy structured array. A 1D array is a single table / batch, a 2D array is a batch per table.
240
- batch_size: size of batch
241
-
242
- Returns:
243
- A list with a single component data per scenario
244
- """
245
- if isinstance(data, np.ndarray):
246
- return cast(list[SingleComponentData], _split_numpy_array_in_batches(data))
247
-
248
- scenarios_per_attribute = {
249
- attribute: _split_numpy_array_in_batches(attribute_data) for attribute, attribute_data in data.items()
250
- }
251
-
252
- return [
253
- {attribute: scenarios_per_attribute[attribute][scenario] for attribute, attribute_data in data.items()}
254
- for scenario in range(batch_size)
255
- ]
256
-
257
-
258
- def split_sparse_batch_data_in_batches(
259
- batch_data: SparseBatchData, component: ComponentType
260
- ) -> list[SingleComponentData]:
261
- """
262
- Split a single numpy array representing a compressed sparse structure into one or more batches
263
-
264
- Args:
265
- batch_data: Sparse batch data
266
- component: The name of the component to which the data belongs, only used for errors.
267
-
268
- Returns:
269
- A list with a single numpy structured array per batch
270
- """
271
- for key in ["indptr", "data"]:
272
- if key not in batch_data:
273
- raise KeyError(
274
- f"Missing '{key}' in sparse batch data for '{component}' "
275
- "(expected a python dictionary containing two keys: 'indptr' and 'data')."
276
- )
277
-
278
- data = batch_data["data"]
279
- indptr = batch_data["indptr"]
280
-
281
- def _split_buffer(buffer: np.ndarray, scenario: int) -> SingleArray:
282
- if not isinstance(buffer, np.ndarray) or buffer.ndim != 1:
283
- raise TypeError(
284
- f"Invalid data type {type(buffer).__name__} in sparse batch data for '{component}' "
285
- "(should be a 1D Numpy structured array (i.e. a single 'table'))."
286
- )
287
-
288
- if not isinstance(indptr, np.ndarray) or indptr.ndim != 1 or not np.issubdtype(indptr.dtype, np.integer):
289
- raise TypeError(
290
- f"Invalid indptr data type {type(indptr).__name__} in batch data for '{component}' "
291
- "(should be a 1D Numpy array (i.e. a single 'list'), "
292
- "containing indices (i.e. integers))."
293
- )
294
-
295
- if indptr[0] != 0 or indptr[-1] != len(buffer) or indptr[scenario] > indptr[scenario + 1]:
296
- raise TypeError(
297
- f"Invalid indptr in batch data for '{component}' "
298
- f"(should start with 0, end with the number of objects ({len(buffer)}) "
299
- "and be monotonic increasing)."
300
- )
301
-
302
- return buffer[indptr[scenario] : indptr[scenario + 1]]
303
-
304
- def _get_scenario(scenario: int) -> SingleComponentData:
305
- if isinstance(data, dict):
306
- return {attribute: _split_buffer(attribute_data, scenario) for attribute, attribute_data in data.items()}
307
- return _split_buffer(data, scenario)
308
-
309
- return [_get_scenario(i) for i in range(len(indptr) - 1)]
310
-
311
-
312
- def convert_dataset_to_python_dataset(data: Dataset) -> PythonDataset:
313
- """
314
- Convert internal numpy arrays to native python data
315
- If an attribute is not available (NaN value), it will not be exported.
316
-
317
- Args:
318
- data: A single or batch dataset for power-grid-model
319
- Returns:
320
- A python dict for single dataset
321
- A python list for batch dataset
322
- """
323
-
324
- # Check if the dataset is a single dataset or batch dataset
325
- # It is a batch dataset if it is a 2D array or an indptr/data structure
326
- is_batch: bool | None = None
327
- for component, array in data.items():
328
- is_dense_batch = isinstance(array, np.ndarray) and array.ndim == 2
329
- is_sparse_batch = isinstance(array, dict) and "indptr" in array and "data" in array
330
- if is_batch is not None and is_batch != (is_dense_batch or is_sparse_batch):
331
- raise ValueError(
332
- f"Mixed {'' if is_batch else 'non-'}batch data "
333
- f"with {'non-' if is_batch else ''}batch data ({component})."
334
- )
335
- is_batch = is_dense_batch or is_sparse_batch
336
-
337
- # If it is a batch, convert the batch data to a list of batches, then convert each batch individually.
338
- if is_batch:
339
- # We have established that this is batch data, so let's tell the type checker that this is a BatchDataset
340
- data = cast(BatchDataset, data)
341
- list_data = convert_batch_dataset_to_batch_list(data)
342
- return [convert_single_dataset_to_python_single_dataset(data=x) for x in list_data]
343
-
344
- # We have established that this is not batch data, so let's tell the type checker that this is a SingleDataset
345
- data = cast(SingleDataset, data)
346
- return convert_single_dataset_to_python_single_dataset(data=data)
347
-
348
-
349
- def convert_single_dataset_to_python_single_dataset(
350
- data: SingleDataset,
351
- ) -> SinglePythonDataset:
352
- """
353
- Convert internal numpy arrays to native python data
354
- If an attribute is not available (NaN value), it will not be exported.
355
-
356
- Args:
357
- data: A single dataset for power-grid-model
358
-
359
- Returns:
360
- A python dict for single dataset
361
- """
362
-
363
- # Convert each numpy array to a list of objects, which contains only the non-NaN attributes:
364
- # For example: {"node": [{"id": 0, ...}, {"id": 1, ...}], "line": [{"id": 2, ...}]}
365
- def _convert_component(objects: SingleComponentData):
366
- # This should be a single data set
367
- if not isinstance(objects, np.ndarray) or objects.ndim != 1:
368
- raise ValueError("Invalid data format")
369
-
370
- return [
371
- {attribute: obj[attribute].tolist() for attribute in objects.dtype.names if not is_nan(obj[attribute])}
372
- for obj in objects
373
- ]
374
-
375
- return {component: _convert_component(objects) for component, objects in data.items()}
376
-
377
-
378
- def compatibility_convert_row_columnar_dataset(
379
- data: Dataset,
380
- data_filter: ComponentAttributeMapping,
381
- dataset_type: DatasetType,
382
- available_components: list[ComponentType] | None = None,
383
- ) -> Dataset:
384
- """Temporary function to transform row, column or mixed based datasets to a full row or column based dataset as per
385
- the data_filter. The purpose of this function is to mimic columnar data and transform back to row data without any
386
- memory footprint benefits.
387
- Note: Copies are made on a per-component basis; if a component is row based in both the input and the requested
388
- output, that component is returned without a copy.
389
-
390
- Args:
391
- data (Dataset): dataset to convert
392
- data_filter (ComponentAttributeMapping): desired component and attribute mapping
393
- dataset_type (DatasetType): type of dataset (e.g., input, update or [sym | asym | sc]_output)
394
- available_components (list[ComponentType] | None): available components in model
395
-
396
- Returns:
397
- Dataset: converted dataset
398
- """
399
- if available_components is None:
400
- available_components = list(data.keys())
401
-
402
- processed_data_filter = process_data_filter(dataset_type, data_filter, available_components)
403
-
404
- result_data: Dataset = {}
405
- for comp_name, attrs in processed_data_filter.items():
406
- if comp_name not in data:
407
- continue
408
-
409
- sub_data = _extract_data_from_component_data(data[comp_name])
410
- converted_sub_data = _convert_data_to_row_or_columnar(
411
- data=sub_data,
412
- comp_name=comp_name,
413
- dataset_type=dataset_type,
414
- attrs=attrs,
415
- )
416
-
417
- if is_sparse(data[comp_name]):
418
- result_data[comp_name] = {"indptr": _extract_indptr(data[comp_name]), "data": converted_sub_data}
419
- else:
420
- result_data[comp_name] = converted_sub_data
421
- return result_data
422
-
423
-
424
- def _convert_data_to_row_or_columnar(
425
- data: SingleComponentData,
426
- comp_name: ComponentType,
427
- dataset_type: DatasetType,
428
- attrs: set[str] | list[str] | None | ComponentAttributeFilterOptions,
429
- ) -> SingleComponentData:
430
- """Converts row or columnar component data to row or columnar component data as requested in `attrs`."""
431
- if attrs is None:
432
- if not is_columnar(data):
433
- return data
434
- data = cast(SingleColumnarData, data)
435
- output_array = initialize_array(dataset_type, comp_name, next(iter(data.values())).shape)
436
- for k in data:
437
- output_array[k] = data[k]
438
- return output_array
439
-
440
- if isinstance(attrs, (list, set)) and len(attrs) == 0:
441
- return {}
442
- if isinstance(attrs, ComponentAttributeFilterOptions):
443
- names = cast(SingleArray, data).dtype.names if not is_columnar(data) else cast(SingleColumnarData, data).keys()
444
- return {attr: deepcopy(data[attr]) for attr in names}
445
- return {attr: deepcopy(data[attr]) for attr in attrs}
446
-
447
-
448
- def process_data_filter(
449
- dataset_type: DatasetType,
450
- data_filter: ComponentAttributeMapping,
451
- available_components: list[ComponentType],
452
- ) -> _ComponentAttributeMappingDict:
453
- """Checks valid type for data_filter. Also checks for any invalid component names and attribute names.
454
-
455
- Args:
456
- dataset_type (DatasetType): the type of output that the user will see (as per the calculation options)
457
- data_filter (ComponentAttributeMapping): data_filter provided by user
458
- available_components (list[ComponentType]): all components available in model instance or data
459
-
460
- Returns:
461
- _ComponentAttributeMappingDict: processed data_filter in a dictionary
462
- """
463
- if data_filter is None:
464
- processed_data_filter: _ComponentAttributeMappingDict = {ComponentType[k]: None for k in available_components}
465
- elif isinstance(data_filter, ComponentAttributeFilterOptions):
466
- processed_data_filter = {ComponentType[k]: data_filter for k in available_components}
467
- elif isinstance(data_filter, (list, set)):
468
- processed_data_filter = {ComponentType[k]: None for k in data_filter}
469
- elif isinstance(data_filter, dict) and all(
470
- attrs is None or isinstance(attrs, (set, list, ComponentAttributeFilterOptions))
471
- for attrs in data_filter.values()
472
- ):
473
- processed_data_filter = data_filter
474
- else:
475
- raise ValueError(f"Invalid filter provided: {data_filter}")
476
-
477
- validate_data_filter(processed_data_filter, dataset_type, available_components)
478
- return processed_data_filter
479
-
480
-
481
- def validate_data_filter(
482
- data_filter: _ComponentAttributeMappingDict,
483
- dataset_type: DatasetType,
484
- available_components: list[ComponentType],
485
- ) -> None:
486
- """Raise error if some specified components or attributes are unknown.
487
-
488
- Args:
489
- data_filter (_ComponentAttributeMappingDict): Processed component-to-attribute dictionary
490
- dataset_type (DatasetType): Type of dataset
491
- available_components (list[ComponentType]): all components available in model instance or data
492
-
493
- Raises:
494
- ValueError: when the type for data_filter is incorrect
495
- KeyError: with "unknown component types" for any unknown components
496
- KeyError: with "unknown attributes" for unknown attribute(s) for a known component
497
- """
498
- dataset_meta = power_grid_meta_data[dataset_type]
499
-
500
- for source, components in {
501
- "data_filter": data_filter.keys(),
502
- "data": available_components,
503
- }.items():
504
- unknown_components = [x for x in components if x not in dataset_meta]
505
- if unknown_components:
506
- raise KeyError(f"The following specified component types are unknown:{unknown_components} in {source}")
507
-
508
- unknown_attributes = {}
509
- for comp_name, attrs in data_filter.items():
510
- if attrs is None or isinstance(attrs, ComponentAttributeFilterOptions):
511
- continue
512
-
513
- attr_names = dataset_meta[comp_name].dtype.names
514
- diff = set(cast(set[str] | list[str], attrs))
515
- if attr_names is not None:
516
- diff = diff.difference(attr_names)
517
- if diff != set():
518
- unknown_attributes[comp_name] = diff
519
-
520
- if unknown_attributes:
521
- raise KeyError(f"The following specified attributes are unknown: {unknown_attributes} in data_filter")
522
-
523
-
524
- def is_sparse(component_data: ComponentData) -> bool:
525
- """Check if component_data is sparse or dense. Only batch data can be sparse."""
526
- return isinstance(component_data, dict) and set(component_data.keys()) == {
527
- "indptr",
528
- "data",
529
- }
530
-
531
-
532
- def is_columnar(component_data: ComponentData) -> bool:
533
- """Check if component_data is columnar or row based"""
534
- if is_sparse(component_data):
535
- return not isinstance(component_data["data"], np.ndarray)
536
- return not isinstance(component_data, np.ndarray)
537
-
538
-
539
- def is_nan_or_default(x: np.ndarray) -> np.ndarray:
540
- """
541
- Check if elements in the array are NaN or equal to the min of its dtype.
542
-
543
- Args:
544
- x: A NumPy array to check.
545
-
546
- Returns:
547
- A boolean NumPy array where each element is True if the corresponding element in x is NaN
548
- or min of its dtype, and False otherwise.
549
- """
550
- if x.dtype == np.float64:
551
- return np.isnan(x)
552
- if x.dtype in (np.int32, np.int8):
553
- return x == np.iinfo(x.dtype).min
554
- raise TypeError(f"Unsupported data type: {x.dtype}")
555
-
556
-
557
- def is_nan_or_equivalent(array) -> bool:
558
- """
559
- Check if the array contains only nan values or equivalent nan values for specific data types.
560
- This is the aggregated version of `is_nan_or_default` for the whole array.
561
-
562
- Args:
563
- array: The array to check.
564
-
565
- Returns:
566
- bool: True if the array contains only nan or equivalent nan values, False otherwise.
567
- """
568
- return isinstance(array, np.ndarray) and bool(
569
- (array.dtype == np.float64 and np.isnan(array).all())
570
- or (array.dtype in (np.int32, np.int8) and np.all(array == np.iinfo(array.dtype).min))
571
- )
572
-
573
-
574
- def _check_sparse_dense(component_data: ComponentData, err_msg_suffixed: str) -> ComponentData:
575
- if is_sparse(component_data):
576
- indptr = component_data["indptr"]
577
- if not isinstance(indptr, np.ndarray):
578
- raise TypeError(err_msg_suffixed.format(f"Invalid indptr type {type(indptr).__name__}. "))
579
- sub_data = component_data["data"]
580
- elif isinstance(component_data, dict) and ("indptr" in component_data or "data" in component_data):
581
- missing_element = "indptr" if "indptr" not in component_data else "data"
582
- raise KeyError(err_msg_suffixed.format(f"Missing '{missing_element}' in sparse batch data. "))
583
- else:
584
- sub_data = component_data
585
- return sub_data
586
-
587
-
588
- def _check_columnar_row(sub_data: ComponentData, err_msg_suffixed: str) -> None:
589
- if is_columnar(sub_data):
590
- if not isinstance(sub_data, dict):
591
- raise TypeError(err_msg_suffixed.format(""))
592
- for attribute, attribute_array in sub_data.items():
593
- if not isinstance(attribute_array, np.ndarray):
594
- raise TypeError(err_msg_suffixed.format(f"'{attribute}' attribute. "))
595
- if attribute_array.ndim not in [1, 2, 3]:
596
- raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {attribute_array.ndim}"))
597
- elif not isinstance(sub_data, np.ndarray):
598
- raise TypeError(err_msg_suffixed.format(f"Invalid data type {type(sub_data).__name__} "))
599
- elif isinstance(sub_data, np.ndarray) and sub_data.ndim not in [1, 2]:
600
- raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {sub_data.ndim}. "))
601
-
602
-
603
- def component_data_checks(component_data: ComponentData, component=None) -> None:
604
- """Checks if component_data is of ComponentData and raises ValueError if its not"""
605
- component_name = f"'{component}'" if component is not None else ""
606
- err_msg = f"Invalid data for {component_name} component. " "{0}"
607
- err_msg_suffixed = err_msg + "Expecting a 1D/2D Numpy structured array or a dictionary of such."
608
-
609
- sub_data = _check_sparse_dense(component_data, err_msg_suffixed)
610
- _check_columnar_row(sub_data, err_msg_suffixed)
611
-
612
-
613
- def _extract_indptr(data: ComponentData) -> IndexPointer: # pragma: no cover
614
- """returns indptr and checks if its valid
615
-
616
- Args:
617
- data (ComponentData): The component data
618
-
619
- Raises:
620
- TypeError: if indptr is invalid or is not available
621
-
622
- Returns:
623
- IndexPointer: indptr if present
624
- """
625
- if not is_sparse(data):
626
- raise TypeError("Not sparse data")
627
- indptr = data["indptr"]
628
- if not isinstance(indptr, np.ndarray):
629
- raise TypeError("indptr is not a 1D numpy array")
630
- if indptr.ndim != 1:
631
- raise TypeError("indptr is not a 1D numpy array")
632
- return indptr
633
-
634
-
635
- def _extract_columnar_data(
636
- data: ComponentData, is_batch: bool | None = None
637
- ) -> SingleColumnarData | DenseBatchColumnarData: # pragma: no cover
638
- """returns the contents of the columnar data.
639
-
640
- Args:
641
- data (ComponentData): component data
642
- is_batch (bool | None, optional): If given data is batch. Skips batch check if provided None.
643
-
644
- Raises:
645
- TypeError: if data is not columnar or invalid data
646
-
647
- Returns:
648
- SingleColumnarData | DenseBatchColumnarData: the contents of columnar data
649
- """
650
- not_columnar_data_message = "Expected columnar data"
651
-
652
- if is_batch is not None:
653
- allowed_dims = [2, 3] if is_batch else [1, 2]
654
- else:
655
- allowed_dims = [1, 2, 3]
656
-
657
- sub_data = data["data"] if is_sparse(data) else data
658
-
659
- if not isinstance(sub_data, dict):
660
- raise TypeError(not_columnar_data_message)
661
- for attribute, attribute_array in sub_data.items():
662
- if not isinstance(attribute_array, np.ndarray) or not isinstance(attribute, str):
663
- raise TypeError(not_columnar_data_message)
664
- if attribute_array.ndim not in allowed_dims:
665
- raise TypeError(not_columnar_data_message)
666
- return cast(SingleColumnarData | DenseBatchColumnarData, sub_data)
667
-
668
-
669
- def _extract_row_based_data(
670
- data: ComponentData, is_batch: bool | None = None
671
- ) -> SingleArray | DenseBatchArray: # pragma: no cover
672
- """returns the contents of the row based data
673
-
674
- Args:
675
- data (ComponentData): component data
676
- is_batch (bool | None, optional): If given data is batch. Skips batch check if provided None.
677
-
678
- Raises:
679
- TypeError: if data is not row based or invalid data
680
-
681
- Returns:
682
- SingleArray | DenseBatchArray: the contents of row based data
683
- """
684
- if is_batch is not None:
685
- allowed_dims = [2] if is_batch else [1]
686
- else:
687
- allowed_dims = [1, 2]
688
-
689
- sub_data = data["data"] if is_sparse(data) else data
690
-
691
- if not isinstance(sub_data, np.ndarray):
692
- raise TypeError("Expected row based data")
693
- if sub_data.ndim not in allowed_dims:
694
- raise TypeError("Expected row based data")
695
- return sub_data
696
-
697
-
698
- def _extract_data_from_component_data(data: ComponentData, is_batch: bool | None = None):
699
- return _extract_columnar_data(data, is_batch) if is_columnar(data) else _extract_row_based_data(data, is_batch)
700
-
701
-
702
- def _extract_contents_from_data(data: ComponentData):
703
- return data["data"] if is_sparse(data) else data
704
-
705
-
706
- def check_indptr_consistency(indptr: IndexPointer, batch_size: int | None, contents_size: int):
707
- """checks if an indptr is valid. Batch size check is optional.
708
-
709
- Args:
710
- indptr (IndexPointer): The indptr array
711
- batch_size (int | None): number of scenarios
712
- contents_size (int): total number of elements in all scenarios
713
-
714
- Raises:
715
- ValueError: If indptr is invalid
716
- """
717
- if indptr[0] != 0 or indptr[-1] != contents_size:
718
- raise ValueError(f"indptr should start from zero and end at size of data array. {VALIDATOR_MSG}")
719
- if np.any(np.diff(indptr) < 0):
720
- raise ValueError(f"indptr should be increasing. {VALIDATOR_MSG}")
721
- if batch_size is not None and batch_size != indptr.size - 1:
722
- raise ValueError(f"Provided batch size must be equal to actual batch size. {VALIDATOR_MSG}")
723
-
724
-
725
- def get_dataset_type(data: Dataset) -> DatasetType:
726
- """
727
- Deduce the dataset type from the provided dataset.
728
-
729
- Args:
730
- data: the dataset
731
-
732
- Raises:
733
- ValueError
734
- if the dataset type cannot be deduced because multiple dataset types match the format
735
- (probably because the data contained no supported components, e.g. was empty)
736
- PowerGridError
737
- if no dataset type matches the format of the data
738
- (probably because the data contained conflicting data formats)
739
-
740
- Returns:
741
- The dataset type.
742
- """
743
- candidates = set(power_grid_meta_data.keys())
744
-
745
- if all(is_columnar(v) for v in data.values()):
746
- raise ValueError("The dataset type could not be deduced. At least one component should have row based data.")
747
-
748
- for dataset_type, dataset_metadatas in power_grid_meta_data.items():
749
- for component, dataset_metadata in dataset_metadatas.items():
750
- if component not in data or is_columnar(data[component]):
751
- continue
752
- component_data = data[component]
753
-
754
- component_dtype = component_data["data"].dtype if is_sparse(component_data) else component_data.dtype
755
- if component_dtype is not dataset_metadata.dtype:
756
- candidates.discard(dataset_type)
757
- break
758
-
759
- if not candidates:
760
- raise PowerGridError(
761
- "The dataset type could not be deduced because no type matches the data. "
762
- "This usually means inconsistent data was provided."
763
- )
764
- if len(candidates) > 1:
765
- raise ValueError("The dataset type could not be deduced because multiple dataset types match the data.")
766
-
767
- return next(iter(candidates))
768
-
769
-
770
- def get_comp_size(comp_data: SingleColumnarData | SingleArray) -> int:
771
- """
772
- Get the number of elements in the comp_data of a single dataset.
773
-
774
- Args:
775
- comp_data: Columnar or row based data of a single batch
776
-
777
- Returns:
778
- Number of elements in the component
779
- """
780
- if not is_columnar(comp_data):
781
- return len(comp_data)
782
- comp_data = cast(SingleColumnarData, comp_data)
783
- return len(next(iter(comp_data.values())))
1
+ # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+
5
+ """
6
+ This file contains helper functions for library-internal use only.
7
+
8
+ Disclaimer!
9
+
10
+ We do not officially support this functionality and may remove features in this library at any given time!
11
+ """
12
+
13
+ from collections.abc import Sequence
14
+ from copy import deepcopy
15
+ from typing import cast
16
+
17
+ import numpy as np
18
+
19
+ from power_grid_model._core.data_types import (
20
+ BatchColumn,
21
+ BatchComponentData,
22
+ BatchDataset,
23
+ BatchList,
24
+ ComponentData,
25
+ Dataset,
26
+ DenseBatchArray,
27
+ DenseBatchColumnarData,
28
+ DenseBatchData,
29
+ IndexPointer,
30
+ PythonDataset,
31
+ SingleArray,
32
+ SingleColumn,
33
+ SingleColumnarData,
34
+ SingleComponentData,
35
+ SingleDataset,
36
+ SinglePythonDataset,
37
+ SparseBatchData,
38
+ )
39
+ from power_grid_model._core.dataset_definitions import ComponentType, DatasetType
40
+ from power_grid_model._core.enum import ComponentAttributeFilterOptions
41
+ from power_grid_model._core.error_handling import VALIDATOR_MSG
42
+ from power_grid_model._core.errors import PowerGridError
43
+ from power_grid_model._core.power_grid_meta import initialize_array, power_grid_meta_data
44
+ from power_grid_model._core.typing import ComponentAttributeMapping, ComponentAttributeMappingDict
45
+
46
+ SINGLE_DATASET_NDIM = 1
47
+ BATCH_DATASET_NDIM = 2
48
+
49
+
50
+ def is_nan(data) -> bool:
51
+ """
52
+ Determine if the data point is valid
53
+ Args:
54
+ data: a single scalar or numpy array
55
+
56
+ Returns:
57
+ True if all the data points are invalid
58
+ False otherwise
59
+ """
60
+ nan_func = {
61
+ np.dtype("f8"): lambda x: np.all(np.isnan(x)),
62
+ np.dtype("i4"): lambda x: np.all(x == np.iinfo("i4").min),
63
+ np.dtype("i1"): lambda x: np.all(x == np.iinfo("i1").min),
64
+ }
65
+ return bool(nan_func[data.dtype](data))
66
+
67
+
68
+ def convert_batch_dataset_to_batch_list(batch_data: BatchDataset, dataset_type: DatasetType | None = None) -> BatchList:
69
+ """
70
+ Convert batch datasets to a list of individual batches
71
+
72
+ Args:
73
+ batch_data: a batch dataset for power-grid-model
74
+ dataset_type: type of dataset
75
+
76
+ Returns:
77
+ A list of individual batches
78
+ """
79
+
80
+ # If the batch data is empty, return an empty list
81
+ if len(batch_data) == 0:
82
+ return []
83
+
84
+ n_batches = get_and_verify_batch_sizes(batch_data=batch_data, dataset_type=dataset_type)
85
+
86
+ # Initialize an empty list with dictionaries
87
+ # Note that [{}] * n_batches would result in n copies of the same dict.
88
+ list_data: BatchList = [{} for _ in range(n_batches)]
89
+
90
+ # While the number of batches must be the same for each component, the structure (2d numpy array or indptr/data)
91
+ # doesn't have to be. Therefore, we'll check the structure for each component and copy the data accordingly.
92
+ for component, data in batch_data.items():
93
+ component_data_checks(data, component)
94
+ component_batches: Sequence[SingleComponentData]
95
+ if is_sparse(data):
96
+ component_batches = split_sparse_batch_data_in_batches(cast(SparseBatchData, data), component)
97
+ else:
98
+ component_batches = split_dense_batch_data_in_batches(cast(SingleComponentData, data), batch_size=n_batches)
99
+ for i, batch in enumerate(component_batches):
100
+ if (isinstance(batch, dict) and batch) or (isinstance(batch, np.ndarray) and batch.size > 0):
101
+ list_data[i][component] = batch
102
+ return list_data
103
+
104
+
105
+ def get_and_verify_batch_sizes(batch_data: BatchDataset, dataset_type: DatasetType | None = None) -> int:
106
+ """
107
+ Determine the number of batches for each component and verify that each component has the same number of batches
108
+
109
+ Args:
110
+ batch_data: a batch dataset for power-grid-model
111
+ dataset_type: type of dataset
112
+
113
+ Returns:
114
+ The number of batches
115
+ """
116
+
117
+ if dataset_type is None and any(is_columnar(v) and not is_sparse(v) for v in batch_data.values()):
118
+ dataset_type = get_dataset_type(batch_data)
119
+
120
+ n_batch_size = 0
121
+ checked_components: list[ComponentType] = []
122
+ for component, data in batch_data.items():
123
+ n_component_batch_size = get_batch_size(data, dataset_type, component)
124
+ if checked_components and n_component_batch_size != n_batch_size:
125
+ if len(checked_components) == 1:
126
+ checked_components_str = f"'{checked_components.pop()}'"
127
+ else:
128
+ str_checked_components = [str(component) for component in checked_components]
129
+ checked_components_str = "/".join(sorted(str_checked_components))
130
+ raise ValueError(
131
+ f"Inconsistent number of batches in batch data. "
132
+ f"Component '{component}' contains {n_component_batch_size} batches, "
133
+ f"while {checked_components_str} contained {n_batch_size} batches."
134
+ )
135
+ n_batch_size = n_component_batch_size
136
+ checked_components.append(component)
137
+ return n_batch_size
138
+
139
+
140
+ def get_batch_size(
141
+ batch_data: BatchComponentData, dataset_type: DatasetType | None = None, component: ComponentType | None = None
142
+ ) -> int:
143
+ """
144
+ Determine the number of batches and verify the data structure while we're at it. Note only batch data is supported.
145
+ Note: SingleColumnarData would get treated as batch by this function.
146
+
147
+ Args:
148
+ batch_data: a batch array for power-grid-model
149
+ dataset_type: type of dataset
150
+ component: name of component
151
+
152
+ Raises:
153
+ ValueError: when the type for data_filter is incorrect
154
+
155
+ Returns:
156
+ The number of batches
157
+ """
158
+ component_data_checks(batch_data)
159
+ if is_sparse(batch_data):
160
+ indptr = batch_data["indptr"]
161
+ return indptr.size - 1
162
+
163
+ if not is_columnar(batch_data):
164
+ sym_array = batch_data
165
+ else:
166
+ batch_data = cast(DenseBatchColumnarData, batch_data)
167
+ if component is None or dataset_type is None:
168
+ raise ValueError("Cannot deduce batch size for given columnar data without a dataset type or component")
169
+ sym_attributes, _ = _get_sym_or_asym_attributes(dataset_type, component)
170
+ for attribute, array in batch_data.items():
171
+ if attribute in sym_attributes:
172
+ break
173
+ if array.ndim == SINGLE_DATASET_NDIM:
174
+ raise TypeError("Incorrect dimension present in batch data.")
175
+ if array.ndim == BATCH_DATASET_NDIM:
176
+ return 1
177
+ return array.shape[0]
178
+ sym_array = next(iter(batch_data.values()))
179
+
180
+ sym_array = cast(DenseBatchArray | BatchColumn, sym_array)
181
+ if sym_array.ndim not in (SINGLE_DATASET_NDIM, BATCH_DATASET_NDIM):
182
+ raise TypeError("Incorrect dimension present in batch data.")
183
+ if sym_array.ndim == SINGLE_DATASET_NDIM:
184
+ return 1
185
+ return sym_array.shape[0]
186
+
187
+
188
+ def _get_sym_or_asym_attributes(dataset_type: DatasetType, component: ComponentType):
189
+ """Segregate into symmetric of asymmetric attribute.
190
+ An asymmetric attribute holds per-phase values and has an extra dimension.
191
+
192
+ Args:
193
+ dataset_type (DatasetType): dataset type
194
+ component (ComponentType): component name
195
+
196
+ Returns:
197
+ symmetrical and asymmetrical attributes
198
+ """
199
+ asym_attributes = set()
200
+ sym_attributes = set()
201
+ for meta_dataset_type, dataset_meta in power_grid_meta_data.items():
202
+ if dataset_type != meta_dataset_type:
203
+ continue
204
+ for component_name_meta, component_meta in dataset_meta.items():
205
+ if component != component_name_meta:
206
+ continue
207
+ if component_meta.dtype.names is None:
208
+ raise ValueError("No attributes available in meta")
209
+ for attribute in component_meta.dtype.names:
210
+ if component_meta.dtype[attribute].shape == (3,):
211
+ asym_attributes.add(attribute)
212
+ if component_meta.dtype[attribute].shape == ():
213
+ sym_attributes.add(attribute)
214
+ return sym_attributes, asym_attributes
215
+
216
+
217
+ def _split_numpy_array_in_batches(
218
+ data: DenseBatchArray | SingleArray | SingleColumn | BatchColumn,
219
+ ) -> list[SingleArray] | list[SingleColumn]:
220
+ """
221
+ Split a single dense numpy array into one or more batches
222
+
223
+ Args:
224
+ data: A 1D or 2D Numpy structured array. A 1D array is a single table / batch, a 2D array is a batch per table.
225
+
226
+ Returns:
227
+ A list with a single numpy structured array per batch
228
+ """
229
+ if data.ndim == SINGLE_DATASET_NDIM:
230
+ return [data]
231
+ if data.ndim in [2, 3]:
232
+ return [data[i, ...] for i in range(data.shape[0])]
233
+ raise ValueError("Dimension of the component data is invalid.")
234
+
235
+
236
+ def split_dense_batch_data_in_batches(
237
+ data: SingleComponentData | DenseBatchData, batch_size: int
238
+ ) -> list[SingleComponentData]:
239
+ """
240
+ Split a single dense numpy array into one or more batches
241
+
242
+ Args:
243
+ data: A 1D or 2D Numpy structured array. A 1D array is a single table / batch, a 2D array is a batch per table.
244
+ batch_size: size of batch
245
+
246
+ Returns:
247
+ A list with a single component data per scenario
248
+ """
249
+ if isinstance(data, np.ndarray):
250
+ return cast(list[SingleComponentData], _split_numpy_array_in_batches(data))
251
+
252
+ scenarios_per_attribute = {
253
+ attribute: _split_numpy_array_in_batches(attribute_data) for attribute, attribute_data in data.items()
254
+ }
255
+
256
+ return [
257
+ {attribute: scenarios_per_attribute[attribute][scenario] for attribute, attribute_data in data.items()}
258
+ for scenario in range(batch_size)
259
+ ]
260
+
261
+
262
+ def split_sparse_batch_data_in_batches(
263
+ batch_data: SparseBatchData, component: ComponentType
264
+ ) -> list[SingleComponentData]:
265
+ """
266
+ Split a single numpy array representing a compressed sparse structure into one or more batches
267
+
268
+ Args:
269
+ batch_data: Sparse batch data
270
+ component: The name of the component to which the data belongs, only used for errors.
271
+
272
+ Returns:
273
+ A list with a single numpy structured array per batch
274
+ """
275
+ for key in ["indptr", "data"]:
276
+ if key not in batch_data:
277
+ raise KeyError(
278
+ f"Missing '{key}' in sparse batch data for '{component}' "
279
+ "(expected a python dictionary containing two keys: 'indptr' and 'data')."
280
+ )
281
+
282
+ data = batch_data["data"]
283
+ indptr = batch_data["indptr"]
284
+
285
+ def _split_buffer(buffer: np.ndarray, scenario: int) -> SingleArray:
286
+ if not isinstance(buffer, np.ndarray) or buffer.ndim != 1:
287
+ raise TypeError(
288
+ f"Invalid data type {type(buffer).__name__} in sparse batch data for '{component}' "
289
+ "(should be a 1D Numpy structured array (i.e. a single 'table'))."
290
+ )
291
+
292
+ if not isinstance(indptr, np.ndarray) or indptr.ndim != 1 or not np.issubdtype(indptr.dtype, np.integer):
293
+ raise TypeError(
294
+ f"Invalid indptr data type {type(indptr).__name__} in batch data for '{component}' "
295
+ "(should be a 1D Numpy array (i.e. a single 'list'), "
296
+ "containing indices (i.e. integers))."
297
+ )
298
+
299
+ if indptr[0] != 0 or indptr[-1] != len(buffer) or indptr[scenario] > indptr[scenario + 1]:
300
+ raise TypeError(
301
+ f"Invalid indptr in batch data for '{component}' "
302
+ f"(should start with 0, end with the number of objects ({len(buffer)}) "
303
+ "and be monotonic increasing)."
304
+ )
305
+
306
+ return buffer[indptr[scenario] : indptr[scenario + 1]]
307
+
308
+ def _get_scenario(scenario: int) -> SingleComponentData:
309
+ if isinstance(data, dict):
310
+ return {attribute: _split_buffer(attribute_data, scenario) for attribute, attribute_data in data.items()}
311
+ return _split_buffer(data, scenario)
312
+
313
+ return [_get_scenario(i) for i in range(len(indptr) - 1)]
314
+
315
+
316
+ def convert_dataset_to_python_dataset(data: Dataset) -> PythonDataset:
317
+ """
318
+ Convert internal numpy arrays to native python data
319
+ If an attribute is not available (NaN value), it will not be exported.
320
+
321
+ Args:
322
+ data: A single or batch dataset for power-grid-model
323
+ Returns:
324
+ A python dict for single dataset
325
+ A python list for batch dataset
326
+ """
327
+
328
+ # Check if the dataset is a single dataset or batch dataset
329
+ # It is a batch dataset if it is a 2D array or an indptr/data structure
330
+ is_batch: bool | None = None
331
+ for component, array in data.items():
332
+ is_dense_batch = isinstance(array, np.ndarray) and array.ndim == BATCH_DATASET_NDIM
333
+ is_sparse_batch = isinstance(array, dict) and "indptr" in array and "data" in array
334
+ if is_batch is not None and is_batch != (is_dense_batch or is_sparse_batch):
335
+ raise ValueError(
336
+ f"Mixed {'' if is_batch else 'non-'}batch data "
337
+ f"with {'non-' if is_batch else ''}batch data ({component})."
338
+ )
339
+ is_batch = is_dense_batch or is_sparse_batch
340
+
341
+ # If it is a batch, convert the batch data to a list of batches, then convert each batch individually.
342
+ if is_batch:
343
+ # We have established that this is batch data, so let's tell the type checker that this is a BatchDataset
344
+ data = cast(BatchDataset, data)
345
+ list_data = convert_batch_dataset_to_batch_list(data)
346
+ return [convert_single_dataset_to_python_single_dataset(data=x) for x in list_data]
347
+
348
+ # We have established that this is not batch data, so let's tell the type checker that this is a SingleDataset
349
+ data = cast(SingleDataset, data)
350
+ return convert_single_dataset_to_python_single_dataset(data=data)
351
+
352
+
353
+ def convert_single_dataset_to_python_single_dataset(
354
+ data: SingleDataset,
355
+ ) -> SinglePythonDataset:
356
+ """
357
+ Convert internal numpy arrays to native python data
358
+ If an attribute is not available (NaN value), it will not be exported.
359
+
360
+ Args:
361
+ data: A single dataset for power-grid-model
362
+
363
+ Returns:
364
+ A python dict for single dataset
365
+ """
366
+
367
+ # Convert each numpy array to a list of objects, which contains only the non-NaN attributes:
368
+ # For example: {"node": [{"id": 0, ...}, {"id": 1, ...}], "line": [{"id": 2, ...}]}
369
+ def _convert_component(objects: SingleComponentData):
370
+ # This should be a single data set
371
+ if not isinstance(objects, np.ndarray) or objects.ndim != 1 or objects.dtype.names is None:
372
+ raise ValueError("Invalid data format")
373
+
374
+ return [
375
+ {attribute: obj[attribute].tolist() for attribute in objects.dtype.names if not is_nan(obj[attribute])}
376
+ for obj in objects
377
+ ]
378
+
379
+ return {component: _convert_component(objects) for component, objects in data.items()}
380
+
381
+
382
+ def compatibility_convert_row_columnar_dataset(
383
+ data: Dataset,
384
+ data_filter: ComponentAttributeMapping,
385
+ dataset_type: DatasetType,
386
+ available_components: list[ComponentType] | None = None,
387
+ ) -> Dataset:
388
+ """Temporary function to transform row, column or mixed based datasets to a full row or column based dataset as per
389
+ the data_filter. The purpose of this function is to mimic columnar data and transform back to row data without any
390
+ memory footprint benefits.
391
+ Note: Copies are made on a per-component basis; if a component is row based in both the input and the requested
392
+ output, that component is returned without a copy.
393
+
394
+ Args:
395
+ data (Dataset): dataset to convert
396
+ data_filter (ComponentAttributeMapping): desired component and attribute mapping
397
+ dataset_type (DatasetType): type of dataset (e.g., input, update or [sym | asym | sc]_output)
398
+ available_components (list[ComponentType] | None): available components in model
399
+
400
+ Returns:
401
+ Dataset: converted dataset
402
+ """
403
+ if available_components is None:
404
+ available_components = list(data.keys())
405
+
406
+ processed_data_filter = process_data_filter(dataset_type, data_filter, available_components)
407
+
408
+ result_data: Dataset = {}
409
+ for comp_name, attrs in processed_data_filter.items():
410
+ if comp_name not in data:
411
+ continue
412
+
413
+ sub_data = _extract_data_from_component_data(data[comp_name])
414
+ converted_sub_data = _convert_data_to_row_or_columnar(
415
+ data=sub_data,
416
+ comp_name=comp_name,
417
+ dataset_type=dataset_type,
418
+ attrs=attrs,
419
+ )
420
+
421
+ if is_sparse(data[comp_name]):
422
+ result_data[comp_name] = {"indptr": _extract_indptr(data[comp_name]), "data": converted_sub_data}
423
+ else:
424
+ result_data[comp_name] = converted_sub_data
425
+ return result_data
426
+
427
+
428
+ def _convert_data_to_row_or_columnar(
429
+ data: SingleComponentData,
430
+ comp_name: ComponentType,
431
+ dataset_type: DatasetType,
432
+ attrs: set[str] | list[str] | None | ComponentAttributeFilterOptions,
433
+ ) -> SingleComponentData:
434
+ """Converts row or columnar component data to row or columnar component data as requested in `attrs`."""
435
+ if attrs is None:
436
+ if not is_columnar(data):
437
+ return data
438
+ data = cast(SingleColumnarData, data)
439
+ output_array = initialize_array(dataset_type, comp_name, next(iter(data.values())).shape)
440
+ for k in data:
441
+ output_array[k] = data[k]
442
+ return output_array
443
+
444
+ if isinstance(attrs, (list, set)) and len(attrs) == 0:
445
+ return {}
446
+ if isinstance(attrs, ComponentAttributeFilterOptions):
447
+ names = cast(SingleArray, data).dtype.names if not is_columnar(data) else cast(SingleColumnarData, data).keys()
448
+ if names is None:
449
+ raise ValueError("No attributes available in meta")
450
+ return {attr: deepcopy(data[attr]) for attr in names}
451
+ return {attr: deepcopy(data[attr]) for attr in attrs}
452
+
453
+
454
+ def process_data_filter(
455
+ dataset_type: DatasetType,
456
+ data_filter: ComponentAttributeMapping,
457
+ available_components: list[ComponentType],
458
+ ) -> ComponentAttributeMappingDict:
459
+ """Checks valid type for data_filter. Also checks for any invalid component names and attribute names.
460
+
461
+ Args:
462
+ dataset_type (DatasetType): the type of output that the user will see (as per the calculation options)
463
+ data_filter (ComponentAttributeMapping): data_filter provided by user
464
+ available_components (list[ComponentType]): all components available in model instance or data
465
+
466
+ Returns:
467
+ ComponentAttributeMappingDict: processed data_filter in a dictionary
468
+ """
469
+ if data_filter is None:
470
+ processed_data_filter: ComponentAttributeMappingDict = {ComponentType[k]: None for k in available_components}
471
+ elif isinstance(data_filter, ComponentAttributeFilterOptions):
472
+ processed_data_filter = {ComponentType[k]: data_filter for k in available_components}
473
+ elif isinstance(data_filter, (list, set)):
474
+ processed_data_filter = {ComponentType[k]: None for k in data_filter}
475
+ elif isinstance(data_filter, dict) and all(
476
+ attrs is None or isinstance(attrs, (set, list, ComponentAttributeFilterOptions))
477
+ for attrs in data_filter.values()
478
+ ):
479
+ processed_data_filter = data_filter
480
+ else:
481
+ raise ValueError(f"Invalid filter provided: {data_filter}")
482
+
483
+ validate_data_filter(processed_data_filter, dataset_type, available_components)
484
+ return processed_data_filter
485
+
486
+
487
+ def validate_data_filter(
488
+ data_filter: ComponentAttributeMappingDict,
489
+ dataset_type: DatasetType,
490
+ available_components: list[ComponentType],
491
+ ) -> None:
492
+ """Raise error if some specified components or attributes are unknown.
493
+
494
+ Args:
495
+ data_filter (ComponentAttributeMappingDict): Processed component-to-attribute dictionary
496
+ dataset_type (DatasetType): Type of dataset
497
+ available_components (list[ComponentType]): all components available in model instance or data
498
+
499
+ Raises:
500
+ ValueError: when the type for data_filter is incorrect
501
+ KeyError: with "unknown component types" for any unknown components
502
+ KeyError: with "unknown attributes" for unknown attribute(s) for a known component
503
+ """
504
+ dataset_meta = power_grid_meta_data[dataset_type]
505
+
506
+ for source, components in {
507
+ "data_filter": data_filter.keys(),
508
+ "data": available_components,
509
+ }.items():
510
+ unknown_components = [x for x in components if x not in dataset_meta]
511
+ if unknown_components:
512
+ raise KeyError(f"The following specified component types are unknown:{unknown_components} in {source}")
513
+
514
+ unknown_attributes = {}
515
+ for comp_name, attrs in data_filter.items():
516
+ if attrs is None or isinstance(attrs, ComponentAttributeFilterOptions):
517
+ continue
518
+
519
+ attr_names = dataset_meta[comp_name].dtype.names
520
+ diff = set(cast(set[str] | list[str], attrs))
521
+ if attr_names is not None:
522
+ diff = diff.difference(attr_names)
523
+ if diff != set():
524
+ unknown_attributes[comp_name] = diff
525
+
526
+ if unknown_attributes:
527
+ raise KeyError(f"The following specified attributes are unknown: {unknown_attributes} in data_filter")
528
+
529
+
530
+ def is_sparse(component_data: ComponentData) -> bool:
+     """Check if component_data is sparse or dense. Only batch data can be sparse."""
+     return isinstance(component_data, dict) and set(component_data.keys()) == {
+         "indptr",
+         "data",
+     }
+
+
+ def is_columnar(component_data: ComponentData) -> bool:
+     """Check if component_data is columnar or row based."""
+     if is_sparse(component_data):
+         return not isinstance(component_data["data"], np.ndarray)
+     return not isinstance(component_data, np.ndarray)
+
+
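A minimal sketch of how the two predicates above classify the supported layouts (names reuse the module's own imports):

row = initialize_array(DatasetType.input, ComponentType.node, 3)        # structured array
columnar = {"id": np.array([1, 2, 3])}                                   # dict of attribute arrays
sparse = {"indptr": np.array([0, 1, 3]), "data": row}                    # sparse batch wrapper

is_sparse(row), is_columnar(row)            # (False, False)  -> dense, row based
is_sparse(columnar), is_columnar(columnar)  # (False, True)   -> dense, columnar
is_sparse(sparse), is_columnar(sparse)      # (True, False)   -> sparse, row based inside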
+ def is_nan_or_default(x: np.ndarray) -> np.ndarray:
+     """
+     Check if elements in the array are NaN or equal to the min of its dtype.
+
+     Args:
+         x: A NumPy array to check.
+
+     Returns:
+         A boolean NumPy array where each element is True if the corresponding element in x is NaN
+         or the min of its dtype, and False otherwise.
+     """
+     if x.dtype == np.float64:
+         return np.isnan(x)
+     if x.dtype in (np.int32, np.int8):
+         return x == np.iinfo(x.dtype).min
+     raise TypeError(f"Unsupported data type: {x.dtype}")
+
+
+ def is_nan_or_equivalent(array) -> bool:
+     """
+     Check if the array contains only NaN values or the equivalent NaN values for specific data types.
+     This is the aggregated version of `is_nan_or_default` for the whole array.
+
+     Args:
+         array: The array to check.
+
+     Returns:
+         bool: True if the array contains only NaN or equivalent NaN values, False otherwise.
+     """
+     return isinstance(array, np.ndarray) and bool(
+         (array.dtype == np.float64 and np.isnan(array).all())
+         or (array.dtype in (np.int32, np.int8) and np.all(array == np.iinfo(array.dtype).min))
+     )
+
+
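A short sketch of the sentinel convention these two helpers encode: float64 attributes use NaN, while int32/int8 attributes use the minimum value of their dtype as the "not provided" marker:

floats = np.array([np.nan, 1.0])
ints = np.array([np.iinfo(np.int32).min, 5], dtype=np.int32)

is_nan_or_default(floats)   # array([ True, False])
is_nan_or_default(ints)     # array([ True, False])
is_nan_or_equivalent(ints)  # False: not every element is the sentinel
is_nan_or_equivalent(np.full(3, np.iinfo(np.int32).min, dtype=np.int32))  # True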
+ def _check_sparse_dense(component_data: ComponentData, err_msg_suffixed: str) -> ComponentData:
+     if is_sparse(component_data):
+         indptr = component_data["indptr"]
+         if not isinstance(indptr, np.ndarray):
+             raise TypeError(err_msg_suffixed.format(f"Invalid indptr type {type(indptr).__name__}. "))
+         sub_data = component_data["data"]
+     elif isinstance(component_data, dict) and ("indptr" in component_data or "data" in component_data):
+         missing_element = "indptr" if "indptr" not in component_data else "data"
+         raise KeyError(err_msg_suffixed.format(f"Missing '{missing_element}' in sparse batch data. "))
+     else:
+         sub_data = component_data
+     return sub_data
+
+
+ def _check_columnar_row(sub_data: ComponentData, err_msg_suffixed: str) -> None:
+     if is_columnar(sub_data):
+         if not isinstance(sub_data, dict):
+             raise TypeError(err_msg_suffixed.format(""))
+         for attribute, attribute_array in sub_data.items():
+             if not isinstance(attribute_array, np.ndarray):
+                 raise TypeError(err_msg_suffixed.format(f"'{attribute}' attribute. "))
+             if attribute_array.ndim not in [1, 2, 3]:
+                 raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {attribute_array.ndim}"))
+     elif not isinstance(sub_data, np.ndarray):
+         raise TypeError(err_msg_suffixed.format(f"Invalid data type {type(sub_data).__name__} "))
+     elif isinstance(sub_data, np.ndarray) and sub_data.ndim not in [1, 2]:
+         raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {sub_data.ndim}. "))
+
+
+ def component_data_checks(component_data: ComponentData, component=None) -> None:
+     """Checks whether component_data is valid ComponentData and raises an error if it is not."""
+     component_name = f"'{component}'" if component is not None else ""
+     err_msg = f"Invalid data for {component_name} component. {{0}}"
+     err_msg_suffixed = err_msg + "Expecting a 1D/2D Numpy structured array or a dictionary of such."
+
+     sub_data = _check_sparse_dense(component_data, err_msg_suffixed)
+     _check_columnar_row(sub_data, err_msg_suffixed)
+
+
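A hedged sketch of these checks in action (the component argument is only used to build the error message):

good = initialize_array(DatasetType.input, ComponentType.line, 4)
component_data_checks(good, ComponentType.line)                 # passes silently

component_data_checks({"indptr": np.array([0, 4])}, "line")     # KeyError: missing 'data' in sparse batch data
component_data_checks([1, 2, 3], "line")                        # TypeError: expects an ndarray or a dict of ndarrays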
+ def _extract_indptr(data: ComponentData) -> IndexPointer:  # pragma: no cover
+     """Returns the indptr and checks whether it is valid.
+
+     Args:
+         data (ComponentData): The component data
+
+     Raises:
+         TypeError: if the indptr is invalid or not available
+
+     Returns:
+         IndexPointer: the indptr, if present
+     """
+     if not is_sparse(data):
+         raise TypeError("Not sparse data")
+     indptr = data["indptr"]
+     if not isinstance(indptr, np.ndarray):
+         raise TypeError("indptr is not a 1D numpy array")
+     if indptr.ndim != 1:
+         raise TypeError("indptr is not a 1D numpy array")
+     return indptr
+
+
+ def _extract_columnar_data(
+     data: ComponentData, is_batch: bool | None = None
+ ) -> SingleColumnarData | DenseBatchColumnarData:  # pragma: no cover
+     """Returns the contents of the columnar data.
+
+     Args:
+         data (ComponentData): component data
+         is_batch (bool | None, optional): whether the given data is batch data. The batch check is skipped when None.
+
+     Raises:
+         TypeError: if the data is not columnar or is otherwise invalid
+
+     Returns:
+         SingleColumnarData | DenseBatchColumnarData: the contents of the columnar data
+     """
+     not_columnar_data_message = "Expected columnar data"
+
+     if is_batch is None:
+         allowed_dims = [1, 2, 3]
+     elif is_batch:
+         allowed_dims = [2, 3]
+     else:
+         allowed_dims = [1, 2]
+
+     sub_data = data["data"] if is_sparse(data) else data
+
+     if not isinstance(sub_data, dict):
+         raise TypeError(not_columnar_data_message)
+     for attribute, attribute_array in sub_data.items():
+         if not isinstance(attribute_array, np.ndarray) or not isinstance(attribute, str):
+             raise TypeError(not_columnar_data_message)
+         if attribute_array.ndim not in allowed_dims:
+             raise TypeError(not_columnar_data_message)
+     return cast(SingleColumnarData | DenseBatchColumnarData, sub_data)
+
+
+ def _extract_row_based_data(
+     data: ComponentData, is_batch: bool | None = None
+ ) -> SingleArray | DenseBatchArray:  # pragma: no cover
+     """Returns the contents of the row based data.
+
+     Args:
+         data (ComponentData): component data
+         is_batch (bool | None, optional): whether the given data is batch data. The batch check is skipped when None.
+
+     Raises:
+         TypeError: if the data is not row based or is otherwise invalid
+
+     Returns:
+         SingleArray | DenseBatchArray: the contents of the row based data
+     """
+     if is_batch is None:
+         allowed_dims = [1, 2]
+     elif is_batch:
+         allowed_dims = [2]
+     else:
+         allowed_dims = [1]
+
+     sub_data = data["data"] if is_sparse(data) else data
+
+     if not isinstance(sub_data, np.ndarray):
+         raise TypeError("Expected row based data")
+     if sub_data.ndim not in allowed_dims:
+         raise TypeError("Expected row based data")
+     return sub_data
+
+
+ def _extract_data_from_component_data(data: ComponentData, is_batch: bool | None = None):
+     return _extract_columnar_data(data, is_batch) if is_columnar(data) else _extract_row_based_data(data, is_batch)
+
+
+ def _extract_contents_from_data(data: ComponentData):
+     return data["data"] if is_sparse(data) else data
+
+
+ def check_indptr_consistency(indptr: IndexPointer, batch_size: int | None, contents_size: int):
+     """Checks whether an indptr is valid. The batch size check is optional.
+
+     Args:
+         indptr (IndexPointer): The indptr array
+         batch_size (int | None): number of scenarios
+         contents_size (int): total number of elements across all scenarios
+
+     Raises:
+         ValueError: If the indptr is invalid
+     """
+     if indptr[0] != 0 or indptr[-1] != contents_size:
+         raise ValueError(f"indptr should start from zero and end at size of data array. {VALIDATOR_MSG}")
+     if np.any(np.diff(indptr) < 0):
+         raise ValueError(f"indptr should be increasing. {VALIDATOR_MSG}")
+
+     actual_batch_size = indptr.size - 1
+     if batch_size is not None and batch_size != actual_batch_size:
+         raise ValueError(
+             f"Incorrect/inconsistent batch size provided: {actual_batch_size} scenarios provided "
+             f"but {batch_size} scenarios expected. {VALIDATOR_MSG}"
+         )
+
+
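For example, a valid indptr for a sparse batch of 3 scenarios containing 2, 0 and 3 elements respectively (a sketch using the module's own imports):

indptr = np.array([0, 2, 2, 5])
check_indptr_consistency(indptr, batch_size=3, contents_size=5)   # passes

check_indptr_consistency(np.array([0, 3, 2]), batch_size=2, contents_size=2)
# ValueError: indptr should be increasing. ...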
+ def get_dataset_type(data: Dataset) -> DatasetType:
+     """
+     Deduce the dataset type from the provided dataset.
+
+     Args:
+         data: the dataset
+
+     Raises:
+         ValueError
+             if the dataset type cannot be deduced because multiple dataset types match the format
+             (probably because the data contained no supported components, e.g. was empty)
+         PowerGridError
+             if no dataset type matches the format of the data
+             (probably because the data contained conflicting data formats)
+
+     Returns:
+         The dataset type.
+     """
+     candidates = set(power_grid_meta_data.keys())
+
+     if all(is_columnar(v) for v in data.values()):
+         raise ValueError("The dataset type could not be deduced. At least one component should have row based data.")
+
+     for dataset_type, dataset_metadatas in power_grid_meta_data.items():
+         for component, dataset_metadata in dataset_metadatas.items():
+             if component not in data or is_columnar(data[component]):
+                 continue
+             component_data = data[component]
+
+             component_dtype = component_data["data"].dtype if is_sparse(component_data) else component_data.dtype
+             if component_dtype is not dataset_metadata.dtype:
+                 candidates.discard(dataset_type)
+                 break
+
+     if not candidates:
+         raise PowerGridError(
+             "The dataset type could not be deduced because no type matches the data. "
+             "This usually means inconsistent data was provided."
+         )
+     if len(candidates) > 1:
+         raise ValueError("The dataset type could not be deduced because multiple dataset types match the data.")
+
+     return next(iter(candidates))
+
+
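A minimal sketch of the deduction, assuming (as in the regular power-grid-model datasets) that sym_load appears in every dataset type's metadata with a distinct dtype, so only the input type survives the dtype comparison:

sym_load = initialize_array(DatasetType.input, ComponentType.sym_load, 2)
get_dataset_type({ComponentType.sym_load: sym_load})   # -> DatasetType.input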
+ def get_comp_size(comp_data: SingleColumnarData | SingleArray) -> int:
+     """
+     Get the number of elements in the comp_data of a single dataset.
+
+     Args:
+         comp_data: Columnar or row based data of a single batch
+
+     Returns:
+         Number of elements in the component
+     """
+     if not is_columnar(comp_data):
+         return len(comp_data)
+     comp_data = cast(SingleColumnarData, comp_data)
+     return len(next(iter(comp_data.values())))
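
A short sketch of both branches of get_comp_size, reusing the module's own imports:

row = initialize_array(DatasetType.input, ComponentType.node, 5)
get_comp_size(row)                     # 5, length of the structured array
get_comp_size({"id": np.arange(5)})    # 5, length of the first attribute array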