power-grid-model 1.11.23__py3-none-win_amd64.whl → 1.12.70__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. power_grid_model/__init__.py +54 -52
  2. power_grid_model/_core/__init__.py +3 -3
  3. power_grid_model/_core/buffer_handling.py +493 -478
  4. power_grid_model/_core/data_handling.py +195 -141
  5. power_grid_model/_core/data_types.py +143 -132
  6. power_grid_model/_core/dataset_definitions.py +109 -108
  7. power_grid_model/_core/enum.py +226 -226
  8. power_grid_model/_core/error_handling.py +206 -205
  9. power_grid_model/_core/errors.py +130 -126
  10. power_grid_model/_core/index_integer.py +17 -17
  11. power_grid_model/_core/options.py +71 -70
  12. power_grid_model/_core/power_grid_core.py +563 -581
  13. power_grid_model/_core/power_grid_dataset.py +535 -534
  14. power_grid_model/_core/power_grid_meta.py +257 -243
  15. power_grid_model/_core/power_grid_model.py +969 -687
  16. power_grid_model/_core/power_grid_model_c/__init__.py +3 -0
  17. power_grid_model/_core/power_grid_model_c/bin/power_grid_model_c.dll +0 -0
  18. power_grid_model/_core/power_grid_model_c/get_pgm_dll_path.py +63 -0
  19. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/basics.h +255 -0
  20. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/buffer.h +108 -0
  21. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset.h +316 -0
  22. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset_definitions.h +1052 -0
  23. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/handle.h +99 -0
  24. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/meta_data.h +189 -0
  25. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/model.h +125 -0
  26. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/options.h +142 -0
  27. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/serialization.h +118 -0
  28. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c.h +36 -0
  29. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/basics.hpp +65 -0
  30. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/buffer.hpp +61 -0
  31. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/dataset.hpp +220 -0
  32. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/handle.hpp +108 -0
  33. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/meta_data.hpp +84 -0
  34. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/model.hpp +63 -0
  35. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/options.hpp +52 -0
  36. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/serialization.hpp +124 -0
  37. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/utils.hpp +81 -0
  38. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp.hpp +19 -0
  39. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfig.cmake +37 -0
  40. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfigVersion.cmake +65 -0
  41. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets-release.cmake +19 -0
  42. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets.cmake +144 -0
  43. power_grid_model/_core/power_grid_model_c/lib/power_grid_model_c.lib +0 -0
  44. power_grid_model/_core/power_grid_model_c/share/LICENSE +292 -0
  45. power_grid_model/_core/power_grid_model_c/share/README.md +15 -0
  46. power_grid_model/_core/serialization.py +317 -317
  47. power_grid_model/_core/typing.py +20 -20
  48. power_grid_model/_core/utils.py +798 -789
  49. power_grid_model/data_types.py +321 -307
  50. power_grid_model/enum.py +27 -27
  51. power_grid_model/errors.py +37 -37
  52. power_grid_model/typing.py +43 -43
  53. power_grid_model/utils.py +473 -465
  54. power_grid_model/validation/__init__.py +25 -25
  55. power_grid_model/validation/_rules.py +1171 -1175
  56. power_grid_model/validation/_validation.py +1172 -1158
  57. power_grid_model/validation/assertions.py +93 -93
  58. power_grid_model/validation/errors.py +602 -588
  59. power_grid_model/validation/utils.py +313 -321
  60. {power_grid_model-1.11.23.dist-info → power_grid_model-1.12.70.dist-info}/METADATA +178 -180
  61. power_grid_model-1.12.70.dist-info/RECORD +65 -0
  62. {power_grid_model-1.11.23.dist-info → power_grid_model-1.12.70.dist-info}/WHEEL +1 -1
  63. power_grid_model-1.12.70.dist-info/entry_points.txt +3 -0
  64. power_grid_model/_core/_power_grid_core.dll +0 -0
  65. power_grid_model-1.11.23.dist-info/RECORD +0 -36
  66. power_grid_model-1.11.23.dist-info/top_level.txt +0 -1
  67. {power_grid_model-1.11.23.dist-info → power_grid_model-1.12.70.dist-info}/licenses/LICENSE +0 -0
power_grid_model/validation/utils.py
@@ -1,321 +1,313 @@
- # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
- #
- # SPDX-License-Identifier: MPL-2.0
-
- """
- Utilities used for validation. Only errors_to_string() is intended for end users.
- """
-
- import re
- from typing import Any, cast
-
- import numpy as np
-
- from power_grid_model import power_grid_meta_data
- from power_grid_model._core.dataset_definitions import (
-     ComponentType,
-     ComponentTypeLike as _ComponentTypeLike,
-     ComponentTypeVar,
-     DatasetType,
-     _str_to_component_type,
- )
- from power_grid_model.data_types import SingleArray, SingleComponentData, SingleDataset
- from power_grid_model.validation.errors import ValidationError
-
-
- def _eval_expression(data: np.ndarray, expression: int | float | str) -> np.ndarray:
-     """
-     Wrapper function that checks the type of the 'expression'. If the expression is a string, it is assumed to be a
-     field expression and the expression is validated. Otherwise it is assumed to be a numerical value and the value
-     is casted to a numpy 'array'.
-
-     Args:
-         data: A numpy structured array
-         expression: A numerical value, or a string, representing a (combination of) field(s)
-
-     Returns: The number or an evaluation of the field name(s) in the data, always represented as a Numpy 'array'.
-
-     Examples:
-         123 -> np.array(123)
-         123.4 -> np.array(123.4)
-         'value' -> data['value']
-         'foo/bar' -> data['foo'] / data[bar]
-
-     """
-     if isinstance(expression, str):
-         return _eval_field_expression(data, expression)
-     return np.array(expression)
-
-
- def _eval_field_expression(data: np.ndarray, expression: str) -> np.ndarray:
-     """
-     A field expression can either be the name of a field (e.g. 'field_x') in the data, or a ratio between two fields
-     (e.g. 'field_x / field_y'). The expression is checked on validity and then the fields are checked to be present in
-     the data. If the expression is a single field name, the field is returned. If it is a ratio, the ratio is
-     calculated and returned. Values divided by 0 will result in nan values without warning.
-
-     Args:
-         data: A numpy structured array
-         expression: A string, representing a (combination of) field(s)
-
-     Expression should be a combination of:
-         - field names (may contain lower case letters, numbers and underscores)
-         - a single mathematical operator /
-
-     Returns: An evaluation of the field name(s) in the data.
-
-     Examples:
-         'value' -> data['value']
-         'foo/bar' -> data['foo'] / data['bar']
-
-     """
-     # Validate the expression
-     match = re.fullmatch(r"[a-z][a-z0-9_]*(\s*/\s*[a-z][a-z0-9_]*)?", expression)
-     if not match:
-         raise ValueError(f"Invalid field expression '{expression}'")
-
-     if data.dtype.names is None:
-         raise ValueError("No attributes available in meta")
-
-     # Find all field names and check if they exist in the dataset
-     fields = [f.strip() for f in expression.split("/")]
-     for field in fields:
-         if field not in data.dtype.names:
-             raise KeyError(f"Invalid field name {field}")
-
-     if len(fields) == 1:
-         return data[fields[0]]
-
-     assert len(fields) == 2
-     zero_div = np.logical_or(np.equal(data[fields[1]], 0.0), np.logical_not(np.isfinite(data[fields[1]])))
-     if np.any(zero_div):
-         result = np.full_like(data[fields[0]], np.nan)
-         np.true_divide(data[fields[0]], data[fields[1]], out=result, where=~zero_div)
-         return result
-     return np.true_divide(data[fields[0]], data[fields[1]])
-
-
- def _update_input_data(input_data: SingleDataset, update_data: SingleDataset):
-     """
-     Update the input data using the available non-nan values in the update data.
-     """
-
-     merged_data = {component: array.copy() for component, array in input_data.items()}
-     for component in update_data:
-         _update_component_data(component, merged_data[component], update_data[component])
-     return merged_data
-
-
- def _update_component_data(
-     component: _ComponentTypeLike, input_data: SingleComponentData, update_data: SingleComponentData
- ) -> None:
-     """
-     Update the data in a single component data set, with another single component data set,
-     indexed on the "id" field and only non-NaN values are overwritten.
-     """
-     if isinstance(input_data, np.ndarray) and isinstance(update_data, np.ndarray):
-         return _update_component_array_data(component=component, input_data=input_data, update_data=update_data)
-
-     raise NotImplementedError()  # TODO(mgovers): add support for columnar data
-
-
- def _update_component_array_data(
-     component: _ComponentTypeLike, input_data: SingleArray, update_data: SingleArray
- ) -> None:
-     """
-     Update the data in a numpy array, with another numpy array,
-     indexed on the "id" field and only non-NaN values are overwritten.
-     """
-     if update_data.dtype.names is None:
-         raise ValueError("Invalid data format")
-
-     optional_ids_active = (
-         "id" in update_data.dtype.names
-         and np.all(update_data["id"] == np.iinfo(update_data["id"].dtype).min)
-         and len(update_data["id"]) == len(input_data["id"])
-     )
-     update_data_ids = input_data["id"] if optional_ids_active else update_data["id"]
-
-     for field in update_data.dtype.names:
-         if field == "id":
-             continue
-         nan = _nan_type(component, field, DatasetType.update)
-         mask = ~np.isnan(update_data[field]) if np.isnan(nan) else np.not_equal(update_data[field], nan)
-
-         if mask.ndim == 2:
-             for phase in range(mask.shape[1]):
-                 # find indexers of to-be-updated object
-                 sub_mask = mask[:, phase]
-                 idx = _get_indexer(input_data["id"], update_data_ids[sub_mask])
-                 # update
-                 input_data[field][idx, phase] = update_data[field][sub_mask, phase]
-         else:
-             # find indexers of to-be-updated object
-             idx = _get_indexer(input_data["id"], update_data_ids[mask])
-             # update
-             input_data[field][idx] = update_data[field][mask]
-
-
- def errors_to_string(
-     errors: list[ValidationError] | dict[int, list[ValidationError]] | None,
-     name: str = "the data",
-     details: bool = False,
-     id_lookup: list[str] | dict[int, str] | None = None,
- ) -> str:
-     """
-     Convert a set of errors (list or dict) to a human readable string representation.
-
-     Args:
-         errors: The error objects. List for input_data only, dict for batch data.
-         name: Human understandable name of the dataset, e.g. input_data, or update_data.
-         details: Display object ids and error specific information.
-         id_lookup: A list or dict (int->str) containing textual object ids
-
-     Returns:
-         A human readable string representation of a set of errors.
-     """
-     if errors is None or len(errors) == 0:
-         return f"{name}: OK"
-     if isinstance(errors, dict):
-         return "\n".join(errors_to_string(err, f"{name}, batch #{i}", details) for i, err in sorted(errors.items()))
-     if len(errors) == 1 and not details:
-         return f"There is a validation error in {name}:\n\t{errors[0]}"
-     if len(errors) == 1:
-         msg = f"There is a validation error in {name}:\n"
-     else:
-         msg = f"There are {len(errors)} validation errors in {name}:\n"
-     if details:
-         for error in errors:
-             msg += "\n\t" + str(error) + "\n"
-             msg += "".join(f"\t\t{k}: {v}\n" for k, v in error.get_context(id_lookup).items())
-     else:
-         msg += "\n".join(f"{i + 1:>4}. {err}" for i, err in enumerate(errors))
-     return msg
-
-
- def _nan_type(component: _ComponentTypeLike, field: str, data_type: DatasetType = DatasetType.input):
-     """
-     Helper function to retrieve the nan value for a certain field as defined in the power_grid_meta_data.
-     """
-     component = _str_to_component_type(component)
-     return power_grid_meta_data[data_type][component].nans[field]
-
-
- def _get_indexer(source: np.ndarray, target: np.ndarray, default_value: int | None = None) -> np.ndarray:
-     """
-     Given array of values from a source and a target dataset.
-     Find the position of each value in the target dataset in the context of the source dataset.
-     This is needed to update values in the dataset by id lookup.
-     Internally this is done by sorting the input ids, then using binary search lookup.
-
-     E.g.: Find the position of each id in an update (target) dataset in the input (source) dataset
-
-     >>> input_ids = [1, 2, 3, 4, 5]
-     >>> update_ids = [3]
-     >>> assert _get_indexer(input_ids, update_ids) == np.array([2])
-
-     Args:
-         source: array of values in the source dataset
-         target: array of values in the target dataset
-         default_value (optional): the default index to provide for target values not in source
-
-     Returns:
-         np.ndarray: array of positions of the values from target dataset in the source dataset
-             if default_value is None, (source[result] == target)
-             else, ((source[result] == target) | (source[result] == default_value))
-
-     Raises:
-         IndexError: if default_value is None and there were values in target that were not in source
-     """
-
-     permutation_sort = np.argsort(source)  # complexity O(N_input * logN_input)
-     indices = np.searchsorted(source, target, sorter=permutation_sort)  # complexity O(N_update * logN_input)
-
-     if default_value is None:
-         return permutation_sort[indices]
-
-     if len(source) == 0:
-         return np.full_like(target, fill_value=default_value)
-
-     clipped_indices = np.take(permutation_sort, indices, mode="clip")
-     return np.where(source[clipped_indices] == target, permutation_sort[clipped_indices], default_value)
-
-
- def _set_default_value(
-     data: SingleDataset, component: _ComponentTypeLike, field: str, default_value: int | float | np.ndarray
- ):
-     """
-     This function sets the default value in the data that is to be validated, so the default values are included in the
-     validation.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-         default_value: Some values are not required, but will receive a default value in the C++ core. To do a proper
-             input validation, these default values should be included in the validation. It can be a fixed value for the
-             entire column (int/float) or be different for each element (np.ndarray).
-
-     Returns:
-
-     """
-     if np.isnan(_nan_type(component, field)):
-         mask = np.isnan(data[component][field])
-     else:
-         mask = data[component][field] == _nan_type(component, field)
-     if isinstance(default_value, np.ndarray):
-         data[component][field][mask] = default_value[mask]
-     else:
-         data[component][field][mask] = default_value
-
-
- def _get_valid_ids(data: SingleDataset, ref_components: _ComponentTypeLike | list[ComponentTypeVar]) -> list[int]:
-     """
-     This function returns the valid IDs specified by all ref_components
-
-     Args:
-         data: The input/update data set for all components
-         ref_components: The component or components in which we want to look for ids
-
-     Returns:
-         list[int]: the list of valid IDs
-     """
-     # For convenience, ref_component may be a string and we'll convert it to a 'list' containing that string as it's
-     # single element.
-     if isinstance(ref_components, (str, ComponentType)):
-         ref_components = cast(list[ComponentTypeVar], [ref_components])
-
-     # Create a set of ids by chaining the ids of all ref_components
-     valid_ids = set()
-     for ref_component in ref_components:
-         if ref_component in data:
-             nan = _nan_type(ref_component, "id")
-             if np.isnan(nan):
-                 mask = ~np.isnan(data[ref_component]["id"])
-             else:
-                 mask = np.not_equal(data[ref_component]["id"], nan)
-             valid_ids.update(data[ref_component]["id"][mask])
-
-     return list(valid_ids)
-
-
- def _get_mask(data: SingleDataset, component: _ComponentTypeLike, field: str, **filters: Any) -> np.ndarray:
-     """
-     Get a mask based on the specified filters. E.g. measured_terminal_type=MeasuredTerminalType.source.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-         ref_components: The component or components in which we want to look for ids
-         **filters: One or more filters on the dataset. E.
-
-     Returns:
-         np.ndarray: the mask
-     """
-     values = data[component][field]
-     mask = np.ones(shape=values.shape, dtype=bool)
-     for filter_field, filter_value in filters.items():
-         mask = np.logical_and(mask, data[component][filter_field] == filter_value)
-
-     return mask
+ # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
+ #
+ # SPDX-License-Identifier: MPL-2.0
+
+ """
+ Utilities used for validation. Only errors_to_string() is intended for end users.
+ """
+
+ import re
+ from typing import Any, cast
+
+ import numpy as np
+
+ from power_grid_model._core.dataset_definitions import ComponentType, DatasetType
+ from power_grid_model._core.power_grid_meta import power_grid_meta_data
+ from power_grid_model.data_types import SingleArray, SingleComponentData, SingleDataset
+ from power_grid_model.validation.errors import ValidationError
+
+
+ def _eval_expression(data: np.ndarray, expression: int | float | str) -> np.ndarray:
+     """
+     Wrapper function that checks the type of the 'expression'. If the expression is a string, it is assumed to be a
+     field expression and the expression is validated. Otherwise it is assumed to be a numerical value and the value
+     is casted to a numpy 'array'.
+
+     Args:
+         data: A numpy structured array
+         expression: A numerical value, or a string, representing a (combination of) field(s)
+
+     Returns: The number or an evaluation of the field name(s) in the data, always represented as a Numpy 'array'.
+
+     Examples:
+         123 -> np.array(123)
+         123.4 -> np.array(123.4)
+         'value' -> data['value']
+         'foo/bar' -> data['foo'] / data[bar]
+
+     """
+     if isinstance(expression, str):
+         return _eval_field_expression(data, expression)
+     return np.array(expression)
+
+
+ def _eval_field_expression(data: np.ndarray, expression: str) -> np.ndarray:
+     """
+     A field expression can either be the name of a field (e.g. 'field_x') in the data, or a ratio between two fields
+     (e.g. 'field_x / field_y'). The expression is checked on validity and then the fields are checked to be present in
+     the data. If the expression is a single field name, the field is returned. If it is a ratio, the ratio is
+     calculated and returned. Values divided by 0 will result in nan values without warning.
+
+     Args:
+         data: A numpy structured array
+         expression: A string, representing a (combination of) field(s)
+
+     Expression should be a combination of:
+         - field names (may contain lower case letters, numbers and underscores)
+         - a single mathematical operator /
+
+     Returns: An evaluation of the field name(s) in the data.
+
+     Examples:
+         'value' -> data['value']
+         'foo/bar' -> data['foo'] / data['bar']
+
+     """
+     # Validate the expression
+     match = re.fullmatch(r"[a-z][a-z0-9_]*(\s*/\s*[a-z][a-z0-9_]*)?", expression)
+     if not match:
+         raise ValueError(f"Invalid field expression '{expression}'")
+
+     if data.dtype.names is None:
+         raise ValueError("No attributes available in meta")
+
+     # Find all field names and check if they exist in the dataset
+     fields = [f.strip() for f in expression.split("/")]
+     for field in fields:
+         if field not in data.dtype.names:
+             raise KeyError(f"Invalid field name {field}")
+
+     if len(fields) == 1:
+         return data[fields[0]]
+
+     max_num_fields = 2
+     if len(fields) != max_num_fields:
+         raise ValueError(f"There should be exactly {max_num_fields} fields, got {len(fields)} fields")
+     zero_div = np.logical_or(np.equal(data[fields[1]], 0.0), np.logical_not(np.isfinite(data[fields[1]])))
+     if np.any(zero_div):
+         result = np.full_like(data[fields[0]], np.nan)
+         np.true_divide(data[fields[0]], data[fields[1]], out=result, where=~zero_div)
+         return result
+     return np.true_divide(data[fields[0]], data[fields[1]])
+
+
+ def _update_input_data(input_data: SingleDataset, update_data: SingleDataset):
+     """
+     Update the input data using the available non-nan values in the update data.
+     """
+
+     merged_data = {component: array.copy() for component, array in input_data.items()}
+     for component in update_data:
+         _update_component_data(component, merged_data[component], update_data[component])
+     return merged_data
+
+
+ def _update_component_data(
+     component: ComponentType, input_data: SingleComponentData, update_data: SingleComponentData
+ ) -> None:
+     """
+     Update the data in a single component data set, with another single component data set,
+     indexed on the "id" field and only non-NaN values are overwritten.
+     """
+     if isinstance(input_data, np.ndarray) and isinstance(update_data, np.ndarray):
+         return _update_component_array_data(component=component, input_data=input_data, update_data=update_data)
+
+     raise NotImplementedError  # TODO(mgovers): add support for columnar data
+
+
+ def _update_component_array_data(component: ComponentType, input_data: SingleArray, update_data: SingleArray) -> None:
+     """
+     Update the data in a numpy array, with another numpy array,
+     indexed on the "id" field and only non-NaN values are overwritten.
+     """
+     batch_dataset_ndim = 2
+     if update_data.dtype.names is None:
+         raise ValueError("Invalid data format")
+
+     optional_ids_active = (
+         "id" in update_data.dtype.names
+         and np.all(update_data["id"] == np.iinfo(update_data["id"].dtype).min)
+         and len(update_data["id"]) == len(input_data["id"])
+     )
+     update_data_ids = input_data["id"] if optional_ids_active else update_data["id"]
+
+     for field in update_data.dtype.names:
+         if field == "id":
+             continue
+         nan = _nan_type(component, field, DatasetType.update)
+         mask = ~np.isnan(update_data[field]) if np.isnan(nan) else np.not_equal(update_data[field], nan)
+
+         if mask.ndim == batch_dataset_ndim:
+             for phase in range(mask.shape[1]):
+                 # find indexers of to-be-updated object
+                 sub_mask = mask[:, phase]
+                 idx = _get_indexer(input_data["id"], update_data_ids[sub_mask])
+                 # update
+                 input_data[field][idx, phase] = update_data[field][sub_mask, phase]
+         else:
+             # find indexers of to-be-updated object
+             idx = _get_indexer(input_data["id"], update_data_ids[mask])
+             # update
+             input_data[field][idx] = update_data[field][mask]
+
+
+ def errors_to_string(
+     errors: list[ValidationError] | dict[int, list[ValidationError]] | None,
+     name: str = "the data",
+     details: bool = False,
+     id_lookup: list[str] | dict[int, str] | None = None,
+ ) -> str:
+     """
+     Convert a set of errors (list or dict) to a human readable string representation.
+
+     Args:
+         errors: The error objects. List for input_data only, dict for batch data.
+         name: Human understandable name of the dataset, e.g. input_data, or update_data.
+         details: Display object ids and error specific information.
+         id_lookup: A list or dict (int->str) containing textual object ids
+
+     Returns:
+         A human readable string representation of a set of errors.
+     """
+     if errors is None or len(errors) == 0:
+         return f"{name}: OK"
+     if isinstance(errors, dict):
+         return "\n".join(errors_to_string(err, f"{name}, batch #{i}", details) for i, err in sorted(errors.items()))
+     if len(errors) == 1 and not details:
+         return f"There is a validation error in {name}:\n\t{errors[0]}"
+     if len(errors) == 1:
+         msg = f"There is a validation error in {name}:\n"
+     else:
+         msg = f"There are {len(errors)} validation errors in {name}:\n"
+     if details:
+         for error in errors:
+             msg += "\n\t" + str(error) + "\n"
+             msg += "".join(f"\t\t{k}: {v}\n" for k, v in error.get_context(id_lookup).items())
+     else:
+         msg += "\n".join(f"{i + 1:>4}. {err}" for i, err in enumerate(errors))
+     return msg
+
+
+ def _nan_type(component: ComponentType, field: str, data_type: DatasetType = DatasetType.input):
+     """Helper function to retrieve the nan value for a certain field as defined in the power_grid_meta_data."""
+     return power_grid_meta_data[data_type][component].nans[field]
+
+
+ def _get_indexer(source: np.ndarray, target: np.ndarray, default_value: int | None = None) -> np.ndarray:
+     """
+     Given array of values from a source and a target dataset.
+     Find the position of each value in the target dataset in the context of the source dataset.
+     This is needed to update values in the dataset by id lookup.
+     Internally this is done by sorting the input ids, then using binary search lookup.
+
+     E.g.: Find the position of each id in an update (target) dataset in the input (source) dataset
+
+     >>> input_ids = [1, 2, 3, 4, 5]
+     >>> update_ids = [3]
+     >>> assert _get_indexer(input_ids, update_ids) == np.array([2])
+
+     Args:
+         source: array of values in the source dataset
+         target: array of values in the target dataset
+         default_value (optional): the default index to provide for target values not in source
+
+     Returns:
+         np.ndarray: array of positions of the values from target dataset in the source dataset
+             if default_value is None, (source[result] == target)
+             else, ((source[result] == target) | (source[result] == default_value))
+
+     Raises:
+         IndexError: if default_value is None and there were values in target that were not in source
+     """
+
+     permutation_sort = np.argsort(source)  # complexity O(N_input * logN_input)
+     indices = np.searchsorted(source, target, sorter=permutation_sort)  # complexity O(N_update * logN_input)
+
+     if default_value is None:
+         return permutation_sort[indices]
+
+     if len(source) == 0:
+         return np.full_like(target, fill_value=default_value)
+
+     clipped_indices = np.take(permutation_sort, indices, mode="clip")
+     return np.where(source[clipped_indices] == target, permutation_sort[clipped_indices], default_value)
+
+
+ def _set_default_value(
+     data: SingleDataset, component: ComponentType, field: str, default_value: int | float | np.ndarray
+ ):
+     """
+     This function sets the default value in the data that is to be validated, so the default values are included in the
+     validation.
+
+     Args:
+         data: The input/update data set for all components
+         component: The component of interest
+         field: The field of interest
+         default_value: Some values are not required, but will receive a default value in the C++ core. To do a proper
+             input validation, these default values should be included in the validation. It can be a fixed value for the
+             entire column (int/float) or be different for each element (np.ndarray).
+
+     Returns:
+
+     """
+     if np.isnan(_nan_type(component, field)):
+         mask = np.isnan(data[component][field])
+     else:
+         mask = data[component][field] == _nan_type(component, field)
+     if isinstance(default_value, np.ndarray):
+         data[component][field][mask] = default_value[mask]
+     else:
+         data[component][field][mask] = default_value
+
+
+ def _get_valid_ids(data: SingleDataset, ref_components: ComponentType | list[ComponentType]) -> list[int]:
+     """
+     This function returns the valid IDs specified by all ref_components.
+
+     Args:
+         data: The input/update data set for all components
+         ref_components: The component or components in which we want to look for ids
+
+     Returns:
+         list[int]: the list of valid IDs
+     """
+     # For convenience, ref_component may be a string and we'll convert it to a 'list' containing that string as it's
+     # single element.
+     if isinstance(ref_components, (str, ComponentType)):
+         ref_components = cast(list[ComponentType], [ref_components])
+
+     # Create a set of ids by chaining the ids of all ref_components
+     valid_ids = set()
+     for ref_component in ref_components:
+         if ref_component in data:
+             nan = _nan_type(ref_component, "id")
+             if np.isnan(nan):
+                 mask = ~np.isnan(data[ref_component]["id"])
+             else:
+                 mask = np.not_equal(data[ref_component]["id"], nan)
+             valid_ids.update(data[ref_component]["id"][mask])
+
+     return list(valid_ids)
+
+
+ def _get_mask(data: SingleDataset, component: ComponentType, field: str, **filters: Any) -> np.ndarray:
+     """
+     Get a mask based on the specified filters. E.g. measured_terminal_type=MeasuredTerminalType.source.
+
+     Args:
+         data: The input/update data set for all components
+         component: The component of interest
+         field: The field of interest
+         ref_components: The component or components in which we want to look for ids
+         **filters: One or more filters on the dataset. E.
+
+     Returns:
+         np.ndarray: the mask
+     """
+     values = data[component][field]
+     mask = np.ones(shape=values.shape, dtype=bool)
+     for filter_field, filter_value in filters.items():
+         mask = np.logical_and(mask, data[component][filter_field] == filter_value)
+
+     return mask
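
For orientation, the _get_indexer helper (present in both versions above) documents a sort-plus-binary-search id lookup. The snippet below is a minimal standalone sketch of that technique in plain NumPy; it is illustrative only, uses made-up variable names, and is not part of the diff or of the package itself.

import numpy as np

# Sketch of the sort + binary-search id lookup described in _get_indexer's docstring.
source = np.array([1, 2, 3, 4, 5])  # ids in the input (source) dataset
target = np.array([3, 5])           # ids to look up, e.g. from an update (target) dataset

permutation_sort = np.argsort(source)                               # O(N log N), sort once
indices = np.searchsorted(source, target, sorter=permutation_sort)  # O(M log N) per lookup batch
positions = permutation_sort[indices]

print(positions)                                  # [2 4]
assert np.array_equal(source[positions], target)  # positions map each target id back into source

Sorting the source ids once keeps each subsequent lookup logarithmic, which is why the helper is used for id-based updates of large datasets rather than a per-id linear scan.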