power-grid-model 1.10.17__py3-none-win_amd64.whl → 1.12.119__py3-none-win_amd64.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

Potentially problematic release: this version of power-grid-model has been flagged as possibly problematic.

Files changed (67)
  1. power_grid_model/__init__.py +54 -29
  2. power_grid_model/_core/__init__.py +3 -3
  3. power_grid_model/_core/buffer_handling.py +507 -478
  4. power_grid_model/_core/data_handling.py +195 -141
  5. power_grid_model/_core/data_types.py +142 -0
  6. power_grid_model/_core/dataset_definitions.py +109 -109
  7. power_grid_model/_core/enum.py +226 -0
  8. power_grid_model/_core/error_handling.py +215 -198
  9. power_grid_model/_core/errors.py +134 -0
  10. power_grid_model/_core/index_integer.py +17 -17
  11. power_grid_model/_core/options.py +71 -69
  12. power_grid_model/_core/power_grid_core.py +577 -562
  13. power_grid_model/_core/power_grid_dataset.py +545 -490
  14. power_grid_model/_core/power_grid_meta.py +262 -244
  15. power_grid_model/_core/power_grid_model.py +1025 -687
  16. power_grid_model/_core/power_grid_model_c/__init__.py +3 -0
  17. power_grid_model/_core/power_grid_model_c/bin/power_grid_model_c.dll +0 -0
  18. power_grid_model/_core/power_grid_model_c/get_pgm_dll_path.py +63 -0
  19. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/basics.h +251 -0
  20. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/buffer.h +108 -0
  21. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset.h +332 -0
  22. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset_definitions.h +1060 -0
  23. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/handle.h +111 -0
  24. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/meta_data.h +189 -0
  25. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/model.h +130 -0
  26. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/options.h +142 -0
  27. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/serialization.h +118 -0
  28. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c.h +36 -0
  29. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/basics.hpp +65 -0
  30. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/buffer.hpp +61 -0
  31. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/dataset.hpp +224 -0
  32. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/handle.hpp +108 -0
  33. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/meta_data.hpp +84 -0
  34. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/model.hpp +63 -0
  35. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/options.hpp +52 -0
  36. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/serialization.hpp +124 -0
  37. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/utils.hpp +81 -0
  38. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp.hpp +19 -0
  39. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfig.cmake +37 -0
  40. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfigVersion.cmake +65 -0
  41. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets-release.cmake +19 -0
  42. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets.cmake +144 -0
  43. power_grid_model/_core/power_grid_model_c/lib/power_grid_model_c.lib +0 -0
  44. power_grid_model/_core/power_grid_model_c/share/LICENSE +292 -0
  45. power_grid_model/_core/power_grid_model_c/share/README.md +15 -0
  46. power_grid_model/_core/serialization.py +319 -317
  47. power_grid_model/_core/typing.py +20 -0
  48. power_grid_model/{_utils.py → _core/utils.py} +798 -783
  49. power_grid_model/data_types.py +321 -319
  50. power_grid_model/enum.py +27 -214
  51. power_grid_model/errors.py +37 -119
  52. power_grid_model/typing.py +43 -48
  53. power_grid_model/utils.py +529 -400
  54. power_grid_model/validation/__init__.py +25 -10
  55. power_grid_model/validation/{rules.py → _rules.py} +1167 -962
  56. power_grid_model/validation/{validation.py → _validation.py} +1172 -1015
  57. power_grid_model/validation/assertions.py +93 -92
  58. power_grid_model/validation/errors.py +602 -524
  59. power_grid_model/validation/utils.py +313 -318
  60. {power_grid_model-1.10.17.dist-info → power_grid_model-1.12.119.dist-info}/METADATA +162 -165
  61. power_grid_model-1.12.119.dist-info/RECORD +65 -0
  62. {power_grid_model-1.10.17.dist-info → power_grid_model-1.12.119.dist-info}/WHEEL +1 -1
  63. power_grid_model-1.12.119.dist-info/entry_points.txt +3 -0
  64. power_grid_model/_core/_power_grid_core.dll +0 -0
  65. power_grid_model-1.10.17.dist-info/RECORD +0 -32
  66. power_grid_model-1.10.17.dist-info/top_level.txt +0 -1
  67. {power_grid_model-1.10.17.dist-info → power_grid_model-1.12.119.dist-info/licenses}/LICENSE +0 -0

power_grid_model/validation/utils.py
@@ -1,318 +1,313 @@
1
- # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
-
5
- """
6
- Utilities used for validation. Only errors_to_string() is intended for end users.
7
- """
8
- import re
9
- from typing import Any, cast
10
-
11
- import numpy as np
12
-
13
- from power_grid_model import power_grid_meta_data
14
- from power_grid_model._core.dataset_definitions import (
15
- ComponentType,
16
- ComponentTypeLike,
17
- ComponentTypeVar,
18
- DatasetType,
19
- _str_to_component_type,
20
- )
21
- from power_grid_model.data_types import SingleArray, SingleComponentData, SingleDataset
22
- from power_grid_model.validation.errors import ValidationError
23
-
24
-
25
- def _eval_expression(data: np.ndarray, expression: int | float | str) -> np.ndarray:
26
- """
27
- Wrapper function that checks the type of the 'expression'. If the expression is a string, it is assumed to be a
28
- field expression and the expression is validated. Otherwise it is assumed to be a numerical value and the value
29
- is casted to a numpy 'array'.
30
-
31
- Args:
32
- data: A numpy structured array
33
- expression: A numerical value, or a string, representing a (combination of) field(s)
34
-
35
- Returns: The number or an evaluation of the field name(s) in the data, always represented as a Numpy 'array'.
36
-
37
- Examples:
38
- 123 -> np.array(123)
39
- 123.4 -> np.array(123.4)
40
- 'value' -> data['value']
41
- 'foo/bar' -> data['foo'] / data[bar]
42
-
43
- """
44
- if isinstance(expression, str):
45
- return _eval_field_expression(data, expression)
46
- return np.array(expression)
47
-
48
-
49
- def _eval_field_expression(data: np.ndarray, expression: str) -> np.ndarray:
50
- """
51
- A field expression can either be the name of a field (e.g. 'field_x') in the data, or a ratio between two fields
52
- (e.g. 'field_x / field_y'). The expression is checked on validity and then the fields are checked to be present in
53
- the data. If the expression is a single field name, the field is returned. If it is a ratio, the ratio is
54
- calculated and returned. Values divided by 0 will result in nan values without warning.
55
-
56
- Args:
57
- data: A numpy structured array
58
- expression: A string, representing a (combination of) field(s)
59
-
60
- Expression should be a combination of:
61
- - field names (may contain lower case letters, numbers and underscores)
62
- - a single mathematical operator /
63
-
64
- Returns: An evaluation of the field name(s) in the data.
65
-
66
- Examples:
67
- 'value' -> data['value']
68
- 'foo/bar' -> data['foo'] / data['bar']
69
-
70
- """
71
-
72
- # Validate the expression
73
- match = re.fullmatch(r"[a-z][a-z0-9_]*(\s*/\s*[a-z][a-z0-9_]*)?", expression)
74
- if not match:
75
- raise ValueError(f"Invalid field expression '{expression}'")
76
-
77
- # Find all field names and check if they exist in the dataset
78
- fields = [f.strip() for f in expression.split("/")]
79
- for field in fields:
80
- if field not in data.dtype.names:
81
- raise KeyError(f"Invalid field name {field}")
82
-
83
- if len(fields) == 1:
84
- return data[fields[0]]
85
-
86
- assert len(fields) == 2
87
- zero_div = np.logical_or(np.equal(data[fields[1]], 0.0), np.logical_not(np.isfinite(data[fields[1]])))
88
- if np.any(zero_div):
89
- result = np.full_like(data[fields[0]], np.nan)
90
- np.true_divide(data[fields[0]], data[fields[1]], out=result, where=~zero_div)
91
- return result
92
- return np.true_divide(data[fields[0]], data[fields[1]])
93
-
94
-
95
- def _update_input_data(input_data: SingleDataset, update_data: SingleDataset):
96
- """
97
- Update the input data using the available non-nan values in the update data.
98
- """
99
-
100
- merged_data = {component: array.copy() for component, array in input_data.items()}
101
- for component in update_data.keys():
102
- _update_component_data(component, merged_data[component], update_data[component])
103
- return merged_data
104
-
105
-
106
- def _update_component_data(
107
- component: ComponentTypeLike, input_data: SingleComponentData, update_data: SingleComponentData
108
- ) -> None:
109
- """
110
- Update the data in a single component data set, with another single component data set,
111
- indexed on the "id" field and only non-NaN values are overwritten.
112
- """
113
- if isinstance(input_data, np.ndarray) and isinstance(update_data, np.ndarray):
114
- return _update_component_array_data(component=component, input_data=input_data, update_data=update_data)
115
-
116
- raise NotImplementedError() # TODO(mgovers): add support for columnar data
117
-
118
-
119
- def _update_component_array_data(
120
- component: ComponentTypeLike, input_data: SingleArray, update_data: SingleArray
121
- ) -> None:
122
- """
123
- Update the data in a numpy array, with another numpy array,
124
- indexed on the "id" field and only non-NaN values are overwritten.
125
- """
126
- optional_ids_active = (
127
- "id" in update_data.dtype.names
128
- and np.all(update_data["id"] == np.iinfo(update_data["id"].dtype).min)
129
- and len(update_data["id"]) == len(input_data["id"])
130
- )
131
- update_data_ids = input_data["id"] if optional_ids_active else update_data["id"]
132
-
133
- for field in update_data.dtype.names:
134
- if field == "id":
135
- continue
136
- nan = _nan_type(component, field, DatasetType.update)
137
- if np.isnan(nan):
138
- mask = ~np.isnan(update_data[field])
139
- else:
140
- mask = np.not_equal(update_data[field], nan)
141
-
142
- if mask.ndim == 2:
143
- for phase in range(mask.shape[1]):
144
- # find indexers of to-be-updated object
145
- sub_mask = mask[:, phase]
146
- idx = _get_indexer(input_data["id"], update_data_ids[sub_mask])
147
- # update
148
- input_data[field][idx, phase] = update_data[field][sub_mask, phase]
149
- else:
150
- # find indexers of to-be-updated object
151
- idx = _get_indexer(input_data["id"], update_data_ids[mask])
152
- # update
153
- input_data[field][idx] = update_data[field][mask]
154
-
155
-
156
- def errors_to_string(
157
- errors: list[ValidationError] | dict[int, list[ValidationError]] | None,
158
- name: str = "the data",
159
- details: bool = False,
160
- id_lookup: list[str] | dict[int, str] | None = None,
161
- ) -> str:
162
- """
163
- Convert a set of errors (list or dict) to a human readable string representation.
164
-
165
- Args:
166
- errors: The error objects. List for input_data only, dict for batch data.
167
- name: Human understandable name of the dataset, e.g. input_data, or update_data.
168
- details: Display object ids and error specific information.
169
- id_lookup: A list or dict (int->str) containing textual object ids
170
-
171
- Returns:
172
- A human readable string representation of a set of errors.
173
- """
174
- if errors is None or len(errors) == 0:
175
- return f"{name}: OK"
176
- if isinstance(errors, dict):
177
- return "\n".join(errors_to_string(err, f"{name}, batch #{i}", details) for i, err in sorted(errors.items()))
178
- if len(errors) == 1 and not details:
179
- return f"There is a validation error in {name}:\n\t{errors[0]}"
180
- if len(errors) == 1:
181
- msg = f"There is a validation error in {name}:\n"
182
- else:
183
- msg = f"There are {len(errors)} validation errors in {name}:\n"
184
- if details:
185
- for error in errors:
186
- msg += "\n\t" + str(error) + "\n"
187
- msg += "".join(f"\t\t{k}: {v}\n" for k, v in error.get_context(id_lookup).items())
188
- else:
189
- msg += "\n".join(f"{i + 1:>4}. {err}" for i, err in enumerate(errors))
190
- return msg
191
-
192
-
193
- def _nan_type(component: ComponentTypeLike, field: str, data_type: DatasetType = DatasetType.input):
194
- """
195
- Helper function to retrieve the nan value for a certain field as defined in the power_grid_meta_data.
196
- """
197
- component = _str_to_component_type(component)
198
- return power_grid_meta_data[data_type][component].nans[field]
199
-
200
-
201
- def _get_indexer(source: np.ndarray, target: np.ndarray, default_value: int | None = None) -> np.ndarray:
202
- """
203
- Given array of values from a source and a target dataset.
204
- Find the position of each value in the target dataset in the context of the source dataset.
205
- This is needed to update values in the dataset by id lookup.
206
- Internally this is done by sorting the input ids, then using binary search lookup.
207
-
208
- E.g.: Find the position of each id in an update (target) dataset in the input (source) dataset
209
-
210
- >>> input_ids = [1, 2, 3, 4, 5]
211
- >>> update_ids = [3]
212
- >>> assert _get_indexer(input_ids, update_ids) == np.array([2])
213
-
214
- Args:
215
- source: array of values in the source dataset
216
- target: array of values in the target dataset
217
- default_value (optional): the default index to provide for target values not in source
218
-
219
- Returns:
220
- np.ndarray: array of positions of the values from target dataset in the source dataset
221
- if default_value is None, (source[result] == target)
222
- else, ((source[result] == target) | (source[result] == default_value))
223
-
224
- Raises:
225
- IndexError: if default_value is None and there were values in target that were not in source
226
- """
227
-
228
- permutation_sort = np.argsort(source) # complexity O(N_input * logN_input)
229
- indices = np.searchsorted(source, target, sorter=permutation_sort) # complexity O(N_update * logN_input)
230
-
231
- if default_value is None:
232
- return permutation_sort[indices]
233
-
234
- if len(source) == 0:
235
- return np.full_like(target, fill_value=default_value)
236
-
237
- clipped_indices = np.take(permutation_sort, indices, mode="clip")
238
- return np.where(source[clipped_indices] == target, permutation_sort[clipped_indices], default_value)
239
-
240
-
241
- def _set_default_value(
242
- data: SingleDataset, component: ComponentTypeLike, field: str, default_value: int | float | np.ndarray
243
- ):
244
- """
245
- This function sets the default value in the data that is to be validated, so the default values are included in the
246
- validation.
247
-
248
- Args:
249
- data: The input/update data set for all components
250
- component: The component of interest
251
- field: The field of interest
252
- default_value: Some values are not required, but will receive a default value in the C++ core. To do a proper
253
- input validation, these default values should be included in the validation. It can be a fixed value for the
254
- entire column (int/float) or be different for each element (np.ndarray).
255
-
256
- Returns:
257
-
258
- """
259
- if np.isnan(_nan_type(component, field)):
260
- mask = np.isnan(data[component][field])
261
- else:
262
- mask = data[component][field] == _nan_type(component, field)
263
- if isinstance(default_value, np.ndarray):
264
- data[component][field][mask] = default_value[mask]
265
- else:
266
- data[component][field][mask] = default_value
267
-
268
-
269
- def _get_valid_ids(data: SingleDataset, ref_components: ComponentTypeLike | list[ComponentTypeVar]) -> list[int]:
270
- """
271
- This function returns the valid IDs specified by all ref_components
272
-
273
- Args:
274
- data: The input/update data set for all components
275
- ref_components: The component or components in which we want to look for ids
276
-
277
- Returns:
278
- list[int]: the list of valid IDs
279
- """
280
- # For convenience, ref_component may be a string and we'll convert it to a 'list' containing that string as it's
281
- # single element.
282
- if isinstance(ref_components, (str, ComponentType)):
283
- ref_components = cast(list[ComponentTypeVar], [ref_components])
284
-
285
- # Create a set of ids by chaining the ids of all ref_components
286
- valid_ids = set()
287
- for ref_component in ref_components:
288
- if ref_component in data:
289
- nan = _nan_type(ref_component, "id")
290
- if np.isnan(nan):
291
- mask = ~np.isnan(data[ref_component]["id"])
292
- else:
293
- mask = np.not_equal(data[ref_component]["id"], nan)
294
- valid_ids.update(data[ref_component]["id"][mask])
295
-
296
- return list(valid_ids)
297
-
298
-
299
- def _get_mask(data: SingleDataset, component: ComponentTypeLike, field: str, **filters: Any) -> np.ndarray:
300
- """
301
- Get a mask based on the specified filters. E.g. measured_terminal_type=MeasuredTerminalType.source.
302
-
303
- Args:
304
- data: The input/update data set for all components
305
- component: The component of interest
306
- field: The field of interest
307
- ref_components: The component or components in which we want to look for ids
308
- **filters: One or more filters on the dataset. E.
309
-
310
- Returns:
311
- np.ndarray: the mask
312
- """
313
- values = data[component][field]
314
- mask = np.ones(shape=values.shape, dtype=bool)
315
- for filter_field, filter_value in filters.items():
316
- mask = np.logical_and(mask, data[component][filter_field] == filter_value)
317
-
318
- return mask
1
+ # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+
5
+ """
6
+ Utilities used for validation. Only errors_to_string() is intended for end users.
7
+ """
8
+
9
+ import re
10
+ from typing import Any, cast
11
+
12
+ import numpy as np
13
+
14
+ from power_grid_model._core.dataset_definitions import ComponentType, DatasetType
15
+ from power_grid_model._core.power_grid_meta import power_grid_meta_data
16
+ from power_grid_model.data_types import SingleArray, SingleComponentData, SingleDataset
17
+ from power_grid_model.validation.errors import ValidationError
18
+
19
+
20
+ def _eval_expression(data: np.ndarray, expression: int | float | str) -> np.ndarray:
21
+ """
22
+ Wrapper function that checks the type of the 'expression'. If the expression is a string, it is assumed to be a
23
+ field expression and the expression is validated. Otherwise it is assumed to be a numerical value and the value
24
+ is casted to a numpy 'array'.
25
+
26
+ Args:
27
+ data: A numpy structured array
28
+ expression: A numerical value, or a string, representing a (combination of) field(s)
29
+
30
+ Returns: The number or an evaluation of the field name(s) in the data, always represented as a Numpy 'array'.
31
+
32
+ Examples:
33
+ 123 -> np.array(123)
34
+ 123.4 -> np.array(123.4)
35
+ 'value' -> data['value']
36
+ 'foo/bar' -> data['foo'] / data[bar]
37
+
38
+ """
39
+ if isinstance(expression, str):
40
+ return _eval_field_expression(data, expression)
41
+ return np.array(expression)
42
+
43
+
44
+ def _eval_field_expression(data: np.ndarray, expression: str) -> np.ndarray:
45
+ """
46
+ A field expression can either be the name of a field (e.g. 'field_x') in the data, or a ratio between two fields
47
+ (e.g. 'field_x / field_y'). The expression is checked on validity and then the fields are checked to be present in
48
+ the data. If the expression is a single field name, the field is returned. If it is a ratio, the ratio is
49
+ calculated and returned. Values divided by 0 will result in nan values without warning.
50
+
51
+ Args:
52
+ data: A numpy structured array
53
+ expression: A string, representing a (combination of) field(s)
54
+
55
+ Expression should be a combination of:
56
+ - field names (may contain lower case letters, numbers and underscores)
57
+ - a single mathematical operator /
58
+
59
+ Returns: An evaluation of the field name(s) in the data.
60
+
61
+ Examples:
62
+ 'value' -> data['value']
63
+ 'foo/bar' -> data['foo'] / data['bar']
64
+
65
+ """
66
+ # Validate the expression
67
+ match = re.fullmatch(r"[a-z][a-z0-9_]*(\s*/\s*[a-z][a-z0-9_]*)?", expression)
68
+ if not match:
69
+ raise ValueError(f"Invalid field expression '{expression}'")
70
+
71
+ if data.dtype.names is None:
72
+ raise ValueError("No attributes available in meta")
73
+
74
+ # Find all field names and check if they exist in the dataset
75
+ fields = [f.strip() for f in expression.split("/")]
76
+ for field in fields:
77
+ if field not in data.dtype.names:
78
+ raise KeyError(f"Invalid field name {field}")
79
+
80
+ if len(fields) == 1:
81
+ return data[fields[0]]
82
+
83
+ max_num_fields = 2
84
+ if len(fields) != max_num_fields:
85
+ raise ValueError(f"There should be exactly {max_num_fields} fields, got {len(fields)} fields")
86
+ zero_div = np.logical_or(np.equal(data[fields[1]], 0.0), np.logical_not(np.isfinite(data[fields[1]])))
87
+ if np.any(zero_div):
88
+ result = np.full_like(data[fields[0]], np.nan)
89
+ np.true_divide(data[fields[0]], data[fields[1]], out=result, where=~zero_div)
90
+ return result
91
+ return np.true_divide(data[fields[0]], data[fields[1]])
92
+
93
+
94
+ def _update_input_data(input_data: SingleDataset, update_data: SingleDataset):
95
+ """
96
+ Update the input data using the available non-nan values in the update data.
97
+ """
98
+
99
+ merged_data = {component: array.copy() for component, array in input_data.items()}
100
+ for component in update_data:
101
+ _update_component_data(component, merged_data[component], update_data[component])
102
+ return merged_data
103
+
104
+
105
+ def _update_component_data(
106
+ component: ComponentType, input_data: SingleComponentData, update_data: SingleComponentData
107
+ ) -> None:
108
+ """
109
+ Update the data in a single component data set, with another single component data set,
110
+ indexed on the "id" field and only non-NaN values are overwritten.
111
+ """
112
+ if isinstance(input_data, np.ndarray) and isinstance(update_data, np.ndarray):
113
+ return _update_component_array_data(component=component, input_data=input_data, update_data=update_data)
114
+
115
+ raise NotImplementedError # TODO(mgovers): add support for columnar data
116
+
117
+
118
+ def _update_component_array_data(component: ComponentType, input_data: SingleArray, update_data: SingleArray) -> None:
119
+ """
120
+ Update the data in a numpy array, with another numpy array,
121
+ indexed on the "id" field and only non-NaN values are overwritten.
122
+ """
123
+ batch_dataset_ndim = 2
124
+ if update_data.dtype.names is None:
125
+ raise ValueError("Invalid data format")
126
+
127
+ optional_ids_active = (
128
+ "id" in update_data.dtype.names
129
+ and np.all(update_data["id"] == np.iinfo(update_data["id"].dtype).min)
130
+ and len(update_data["id"]) == len(input_data["id"])
131
+ )
132
+ update_data_ids = input_data["id"] if optional_ids_active else update_data["id"]
133
+
134
+ for field in update_data.dtype.names:
135
+ if field == "id":
136
+ continue
137
+ nan = _nan_type(component, field, DatasetType.update)
138
+ mask = ~np.isnan(update_data[field]) if np.isnan(nan) else np.not_equal(update_data[field], nan)
139
+
140
+ if mask.ndim == batch_dataset_ndim:
141
+ for phase in range(mask.shape[1]):
142
+ # find indexers of to-be-updated object
143
+ sub_mask = mask[:, phase]
144
+ idx = _get_indexer(input_data["id"], update_data_ids[sub_mask])
145
+ # update
146
+ input_data[field][idx, phase] = update_data[field][sub_mask, phase]
147
+ else:
148
+ # find indexers of to-be-updated object
149
+ idx = _get_indexer(input_data["id"], update_data_ids[mask])
150
+ # update
151
+ input_data[field][idx] = update_data[field][mask]
152
+
153
+
154
+ def errors_to_string(
155
+ errors: list[ValidationError] | dict[int, list[ValidationError]] | None,
156
+ name: str = "the data",
157
+ details: bool = False,
158
+ id_lookup: list[str] | dict[int, str] | None = None,
159
+ ) -> str:
160
+ """
161
+ Convert a set of errors (list or dict) to a human readable string representation.
162
+
163
+ Args:
164
+ errors: The error objects. List for input_data only, dict for batch data.
165
+ name: Human understandable name of the dataset, e.g. input_data, or update_data.
166
+ details: Display object ids and error specific information.
167
+ id_lookup: A list or dict (int->str) containing textual object ids
168
+
169
+ Returns:
170
+ A human readable string representation of a set of errors.
171
+ """
172
+ if errors is None or len(errors) == 0:
173
+ return f"{name}: OK"
174
+ if isinstance(errors, dict):
175
+ return "\n".join(errors_to_string(err, f"{name}, batch #{i}", details) for i, err in sorted(errors.items()))
176
+ if len(errors) == 1 and not details:
177
+ return f"There is a validation error in {name}:\n\t{errors[0]}"
178
+ if len(errors) == 1:
179
+ msg = f"There is a validation error in {name}:\n"
180
+ else:
181
+ msg = f"There are {len(errors)} validation errors in {name}:\n"
182
+ if details:
183
+ for error in errors:
184
+ msg += "\n\t" + str(error) + "\n"
185
+ msg += "".join(f"\t\t{k}: {v}\n" for k, v in error.get_context(id_lookup).items())
186
+ else:
187
+ msg += "\n".join(f"{i + 1:>4}. {err}" for i, err in enumerate(errors))
188
+ return msg
189
+
190
+
191
+ def _nan_type(component: ComponentType, field: str, data_type: DatasetType = DatasetType.input):
192
+ """Helper function to retrieve the nan value for a certain field as defined in the power_grid_meta_data."""
193
+ return power_grid_meta_data[data_type][component].nans[field]
194
+
195
+
196
+ def _get_indexer(source: np.ndarray, target: np.ndarray, default_value: int | None = None) -> np.ndarray:
197
+ """
198
+ Given array of values from a source and a target dataset.
199
+ Find the position of each value in the target dataset in the context of the source dataset.
200
+ This is needed to update values in the dataset by id lookup.
201
+ Internally this is done by sorting the input ids, then using binary search lookup.
202
+
203
+ E.g.: Find the position of each id in an update (target) dataset in the input (source) dataset
204
+
205
+ >>> input_ids = [1, 2, 3, 4, 5]
206
+ >>> update_ids = [3]
207
+ >>> assert _get_indexer(input_ids, update_ids) == np.array([2])
208
+
209
+ Args:
210
+ source: array of values in the source dataset
211
+ target: array of values in the target dataset
212
+ default_value (optional): the default index to provide for target values not in source
213
+
214
+ Returns:
215
+ np.ndarray: array of positions of the values from target dataset in the source dataset
216
+ if default_value is None, (source[result] == target)
217
+ else, ((source[result] == target) | (source[result] == default_value))
218
+
219
+ Raises:
220
+ IndexError: if default_value is None and there were values in target that were not in source
221
+ """
222
+
223
+ permutation_sort = np.argsort(source) # complexity O(N_input * logN_input)
224
+ indices = np.searchsorted(source, target, sorter=permutation_sort) # complexity O(N_update * logN_input)
225
+
226
+ if default_value is None:
227
+ return permutation_sort[indices]
228
+
229
+ if len(source) == 0:
230
+ return np.full_like(target, fill_value=default_value)
231
+
232
+ clipped_indices = np.take(permutation_sort, indices, mode="clip")
233
+ return np.where(source[clipped_indices] == target, permutation_sort[clipped_indices], default_value)
234
+
235
+
236
+ def _set_default_value(
237
+ data: SingleDataset, component: ComponentType, field: str, default_value: int | float | np.ndarray
238
+ ):
239
+ """
240
+ This function sets the default value in the data that is to be validated, so the default values are included in the
241
+ validation.
242
+
243
+ Args:
244
+ data: The input/update data set for all components
245
+ component: The component of interest
246
+ field: The field of interest
247
+ default_value: Some values are not required, but will receive a default value in the C++ core. To do a proper
248
+ input validation, these default values should be included in the validation. It can be a fixed value for the
249
+ entire column (int/float) or be different for each element (np.ndarray).
250
+
251
+ Returns:
252
+
253
+ """
254
+ if np.isnan(_nan_type(component, field)):
255
+ mask = np.isnan(data[component][field])
256
+ else:
257
+ mask = data[component][field] == _nan_type(component, field)
258
+ if isinstance(default_value, np.ndarray):
259
+ data[component][field][mask] = default_value[mask]
260
+ else:
261
+ data[component][field][mask] = default_value
262
+
263
+
264
+ def _get_valid_ids(data: SingleDataset, ref_components: ComponentType | list[ComponentType]) -> list[int]:
265
+ """
266
+ This function returns the valid IDs specified by all ref_components.
267
+
268
+ Args:
269
+ data: The input/update data set for all components
270
+ ref_components: The component or components in which we want to look for ids
271
+
272
+ Returns:
273
+ list[int]: the list of valid IDs
274
+ """
275
+ # For convenience, ref_component may be a string and we'll convert it to a 'list' containing that string as it's
276
+ # single element.
277
+ if isinstance(ref_components, (str, ComponentType)):
278
+ ref_components = cast(list[ComponentType], [ref_components])
279
+
280
+ # Create a set of ids by chaining the ids of all ref_components
281
+ valid_ids = set()
282
+ for ref_component in ref_components:
283
+ if ref_component in data:
284
+ nan = _nan_type(ref_component, "id")
285
+ if np.isnan(nan):
286
+ mask = ~np.isnan(data[ref_component]["id"])
287
+ else:
288
+ mask = np.not_equal(data[ref_component]["id"], nan)
289
+ valid_ids.update(data[ref_component]["id"][mask])
290
+
291
+ return list(valid_ids)
292
+
293
+
294
+ def _get_mask(data: SingleDataset, component: ComponentType, field: str, **filters: Any) -> np.ndarray:
295
+ """
296
+ Get a mask based on the specified filters. E.g. measured_terminal_type=MeasuredTerminalType.source.
297
+
298
+ Args:
299
+ data: The input/update data set for all components
300
+ component: The component of interest
301
+ field: The field of interest
302
+ ref_components: The component or components in which we want to look for ids
303
+ **filters: One or more filters on the dataset. E.
304
+
305
+ Returns:
306
+ np.ndarray: the mask
307
+ """
308
+ values = data[component][field]
309
+ mask = np.ones(shape=values.shape, dtype=bool)
310
+ for filter_field, filter_value in filters.items():
311
+ mask = np.logical_and(mask, data[component][filter_field] == filter_value)
312
+
313
+ return mask
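
A few small usage sketches may make the refactored validation utilities above easier to follow. First, _eval_field_expression accepts either a single field name or a ratio of two fields, and maps division by zero (or by a non-finite denominator) to NaN without emitting a warning. Below is a minimal sketch of that masking behaviour, using a hypothetical two-field structured array rather than real power-grid-model data:

```python
import numpy as np

# Hypothetical structured array mirroring the 'foo/bar' example in the docstring.
data = np.array([(10.0, 2.0), (3.0, 0.0)], dtype=[("foo", "f8"), ("bar", "f8")])

denominator = data["bar"]
zero_div = np.equal(denominator, 0.0) | ~np.isfinite(denominator)
result = np.full_like(data["foo"], np.nan)
np.true_divide(data["foo"], denominator, out=result, where=~zero_div)
print(result)  # [ 5. nan] -- the zero denominator yields NaN instead of a RuntimeWarning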
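The id-based merge in _update_component_array_data overwrites only those fields whose update values are not NaN (or not the sentinel "nan" value defined in power_grid_meta_data). A simplified, self-contained sketch of that rule for a single float field, with made-up ids and values:

```python
import numpy as np

# Match rows on "id"; NaN in the update means "keep the input value".
dtype = [("id", "i4"), ("p", "f8")]
input_data = np.array([(1, 10.0), (2, 20.0), (3, 30.0)], dtype=dtype)
update_data = np.array([(3, 99.0), (1, np.nan)], dtype=dtype)

mask = ~np.isnan(update_data["p"])                 # which update rows carry a value
order = np.argsort(input_data["id"])
idx = order[np.searchsorted(input_data["id"], update_data["id"][mask], sorter=order)]
input_data["p"][idx] = update_data["p"][mask]
print(input_data)  # [(1, 10.) (2, 20.) (3, 99.)]
```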
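errors_to_string remains the only helper in this module intended for end users. Assuming validate_input_data and initialize_array stay available from the public API, as in current releases, it can be used roughly like this (the node values are deliberately invalid to force errors):

```python
from power_grid_model import initialize_array
from power_grid_model.validation import errors_to_string, validate_input_data

# Two nodes with a duplicate id and a non-positive rated voltage.
node = initialize_array("input", "node", 2)
node["id"] = [1, 1]
node["u_rated"] = [10.5e3, -1.0]

errors = validate_input_data({"node": node})  # list[ValidationError] | None
print(errors_to_string(errors, name="input_data", details=True))
```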
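Finally, the argsort/searchsorted lookup in _get_indexer is easiest to see with an unsorted source array; the sorter permutation is what maps the binary-search positions back to positions in the original array. A small worked example with arbitrary ids:

```python
import numpy as np

source = np.array([5, 3, 1, 4, 2])   # input ids, not necessarily sorted
target = np.array([3, 2])            # ids to look up

order = np.argsort(source)                                     # [2, 4, 1, 3, 0]
pos_in_sorted = np.searchsorted(source, target, sorter=order)  # positions in the sorted view: [2, 1]
idx = order[pos_in_sorted]                                     # positions in the original array: [1, 4]

assert np.array_equal(source[idx], target)
```

When a default_value is given, _get_indexer clips the searchsorted positions and compares the looked-up source values against the target, so ids that are missing from source map to default_value instead of raising an IndexError.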