power-grid-model 1.11.33-py3-none-win_amd64.whl → 1.12.70-py3-none-win_amd64.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (67)
  1. power_grid_model/__init__.py +54 -54
  2. power_grid_model/_core/__init__.py +3 -3
  3. power_grid_model/_core/buffer_handling.py +493 -478
  4. power_grid_model/_core/data_handling.py +195 -141
  5. power_grid_model/_core/data_types.py +143 -132
  6. power_grid_model/_core/dataset_definitions.py +109 -108
  7. power_grid_model/_core/enum.py +226 -226
  8. power_grid_model/_core/error_handling.py +206 -205
  9. power_grid_model/_core/errors.py +130 -126
  10. power_grid_model/_core/index_integer.py +17 -17
  11. power_grid_model/_core/options.py +71 -70
  12. power_grid_model/_core/power_grid_core.py +563 -581
  13. power_grid_model/_core/power_grid_dataset.py +535 -534
  14. power_grid_model/_core/power_grid_meta.py +257 -243
  15. power_grid_model/_core/power_grid_model.py +969 -687
  16. power_grid_model/_core/power_grid_model_c/__init__.py +3 -0
  17. power_grid_model/_core/power_grid_model_c/bin/power_grid_model_c.dll +0 -0
  18. power_grid_model/_core/power_grid_model_c/get_pgm_dll_path.py +63 -0
  19. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/basics.h +255 -0
  20. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/buffer.h +108 -0
  21. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset.h +316 -0
  22. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset_definitions.h +1052 -0
  23. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/handle.h +99 -0
  24. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/meta_data.h +189 -0
  25. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/model.h +125 -0
  26. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/options.h +142 -0
  27. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/serialization.h +118 -0
  28. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c.h +36 -0
  29. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/basics.hpp +65 -0
  30. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/buffer.hpp +61 -0
  31. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/dataset.hpp +220 -0
  32. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/handle.hpp +108 -0
  33. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/meta_data.hpp +84 -0
  34. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/model.hpp +63 -0
  35. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/options.hpp +52 -0
  36. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/serialization.hpp +124 -0
  37. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/utils.hpp +81 -0
  38. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp.hpp +19 -0
  39. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfig.cmake +37 -0
  40. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfigVersion.cmake +65 -0
  41. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets-release.cmake +19 -0
  42. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets.cmake +144 -0
  43. power_grid_model/_core/power_grid_model_c/lib/power_grid_model_c.lib +0 -0
  44. power_grid_model/_core/power_grid_model_c/share/LICENSE +292 -0
  45. power_grid_model/_core/power_grid_model_c/share/README.md +15 -0
  46. power_grid_model/_core/serialization.py +317 -317
  47. power_grid_model/_core/typing.py +20 -20
  48. power_grid_model/_core/utils.py +798 -789
  49. power_grid_model/data_types.py +321 -321
  50. power_grid_model/enum.py +27 -27
  51. power_grid_model/errors.py +37 -37
  52. power_grid_model/typing.py +43 -43
  53. power_grid_model/utils.py +473 -469
  54. power_grid_model/validation/__init__.py +25 -25
  55. power_grid_model/validation/_rules.py +1171 -1174
  56. power_grid_model/validation/_validation.py +1172 -1173
  57. power_grid_model/validation/assertions.py +93 -93
  58. power_grid_model/validation/errors.py +602 -589
  59. power_grid_model/validation/utils.py +313 -312
  60. {power_grid_model-1.11.33.dist-info → power_grid_model-1.12.70.dist-info}/METADATA +178 -180
  61. power_grid_model-1.12.70.dist-info/RECORD +65 -0
  62. {power_grid_model-1.11.33.dist-info → power_grid_model-1.12.70.dist-info}/WHEEL +1 -1
  63. power_grid_model-1.12.70.dist-info/entry_points.txt +3 -0
  64. power_grid_model/_core/_power_grid_core.dll +0 -0
  65. power_grid_model-1.11.33.dist-info/RECORD +0 -36
  66. power_grid_model-1.11.33.dist-info/top_level.txt +0 -1
  67. {power_grid_model-1.11.33.dist-info → power_grid_model-1.12.70.dist-info}/licenses/LICENSE +0 -0
@@ -1,312 +1,313 @@
1
- # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
-
5
- """
6
- Utilities used for validation. Only errors_to_string() is intended for end users.
7
- """
8
-
9
- import re
10
- from typing import Any, cast
11
-
12
- import numpy as np
13
-
14
- from power_grid_model._core.dataset_definitions import ComponentType, DatasetType
15
- from power_grid_model._core.power_grid_meta import power_grid_meta_data
16
- from power_grid_model.data_types import SingleArray, SingleComponentData, SingleDataset
17
- from power_grid_model.validation.errors import ValidationError
18
-
19
-
20
- def _eval_expression(data: np.ndarray, expression: int | float | str) -> np.ndarray:
21
- """
22
- Wrapper function that checks the type of the 'expression'. If the expression is a string, it is assumed to be a
23
- field expression and the expression is validated. Otherwise it is assumed to be a numerical value and the value
24
- is casted to a numpy 'array'.
25
-
26
- Args:
27
- data: A numpy structured array
28
- expression: A numerical value, or a string, representing a (combination of) field(s)
29
-
30
- Returns: The number or an evaluation of the field name(s) in the data, always represented as a Numpy 'array'.
31
-
32
- Examples:
33
- 123 -> np.array(123)
34
- 123.4 -> np.array(123.4)
35
- 'value' -> data['value']
36
- 'foo/bar' -> data['foo'] / data[bar]
37
-
38
- """
39
- if isinstance(expression, str):
40
- return _eval_field_expression(data, expression)
41
- return np.array(expression)
42
-
43
-
44
- def _eval_field_expression(data: np.ndarray, expression: str) -> np.ndarray:
45
- """
46
- A field expression can either be the name of a field (e.g. 'field_x') in the data, or a ratio between two fields
47
- (e.g. 'field_x / field_y'). The expression is checked on validity and then the fields are checked to be present in
48
- the data. If the expression is a single field name, the field is returned. If it is a ratio, the ratio is
49
- calculated and returned. Values divided by 0 will result in nan values without warning.
50
-
51
- Args:
52
- data: A numpy structured array
53
- expression: A string, representing a (combination of) field(s)
54
-
55
- Expression should be a combination of:
56
- - field names (may contain lower case letters, numbers and underscores)
57
- - a single mathematical operator /
58
-
59
- Returns: An evaluation of the field name(s) in the data.
60
-
61
- Examples:
62
- 'value' -> data['value']
63
- 'foo/bar' -> data['foo'] / data['bar']
64
-
65
- """
66
- # Validate the expression
67
- match = re.fullmatch(r"[a-z][a-z0-9_]*(\s*/\s*[a-z][a-z0-9_]*)?", expression)
68
- if not match:
69
- raise ValueError(f"Invalid field expression '{expression}'")
70
-
71
- if data.dtype.names is None:
72
- raise ValueError("No attributes available in meta")
73
-
74
- # Find all field names and check if they exist in the dataset
75
- fields = [f.strip() for f in expression.split("/")]
76
- for field in fields:
77
- if field not in data.dtype.names:
78
- raise KeyError(f"Invalid field name {field}")
79
-
80
- if len(fields) == 1:
81
- return data[fields[0]]
82
-
83
- assert len(fields) == 2
84
- zero_div = np.logical_or(np.equal(data[fields[1]], 0.0), np.logical_not(np.isfinite(data[fields[1]])))
85
- if np.any(zero_div):
86
- result = np.full_like(data[fields[0]], np.nan)
87
- np.true_divide(data[fields[0]], data[fields[1]], out=result, where=~zero_div)
88
- return result
89
- return np.true_divide(data[fields[0]], data[fields[1]])
90
-
91
-
92
- def _update_input_data(input_data: SingleDataset, update_data: SingleDataset):
93
- """
94
- Update the input data using the available non-nan values in the update data.
95
- """
96
-
97
- merged_data = {component: array.copy() for component, array in input_data.items()}
98
- for component in update_data:
99
- _update_component_data(component, merged_data[component], update_data[component])
100
- return merged_data
101
-
102
-
103
- def _update_component_data(
104
- component: ComponentType, input_data: SingleComponentData, update_data: SingleComponentData
105
- ) -> None:
106
- """
107
- Update the data in a single component data set, with another single component data set,
108
- indexed on the "id" field and only non-NaN values are overwritten.
109
- """
110
- if isinstance(input_data, np.ndarray) and isinstance(update_data, np.ndarray):
111
- return _update_component_array_data(component=component, input_data=input_data, update_data=update_data)
112
-
113
- raise NotImplementedError() # TODO(mgovers): add support for columnar data
114
-
115
-
116
- def _update_component_array_data(component: ComponentType, input_data: SingleArray, update_data: SingleArray) -> None:
117
- """
118
- Update the data in a numpy array, with another numpy array,
119
- indexed on the "id" field and only non-NaN values are overwritten.
120
- """
121
- if update_data.dtype.names is None:
122
- raise ValueError("Invalid data format")
123
-
124
- optional_ids_active = (
125
- "id" in update_data.dtype.names
126
- and np.all(update_data["id"] == np.iinfo(update_data["id"].dtype).min)
127
- and len(update_data["id"]) == len(input_data["id"])
128
- )
129
- update_data_ids = input_data["id"] if optional_ids_active else update_data["id"]
130
-
131
- for field in update_data.dtype.names:
132
- if field == "id":
133
- continue
134
- nan = _nan_type(component, field, DatasetType.update)
135
- mask = ~np.isnan(update_data[field]) if np.isnan(nan) else np.not_equal(update_data[field], nan)
136
-
137
- if mask.ndim == 2:
138
- for phase in range(mask.shape[1]):
139
- # find indexers of to-be-updated object
140
- sub_mask = mask[:, phase]
141
- idx = _get_indexer(input_data["id"], update_data_ids[sub_mask])
142
- # update
143
- input_data[field][idx, phase] = update_data[field][sub_mask, phase]
144
- else:
145
- # find indexers of to-be-updated object
146
- idx = _get_indexer(input_data["id"], update_data_ids[mask])
147
- # update
148
- input_data[field][idx] = update_data[field][mask]
149
-
150
-
151
- def errors_to_string(
152
- errors: list[ValidationError] | dict[int, list[ValidationError]] | None,
153
- name: str = "the data",
154
- details: bool = False,
155
- id_lookup: list[str] | dict[int, str] | None = None,
156
- ) -> str:
157
- """
158
- Convert a set of errors (list or dict) to a human readable string representation.
159
-
160
- Args:
161
- errors: The error objects. List for input_data only, dict for batch data.
162
- name: Human understandable name of the dataset, e.g. input_data, or update_data.
163
- details: Display object ids and error specific information.
164
- id_lookup: A list or dict (int->str) containing textual object ids
165
-
166
- Returns:
167
- A human readable string representation of a set of errors.
168
- """
169
- if errors is None or len(errors) == 0:
170
- return f"{name}: OK"
171
- if isinstance(errors, dict):
172
- return "\n".join(errors_to_string(err, f"{name}, batch #{i}", details) for i, err in sorted(errors.items()))
173
- if len(errors) == 1 and not details:
174
- return f"There is a validation error in {name}:\n\t{errors[0]}"
175
- if len(errors) == 1:
176
- msg = f"There is a validation error in {name}:\n"
177
- else:
178
- msg = f"There are {len(errors)} validation errors in {name}:\n"
179
- if details:
180
- for error in errors:
181
- msg += "\n\t" + str(error) + "\n"
182
- msg += "".join(f"\t\t{k}: {v}\n" for k, v in error.get_context(id_lookup).items())
183
- else:
184
- msg += "\n".join(f"{i + 1:>4}. {err}" for i, err in enumerate(errors))
185
- return msg
186
-
187
-
188
- def _nan_type(component: ComponentType, field: str, data_type: DatasetType = DatasetType.input):
189
- """
190
- Helper function to retrieve the nan value for a certain field as defined in the power_grid_meta_data.
191
- """
192
- return power_grid_meta_data[data_type][component].nans[field]
193
-
194
-
195
- def _get_indexer(source: np.ndarray, target: np.ndarray, default_value: int | None = None) -> np.ndarray:
196
- """
197
- Given array of values from a source and a target dataset.
198
- Find the position of each value in the target dataset in the context of the source dataset.
199
- This is needed to update values in the dataset by id lookup.
200
- Internally this is done by sorting the input ids, then using binary search lookup.
201
-
202
- E.g.: Find the position of each id in an update (target) dataset in the input (source) dataset
203
-
204
- >>> input_ids = [1, 2, 3, 4, 5]
205
- >>> update_ids = [3]
206
- >>> assert _get_indexer(input_ids, update_ids) == np.array([2])
207
-
208
- Args:
209
- source: array of values in the source dataset
210
- target: array of values in the target dataset
211
- default_value (optional): the default index to provide for target values not in source
212
-
213
- Returns:
214
- np.ndarray: array of positions of the values from target dataset in the source dataset
215
- if default_value is None, (source[result] == target)
216
- else, ((source[result] == target) | (source[result] == default_value))
217
-
218
- Raises:
219
- IndexError: if default_value is None and there were values in target that were not in source
220
- """
221
-
222
- permutation_sort = np.argsort(source) # complexity O(N_input * logN_input)
223
- indices = np.searchsorted(source, target, sorter=permutation_sort) # complexity O(N_update * logN_input)
224
-
225
- if default_value is None:
226
- return permutation_sort[indices]
227
-
228
- if len(source) == 0:
229
- return np.full_like(target, fill_value=default_value)
230
-
231
- clipped_indices = np.take(permutation_sort, indices, mode="clip")
232
- return np.where(source[clipped_indices] == target, permutation_sort[clipped_indices], default_value)
233
-
234
-
235
- def _set_default_value(
236
- data: SingleDataset, component: ComponentType, field: str, default_value: int | float | np.ndarray
237
- ):
238
- """
239
- This function sets the default value in the data that is to be validated, so the default values are included in the
240
- validation.
241
-
242
- Args:
243
- data: The input/update data set for all components
244
- component: The component of interest
245
- field: The field of interest
246
- default_value: Some values are not required, but will receive a default value in the C++ core. To do a proper
247
- input validation, these default values should be included in the validation. It can be a fixed value for the
248
- entire column (int/float) or be different for each element (np.ndarray).
249
-
250
- Returns:
251
-
252
- """
253
- if np.isnan(_nan_type(component, field)):
254
- mask = np.isnan(data[component][field])
255
- else:
256
- mask = data[component][field] == _nan_type(component, field)
257
- if isinstance(default_value, np.ndarray):
258
- data[component][field][mask] = default_value[mask]
259
- else:
260
- data[component][field][mask] = default_value
261
-
262
-
263
- def _get_valid_ids(data: SingleDataset, ref_components: ComponentType | list[ComponentType]) -> list[int]:
264
- """
265
- This function returns the valid IDs specified by all ref_components
266
-
267
- Args:
268
- data: The input/update data set for all components
269
- ref_components: The component or components in which we want to look for ids
270
-
271
- Returns:
272
- list[int]: the list of valid IDs
273
- """
274
- # For convenience, ref_component may be a string and we'll convert it to a 'list' containing that string as it's
275
- # single element.
276
- if isinstance(ref_components, (str, ComponentType)):
277
- ref_components = cast(list[ComponentType], [ref_components])
278
-
279
- # Create a set of ids by chaining the ids of all ref_components
280
- valid_ids = set()
281
- for ref_component in ref_components:
282
- if ref_component in data:
283
- nan = _nan_type(ref_component, "id")
284
- if np.isnan(nan):
285
- mask = ~np.isnan(data[ref_component]["id"])
286
- else:
287
- mask = np.not_equal(data[ref_component]["id"], nan)
288
- valid_ids.update(data[ref_component]["id"][mask])
289
-
290
- return list(valid_ids)
291
-
292
-
293
- def _get_mask(data: SingleDataset, component: ComponentType, field: str, **filters: Any) -> np.ndarray:
294
- """
295
- Get a mask based on the specified filters. E.g. measured_terminal_type=MeasuredTerminalType.source.
296
-
297
- Args:
298
- data: The input/update data set for all components
299
- component: The component of interest
300
- field: The field of interest
301
- ref_components: The component or components in which we want to look for ids
302
- **filters: One or more filters on the dataset. E.
303
-
304
- Returns:
305
- np.ndarray: the mask
306
- """
307
- values = data[component][field]
308
- mask = np.ones(shape=values.shape, dtype=bool)
309
- for filter_field, filter_value in filters.items():
310
- mask = np.logical_and(mask, data[component][filter_field] == filter_value)
311
-
312
- return mask
1
+ # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+
5
+ """
6
+ Utilities used for validation. Only errors_to_string() is intended for end users.
7
+ """
8
+
9
+ import re
10
+ from typing import Any, cast
11
+
12
+ import numpy as np
13
+
14
+ from power_grid_model._core.dataset_definitions import ComponentType, DatasetType
15
+ from power_grid_model._core.power_grid_meta import power_grid_meta_data
16
+ from power_grid_model.data_types import SingleArray, SingleComponentData, SingleDataset
17
+ from power_grid_model.validation.errors import ValidationError
18
+
19
+
20
+ def _eval_expression(data: np.ndarray, expression: int | float | str) -> np.ndarray:
21
+ """
22
+ Wrapper function that checks the type of the 'expression'. If the expression is a string, it is assumed to be a
23
+ field expression and the expression is validated. Otherwise it is assumed to be a numerical value and the value
24
+ is casted to a numpy 'array'.
25
+
26
+ Args:
27
+ data: A numpy structured array
28
+ expression: A numerical value, or a string, representing a (combination of) field(s)
29
+
30
+ Returns: The number or an evaluation of the field name(s) in the data, always represented as a Numpy 'array'.
31
+
32
+ Examples:
33
+ 123 -> np.array(123)
34
+ 123.4 -> np.array(123.4)
35
+ 'value' -> data['value']
36
+ 'foo/bar' -> data['foo'] / data[bar]
37
+
38
+ """
39
+ if isinstance(expression, str):
40
+ return _eval_field_expression(data, expression)
41
+ return np.array(expression)
42
+
43
+
44
+ def _eval_field_expression(data: np.ndarray, expression: str) -> np.ndarray:
45
+ """
46
+ A field expression can either be the name of a field (e.g. 'field_x') in the data, or a ratio between two fields
47
+ (e.g. 'field_x / field_y'). The expression is checked on validity and then the fields are checked to be present in
48
+ the data. If the expression is a single field name, the field is returned. If it is a ratio, the ratio is
49
+ calculated and returned. Values divided by 0 will result in nan values without warning.
50
+
51
+ Args:
52
+ data: A numpy structured array
53
+ expression: A string, representing a (combination of) field(s)
54
+
55
+ Expression should be a combination of:
56
+ - field names (may contain lower case letters, numbers and underscores)
57
+ - a single mathematical operator /
58
+
59
+ Returns: An evaluation of the field name(s) in the data.
60
+
61
+ Examples:
62
+ 'value' -> data['value']
63
+ 'foo/bar' -> data['foo'] / data['bar']
64
+
65
+ """
66
+ # Validate the expression
67
+ match = re.fullmatch(r"[a-z][a-z0-9_]*(\s*/\s*[a-z][a-z0-9_]*)?", expression)
68
+ if not match:
69
+ raise ValueError(f"Invalid field expression '{expression}'")
70
+
71
+ if data.dtype.names is None:
72
+ raise ValueError("No attributes available in meta")
73
+
74
+ # Find all field names and check if they exist in the dataset
75
+ fields = [f.strip() for f in expression.split("/")]
76
+ for field in fields:
77
+ if field not in data.dtype.names:
78
+ raise KeyError(f"Invalid field name {field}")
79
+
80
+ if len(fields) == 1:
81
+ return data[fields[0]]
82
+
83
+ max_num_fields = 2
84
+ if len(fields) != max_num_fields:
85
+ raise ValueError(f"There should be exactly {max_num_fields} fields, got {len(fields)} fields")
86
+ zero_div = np.logical_or(np.equal(data[fields[1]], 0.0), np.logical_not(np.isfinite(data[fields[1]])))
87
+ if np.any(zero_div):
88
+ result = np.full_like(data[fields[0]], np.nan)
89
+ np.true_divide(data[fields[0]], data[fields[1]], out=result, where=~zero_div)
90
+ return result
91
+ return np.true_divide(data[fields[0]], data[fields[1]])
92
+
93
+
94
+ def _update_input_data(input_data: SingleDataset, update_data: SingleDataset):
95
+ """
96
+ Update the input data using the available non-nan values in the update data.
97
+ """
98
+
99
+ merged_data = {component: array.copy() for component, array in input_data.items()}
100
+ for component in update_data:
101
+ _update_component_data(component, merged_data[component], update_data[component])
102
+ return merged_data
103
+
104
+
105
+ def _update_component_data(
106
+ component: ComponentType, input_data: SingleComponentData, update_data: SingleComponentData
107
+ ) -> None:
108
+ """
109
+ Update the data in a single component data set, with another single component data set,
110
+ indexed on the "id" field and only non-NaN values are overwritten.
111
+ """
112
+ if isinstance(input_data, np.ndarray) and isinstance(update_data, np.ndarray):
113
+ return _update_component_array_data(component=component, input_data=input_data, update_data=update_data)
114
+
115
+ raise NotImplementedError # TODO(mgovers): add support for columnar data
116
+
117
+
118
+ def _update_component_array_data(component: ComponentType, input_data: SingleArray, update_data: SingleArray) -> None:
119
+ """
120
+ Update the data in a numpy array, with another numpy array,
121
+ indexed on the "id" field and only non-NaN values are overwritten.
122
+ """
123
+ batch_dataset_ndim = 2
124
+ if update_data.dtype.names is None:
125
+ raise ValueError("Invalid data format")
126
+
127
+ optional_ids_active = (
128
+ "id" in update_data.dtype.names
129
+ and np.all(update_data["id"] == np.iinfo(update_data["id"].dtype).min)
130
+ and len(update_data["id"]) == len(input_data["id"])
131
+ )
132
+ update_data_ids = input_data["id"] if optional_ids_active else update_data["id"]
133
+
134
+ for field in update_data.dtype.names:
135
+ if field == "id":
136
+ continue
137
+ nan = _nan_type(component, field, DatasetType.update)
138
+ mask = ~np.isnan(update_data[field]) if np.isnan(nan) else np.not_equal(update_data[field], nan)
139
+
140
+ if mask.ndim == batch_dataset_ndim:
141
+ for phase in range(mask.shape[1]):
142
+ # find indexers of to-be-updated object
143
+ sub_mask = mask[:, phase]
144
+ idx = _get_indexer(input_data["id"], update_data_ids[sub_mask])
145
+ # update
146
+ input_data[field][idx, phase] = update_data[field][sub_mask, phase]
147
+ else:
148
+ # find indexers of to-be-updated object
149
+ idx = _get_indexer(input_data["id"], update_data_ids[mask])
150
+ # update
151
+ input_data[field][idx] = update_data[field][mask]
152
+
153
+
154
+ def errors_to_string(
155
+ errors: list[ValidationError] | dict[int, list[ValidationError]] | None,
156
+ name: str = "the data",
157
+ details: bool = False,
158
+ id_lookup: list[str] | dict[int, str] | None = None,
159
+ ) -> str:
160
+ """
161
+ Convert a set of errors (list or dict) to a human readable string representation.
162
+
163
+ Args:
164
+ errors: The error objects. List for input_data only, dict for batch data.
165
+ name: Human understandable name of the dataset, e.g. input_data, or update_data.
166
+ details: Display object ids and error specific information.
167
+ id_lookup: A list or dict (int->str) containing textual object ids
168
+
169
+ Returns:
170
+ A human readable string representation of a set of errors.
171
+ """
172
+ if errors is None or len(errors) == 0:
173
+ return f"{name}: OK"
174
+ if isinstance(errors, dict):
175
+ return "\n".join(errors_to_string(err, f"{name}, batch #{i}", details) for i, err in sorted(errors.items()))
176
+ if len(errors) == 1 and not details:
177
+ return f"There is a validation error in {name}:\n\t{errors[0]}"
178
+ if len(errors) == 1:
179
+ msg = f"There is a validation error in {name}:\n"
180
+ else:
181
+ msg = f"There are {len(errors)} validation errors in {name}:\n"
182
+ if details:
183
+ for error in errors:
184
+ msg += "\n\t" + str(error) + "\n"
185
+ msg += "".join(f"\t\t{k}: {v}\n" for k, v in error.get_context(id_lookup).items())
186
+ else:
187
+ msg += "\n".join(f"{i + 1:>4}. {err}" for i, err in enumerate(errors))
188
+ return msg
189
+
190
+
191
+ def _nan_type(component: ComponentType, field: str, data_type: DatasetType = DatasetType.input):
192
+ """Helper function to retrieve the nan value for a certain field as defined in the power_grid_meta_data."""
193
+ return power_grid_meta_data[data_type][component].nans[field]
194
+
195
+
196
+ def _get_indexer(source: np.ndarray, target: np.ndarray, default_value: int | None = None) -> np.ndarray:
197
+ """
198
+ Given array of values from a source and a target dataset.
199
+ Find the position of each value in the target dataset in the context of the source dataset.
200
+ This is needed to update values in the dataset by id lookup.
201
+ Internally this is done by sorting the input ids, then using binary search lookup.
202
+
203
+ E.g.: Find the position of each id in an update (target) dataset in the input (source) dataset
204
+
205
+ >>> input_ids = [1, 2, 3, 4, 5]
206
+ >>> update_ids = [3]
207
+ >>> assert _get_indexer(input_ids, update_ids) == np.array([2])
208
+
209
+ Args:
210
+ source: array of values in the source dataset
211
+ target: array of values in the target dataset
212
+ default_value (optional): the default index to provide for target values not in source
213
+
214
+ Returns:
215
+ np.ndarray: array of positions of the values from target dataset in the source dataset
216
+ if default_value is None, (source[result] == target)
217
+ else, ((source[result] == target) | (source[result] == default_value))
218
+
219
+ Raises:
220
+ IndexError: if default_value is None and there were values in target that were not in source
221
+ """
222
+
223
+ permutation_sort = np.argsort(source) # complexity O(N_input * logN_input)
224
+ indices = np.searchsorted(source, target, sorter=permutation_sort) # complexity O(N_update * logN_input)
225
+
226
+ if default_value is None:
227
+ return permutation_sort[indices]
228
+
229
+ if len(source) == 0:
230
+ return np.full_like(target, fill_value=default_value)
231
+
232
+ clipped_indices = np.take(permutation_sort, indices, mode="clip")
233
+ return np.where(source[clipped_indices] == target, permutation_sort[clipped_indices], default_value)
234
+
235
+
236
+ def _set_default_value(
237
+ data: SingleDataset, component: ComponentType, field: str, default_value: int | float | np.ndarray
238
+ ):
239
+ """
240
+ This function sets the default value in the data that is to be validated, so the default values are included in the
241
+ validation.
242
+
243
+ Args:
244
+ data: The input/update data set for all components
245
+ component: The component of interest
246
+ field: The field of interest
247
+ default_value: Some values are not required, but will receive a default value in the C++ core. To do a proper
248
+ input validation, these default values should be included in the validation. It can be a fixed value for the
249
+ entire column (int/float) or be different for each element (np.ndarray).
250
+
251
+ Returns:
252
+
253
+ """
254
+ if np.isnan(_nan_type(component, field)):
255
+ mask = np.isnan(data[component][field])
256
+ else:
257
+ mask = data[component][field] == _nan_type(component, field)
258
+ if isinstance(default_value, np.ndarray):
259
+ data[component][field][mask] = default_value[mask]
260
+ else:
261
+ data[component][field][mask] = default_value
262
+
263
+
264
+ def _get_valid_ids(data: SingleDataset, ref_components: ComponentType | list[ComponentType]) -> list[int]:
265
+ """
266
+ This function returns the valid IDs specified by all ref_components.
267
+
268
+ Args:
269
+ data: The input/update data set for all components
270
+ ref_components: The component or components in which we want to look for ids
271
+
272
+ Returns:
273
+ list[int]: the list of valid IDs
274
+ """
275
+ # For convenience, ref_component may be a string and we'll convert it to a 'list' containing that string as it's
276
+ # single element.
277
+ if isinstance(ref_components, (str, ComponentType)):
278
+ ref_components = cast(list[ComponentType], [ref_components])
279
+
280
+ # Create a set of ids by chaining the ids of all ref_components
281
+ valid_ids = set()
282
+ for ref_component in ref_components:
283
+ if ref_component in data:
284
+ nan = _nan_type(ref_component, "id")
285
+ if np.isnan(nan):
286
+ mask = ~np.isnan(data[ref_component]["id"])
287
+ else:
288
+ mask = np.not_equal(data[ref_component]["id"], nan)
289
+ valid_ids.update(data[ref_component]["id"][mask])
290
+
291
+ return list(valid_ids)
292
+
293
+
294
+ def _get_mask(data: SingleDataset, component: ComponentType, field: str, **filters: Any) -> np.ndarray:
295
+ """
296
+ Get a mask based on the specified filters. E.g. measured_terminal_type=MeasuredTerminalType.source.
297
+
298
+ Args:
299
+ data: The input/update data set for all components
300
+ component: The component of interest
301
+ field: The field of interest
302
+ ref_components: The component or components in which we want to look for ids
303
+ **filters: One or more filters on the dataset. E.
304
+
305
+ Returns:
306
+ np.ndarray: the mask
307
+ """
308
+ values = data[component][field]
309
+ mask = np.ones(shape=values.shape, dtype=bool)
310
+ for filter_field, filter_value in filters.items():
311
+ mask = np.logical_and(mask, data[component][filter_field] == filter_value)
312
+
313
+ return mask
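
The module docstring in the hunk above notes that only errors_to_string() is intended for end users; the remaining helpers are internal. As a brief, hedged sketch of how that entry point is typically combined with the package's other public functions (initialize_array and validate_input_data are assumed here from the same power_grid_model package; the snippet is illustrative and not part of the diff):

    # Hedged illustration only, not part of the released package content above.
    from power_grid_model import initialize_array
    from power_grid_model.validation import errors_to_string, validate_input_data

    # Build a minimal input dataset: two nodes with a rated voltage.
    node = initialize_array("input", "node", 2)
    node["id"] = [1, 2]
    node["u_rated"] = [10.5e3, 10.5e3]
    input_data = {"node": node}

    # validate_input_data returns a list of ValidationError objects, or None if the data is valid.
    errors = validate_input_data(input_data)
    print(errors_to_string(errors, name="input_data", details=True))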