power-grid-model 1.10.74__py3-none-win_amd64.whl → 1.12.119__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (67)
  1. power_grid_model/__init__.py +54 -29
  2. power_grid_model/_core/__init__.py +3 -3
  3. power_grid_model/_core/buffer_handling.py +507 -478
  4. power_grid_model/_core/data_handling.py +195 -141
  5. power_grid_model/_core/data_types.py +142 -0
  6. power_grid_model/_core/dataset_definitions.py +109 -109
  7. power_grid_model/_core/enum.py +226 -0
  8. power_grid_model/_core/error_handling.py +215 -202
  9. power_grid_model/_core/errors.py +134 -0
  10. power_grid_model/_core/index_integer.py +17 -17
  11. power_grid_model/_core/options.py +71 -69
  12. power_grid_model/_core/power_grid_core.py +577 -597
  13. power_grid_model/_core/power_grid_dataset.py +545 -528
  14. power_grid_model/_core/power_grid_meta.py +262 -244
  15. power_grid_model/_core/power_grid_model.py +1025 -692
  16. power_grid_model/_core/power_grid_model_c/__init__.py +3 -0
  17. power_grid_model/_core/power_grid_model_c/bin/power_grid_model_c.dll +0 -0
  18. power_grid_model/_core/power_grid_model_c/get_pgm_dll_path.py +63 -0
  19. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/basics.h +251 -0
  20. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/buffer.h +108 -0
  21. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset.h +332 -0
  22. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset_definitions.h +1060 -0
  23. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/handle.h +111 -0
  24. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/meta_data.h +189 -0
  25. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/model.h +130 -0
  26. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/options.h +142 -0
  27. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/serialization.h +118 -0
  28. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c.h +36 -0
  29. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/basics.hpp +65 -0
  30. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/buffer.hpp +61 -0
  31. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/dataset.hpp +224 -0
  32. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/handle.hpp +108 -0
  33. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/meta_data.hpp +84 -0
  34. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/model.hpp +63 -0
  35. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/options.hpp +52 -0
  36. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/serialization.hpp +124 -0
  37. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/utils.hpp +81 -0
  38. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp.hpp +19 -0
  39. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfig.cmake +37 -0
  40. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfigVersion.cmake +65 -0
  41. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets-release.cmake +19 -0
  42. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets.cmake +144 -0
  43. power_grid_model/_core/power_grid_model_c/lib/power_grid_model_c.lib +0 -0
  44. power_grid_model/_core/power_grid_model_c/share/LICENSE +292 -0
  45. power_grid_model/_core/power_grid_model_c/share/README.md +15 -0
  46. power_grid_model/_core/serialization.py +319 -317
  47. power_grid_model/_core/typing.py +20 -0
  48. power_grid_model/{_utils.py → _core/utils.py} +798 -783
  49. power_grid_model/data_types.py +321 -319
  50. power_grid_model/enum.py +27 -214
  51. power_grid_model/errors.py +37 -123
  52. power_grid_model/typing.py +43 -48
  53. power_grid_model/utils.py +529 -400
  54. power_grid_model/validation/__init__.py +25 -14
  55. power_grid_model/validation/_rules.py +1167 -904
  56. power_grid_model/validation/_validation.py +1172 -980
  57. power_grid_model/validation/assertions.py +93 -92
  58. power_grid_model/validation/errors.py +602 -520
  59. power_grid_model/validation/utils.py +313 -318
  60. {power_grid_model-1.10.74.dist-info → power_grid_model-1.12.119.dist-info}/METADATA +162 -171
  61. power_grid_model-1.12.119.dist-info/RECORD +65 -0
  62. {power_grid_model-1.10.74.dist-info → power_grid_model-1.12.119.dist-info}/WHEEL +1 -1
  63. power_grid_model-1.12.119.dist-info/entry_points.txt +3 -0
  64. power_grid_model/_core/_power_grid_core.dll +0 -0
  65. power_grid_model-1.10.74.dist-info/RECORD +0 -32
  66. power_grid_model-1.10.74.dist-info/top_level.txt +0 -1
  67. {power_grid_model-1.10.74.dist-info → power_grid_model-1.12.119.dist-info}/licenses/LICENSE +0 -0
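
The headline change in this release is a restructuring of the package internals: helpers move under the private `power_grid_model._core` namespace (e.g. `_utils.py` → `_core/utils.py`, plus new `_core/enum.py` and `_core/errors.py`), while the public `power_grid_model.enum` and `power_grid_model.errors` modules shrink to thin re-export shims. A sketch of the import-path impact (ours, not part of the diff; the `_core` names are private implementation details and may change without notice):

```python
# Public modules remain and re-export from _core in 1.12.x:
from power_grid_model.enum import WindingType       # still works
from power_grid_model.errors import PowerGridError  # still works

# The old private helper module is gone; its new home is private as well,
# so downstream code should not rely on either path:
# 1.10.x: from power_grid_model._utils import is_nan_or_default
# 1.12.x: from power_grid_model._core.utils import is_nan_or_default
```

The hunk below is the diff of `power_grid_model/validation/_rules.py` (entry 55 above, +1167 -904).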
@@ -1,904 +1,1167 @@
- # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
- #
- # SPDX-License-Identifier: MPL-2.0
-
- """
- This module contains a set of comparison rules. They all share the same (or similar) logic and interface.
-
- In general, each function checks the values in a single 'column' (i.e. field) of a numpy structured array and it
- returns an error object containing the component, the field and the ids of the records that did not match the rule.
- E.g. all_greater_than_zero(data, 'node', 'u_rated') returns a NotGreaterThanError if any of the nodes' `u_rated`
- values are 0 or less.
-
- In general, the rules are designed to ignore NaN values, except for none_missing() which explicitly checks for NaN
- values in the entire data set. It is important to understand that np.less_equal(x) yields different results than
- np.logical_not(np.greater(x)), as a NaN comparison always results in False. The most extreme example is that even
- np.nan == np.nan yields False.
-
-     np.less_equal(            [0.1, 0.2, 0.3, np.nan], 0.0)  = [False, False, False, False] -> OK
-     np.logical_not(np.greater([0.1, 0.2, 0.3, np.nan], 0.0)) = [False, False, False, True]  -> Error (false positive)
-
- Input data:
-
-     data: SingleDataset
-         The entire input/update data set
-
-     component: ComponentType
-         The name of the component, which should be an existing key in the data
-
-     field: str
-         The name of the column, which should be a field in the component data (numpy structured array)
-
- Output data:
-     errors: list[ValidationError]
-         A list containing errors; in case of success, `errors` is the empty list: [].
-
- """
- from enum import Enum
- from typing import Any, Callable, Type, TypeVar
-
- import numpy as np
-
- from power_grid_model import ComponentType
- from power_grid_model._utils import get_comp_size, is_nan_or_default
- from power_grid_model.data_types import SingleDataset
- from power_grid_model.enum import FaultPhase, FaultType, WindingType
- from power_grid_model.validation.errors import (
-     ComparisonError,
-     FaultPhaseError,
-     IdNotInDatasetError,
-     InfinityError,
-     InvalidAssociatedEnumValueError,
-     InvalidEnumValueError,
-     InvalidIdError,
-     MissingValueError,
-     MultiComponentNotUniqueError,
-     NotBetweenError,
-     NotBetweenOrAtError,
-     NotBooleanError,
-     NotGreaterOrEqualError,
-     NotGreaterThanError,
-     NotIdenticalError,
-     NotLessOrEqualError,
-     NotLessThanError,
-     NotUniqueError,
-     PQSigmaPairError,
-     SameValueError,
-     TransformerClockError,
-     TwoValuesZeroError,
-     ValidationError,
- )
- from power_grid_model.validation.utils import _eval_expression, _get_mask, _get_valid_ids, _nan_type, _set_default_value
-
- Error = TypeVar("Error", bound=ValidationError)
- CompError = TypeVar("CompError", bound=ComparisonError)
-
-
- def all_greater_than_zero(data: SingleDataset, component: ComponentType, field: str) -> list[NotGreaterThanError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are greater than
-     zero. Returns an empty list on success, or a list containing a single error object on failure.
-
-     Args:
-         data (SingleDataset): The input/update data set for all components
-         component (ComponentType): The component of interest
-         field (str): The field of interest
-
-     Returns:
-         A list containing zero or one NotGreaterThanErrors, listing all ids where the value in the field of interest
-         was zero or less.
-     """
-     return all_greater_than(data, component, field, 0.0)
-
-
- def all_greater_than_or_equal_to_zero(
-     data: SingleDataset,
-     component: ComponentType,
-     field: str,
-     default_value: np.ndarray | int | float | None = None,
- ) -> list[NotGreaterOrEqualError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are greater than,
-     or equal to zero. Returns an empty list on success, or a list containing a single error object on failure.
-
-     Args:
-         data (SingleDataset): The input/update data set for all components
-         component (ComponentType): The component of interest
-         field (str): The field of interest
-         default_value (np.ndarray | int | float | None, optional): Some values are not required, but will
-             receive a default value in the C++ core. To do a proper input validation, these default values should be
-             included in the validation. It can be a fixed value for the entire column (int/float) or be different for
-             each element (np.ndarray).
-
-     Returns:
-         A list containing zero or one NotGreaterOrEqualErrors, listing all ids where the value in the field of
-         interest was less than zero.
-     """
-     return all_greater_or_equal(data, component, field, 0.0, default_value)
-
-
- def all_greater_than(
-     data: SingleDataset, component: ComponentType, field: str, ref_value: int | float | str
- ) -> list[NotGreaterThanError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are greater than
-     the reference value. Returns an empty list on success, or a list containing a single error object on failure.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-         ref_value: The reference value against which all values in the 'field' column are compared. If the reference
-             value is a string, it is assumed to be another field (e.g. 'field_x') of the same component, or a ratio
-             between two fields (e.g. 'field_x / field_y')
-
-     Returns:
-         A list containing zero or one NotGreaterThanErrors, listing all ids where the value in the field of interest
-         was less than, or equal to, the ref_value.
-     """
-
-     def not_greater(val: np.ndarray, *ref: np.ndarray):
-         return np.less_equal(val, *ref)
-
-     return none_match_comparison(data, component, field, not_greater, ref_value, NotGreaterThanError)
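
For context, a minimal usage sketch of these rules (ours, not part of the diff; it assumes the public `initialize_array` helper and calls the private rule directly, whereas user code would normally go through `power_grid_model.validation.validate_input_data`):

```python
import numpy as np
from power_grid_model import initialize_array

node = initialize_array("input", "node", 3)
node["id"] = [1, 2, 3]
node["u_rated"] = [10.5e3, 0.0, np.nan]  # id 2 violates the rule; the NaN of id 3 is ignored

errors = all_greater_than_zero({"node": node}, "node", "u_rated")
print(errors)  # a single NotGreaterThanError listing id 2
```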
-
-
- def all_greater_or_equal(
-     data: SingleDataset,
-     component: ComponentType,
-     field: str,
-     ref_value: int | float | str,
-     default_value: np.ndarray | int | float | None = None,
- ) -> list[NotGreaterOrEqualError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are greater than,
-     or equal to the reference value. Returns an empty list on success, or a list containing a single error object on
-     failure.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-         ref_value: The reference value against which all values in the 'field' column are compared. If the reference
-             value is a string, it is assumed to be another field (e.g. 'field_x') of the same component, or a ratio
-             between two fields (e.g. 'field_x / field_y')
-         default_value: Some values are not required, but will receive a default value in the C++ core. To do a proper
-             input validation, these default values should be included in the validation. It can be a fixed value for
-             the entire column (int/float) or be different for each element (np.ndarray).
-
-     Returns:
-         A list containing zero or one NotGreaterOrEqualErrors, listing all ids where the value in the field of
-         interest was less than the ref_value.
-
-     """
-
-     def not_greater_or_equal(val: np.ndarray, *ref: np.ndarray):
-         return np.less(val, *ref)
-
-     return none_match_comparison(
-         data, component, field, not_greater_or_equal, ref_value, NotGreaterOrEqualError, default_value
-     )
-
-
- def all_less_than(
-     data: SingleDataset, component: ComponentType, field: str, ref_value: int | float | str
- ) -> list[NotLessThanError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are less than the
-     reference value. Returns an empty list on success, or a list containing a single error object on failure.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-         ref_value: The reference value against which all values in the 'field' column are compared. If the reference
-             value is a string, it is assumed to be another field (e.g. 'field_x') of the same component, or a ratio
-             between two fields (e.g. 'field_x / field_y')
-
-     Returns:
-         A list containing zero or one NotLessThanErrors, listing all ids where the value in the field of interest was
-         greater than, or equal to, the ref_value.
-     """
-
-     def not_less(val: np.ndarray, *ref: np.ndarray):
-         return np.greater_equal(val, *ref)
-
-     return none_match_comparison(data, component, field, not_less, ref_value, NotLessThanError)
-
-
- def all_less_or_equal(
-     data: SingleDataset, component: ComponentType, field: str, ref_value: int | float | str
- ) -> list[NotLessOrEqualError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are less than,
-     or equal to the reference value. Returns an empty list on success, or a list containing a single error object on
-     failure.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-         ref_value: The reference value against which all values in the 'field' column are compared. If the reference
-             value is a string, it is assumed to be another field (e.g. 'field_x') of the same component, or a ratio
-             between two fields (e.g. 'field_x / field_y')
-
-     Returns:
-         A list containing zero or one NotLessOrEqualErrors, listing all ids where the value in the field of interest
-         was greater than the ref_value.
-
-     """
-
-     def not_less_or_equal(val: np.ndarray, *ref: np.ndarray):
-         return np.greater(val, *ref)
-
-     return none_match_comparison(data, component, field, not_less_or_equal, ref_value, NotLessOrEqualError)
-
-
- # pylint: disable=too-many-arguments
- def all_between(  # pylint: disable=too-many-positional-arguments
-     data: SingleDataset,
-     component: ComponentType,
-     field: str,
-     ref_value_1: int | float | str,
-     ref_value_2: int | float | str,
-     default_value: np.ndarray | int | float | None = None,
- ) -> list[NotBetweenError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are (exclusively)
-     between reference value 1 and 2. Value 1 may be smaller, but also larger than value 2. Returns an empty list on
-     success, or a list containing a single error object on failure.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-         ref_value_1: The first reference value against which all values in the 'field' column are compared. If the
-             reference value is a string, it is assumed to be another field (e.g. 'field_x') of the same component,
-             or a ratio between two fields (e.g. 'field_x / field_y')
-         ref_value_2: The second reference value against which all values in the 'field' column are compared. If the
-             reference value is a string, it is assumed to be another field (e.g. 'field_x') of the same component,
-             or a ratio between two fields (e.g. 'field_x / field_y')
-         default_value: Some values are not required, but will receive a default value in the C++ core. To do a proper
-             input validation, these default values should be included in the validation. It can be a fixed value for
-             the entire column (int/float) or be different for each element (np.ndarray).
-
-     Returns:
-         A list containing zero or one NotBetweenErrors, listing all ids where the value in the field of interest was
-         outside the range defined by the reference values.
-     """
-
-     def outside(val: np.ndarray, *ref: np.ndarray) -> np.ndarray:
-         return np.logical_or(np.less_equal(val, np.minimum(*ref)), np.greater_equal(val, np.maximum(*ref)))
-
-     return none_match_comparison(
-         data, component, field, outside, (ref_value_1, ref_value_2), NotBetweenError, default_value
-     )
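
The bounds check above is order-insensitive thanks to the minimum/maximum pair; a standalone illustration of the `outside` predicate (ours, not part of the diff):

```python
import numpy as np

def outside(val, lo, hi):
    # Mirrors all_between: a strict violation on either side, regardless of
    # whether the bounds are passed as (lo, hi) or (hi, lo).
    return np.logical_or(np.less_equal(val, np.minimum(lo, hi)),
                         np.greater_equal(val, np.maximum(lo, hi)))

val = np.array([-1.5, 0.0, 1.0, np.nan])
print(outside(val, 1.0, -1.0))  # [ True False  True False] -> NaN is ignored
```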
-
-
- # pylint: disable=too-many-arguments
- def all_between_or_at(  # pylint: disable=too-many-positional-arguments
-     data: SingleDataset,
-     component: ComponentType,
-     field: str,
-     ref_value_1: int | float | str,
-     ref_value_2: int | float | str,
-     default_value_1: np.ndarray | int | float | None = None,
-     default_value_2: np.ndarray | int | float | None = None,
- ) -> list[NotBetweenOrAtError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are inclusively
-     between reference value 1 and 2. Value 1 may be smaller, but also larger than value 2. Returns an empty list on
-     success, or a list containing a single error object on failure.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-         ref_value_1: The first reference value against which all values in the 'field' column are compared. If the
-             reference value is a string, it is assumed to be another field (e.g. 'field_x') of the same component,
-             or a ratio between two fields (e.g. 'field_x / field_y')
-         ref_value_2: The second reference value against which all values in the 'field' column are compared. If the
-             reference value is a string, it is assumed to be another field (e.g. 'field_x') of the same component,
-             or a ratio between two fields (e.g. 'field_x / field_y')
-         default_value_1: Some values are not required, but will receive a default value in the C++ core. To do a
-             proper input validation, these default values should be included in the validation. It can be a fixed
-             value for the entire column (int/float) or be different for each element (np.ndarray).
-         default_value_2: Some values can have a double default: the default will be set to another attribute of the
-             component, but if that attribute is missing, the default will be set to a fixed value.
-
-     Returns:
-         A list containing zero or one NotBetweenOrAtErrors, listing all ids where the value in the field of interest
-         was outside the range defined by the reference values.
-     """
-
-     def outside(val: np.ndarray, *ref: np.ndarray) -> np.ndarray:
-         return np.logical_or(np.less(val, np.minimum(*ref)), np.greater(val, np.maximum(*ref)))
-
-     return none_match_comparison(
-         data,
-         component,
-         field,
-         outside,
-         (ref_value_1, ref_value_2),
-         NotBetweenOrAtError,
-         default_value_1,
-         default_value_2,
-     )
-
-
- def none_match_comparison(  # pylint: disable=too-many-arguments
-     data: SingleDataset,
-     component: ComponentType,
-     field: str,
-     compare_fn: Callable,
-     ref_value: ComparisonError.RefType,
-     error: Type[CompError] = ComparisonError,  # type: ignore
-     default_value_1: np.ndarray | int | float | None = None,
-     default_value_2: np.ndarray | int | float | None = None,
- ) -> list[CompError]:
-     # pylint: disable=too-many-positional-arguments
-     """
-     For all records of a particular type of component, check if the values in the 'field' column match the
-     comparison. Returns an empty list if none of the values match the comparison, or a list containing a single
-     error object when the value in 'field' of at least one record matches the comparison.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-         compare_fn: A function that takes the data in the 'field' column, and any number of reference values
-         ref_value: A reference value, or a tuple of reference values, against which all values in the 'field' column
-             are compared using the compare_fn. If a reference value is a string, it is assumed to be another field
-             (e.g. 'field_x') of the same component, or a ratio between two fields (e.g. 'field_x / field_y')
-         error: The type (class) of error that should be returned in case any of the values match the comparison.
-         default_value_1: Some values are not required, but will receive a default value in the C++ core. To do a
-             proper input validation, these default values should be included in the validation. It can be a fixed
-             value for the entire column (int/float) or be different for each element (np.ndarray).
-         default_value_2: Some values can have a double default: the default will be set to another attribute of the
-             component, but if that attribute is missing, the default will be set to a fixed value.
-
-     Returns:
-         A list containing zero or one comparison errors (should be a subclass of ComparisonError), listing all ids
-         where the value in the field of interest matched the comparison.
-     """
-     if default_value_1 is not None:
-         _set_default_value(data=data, component=component, field=field, default_value=default_value_1)
-     if default_value_2 is not None:
-         _set_default_value(data=data, component=component, field=field, default_value=default_value_2)
-     component_data = data[component]
-     if not isinstance(component_data, np.ndarray):
-         raise NotImplementedError()  # TODO(mgovers): add support for columnar data
-
-     if isinstance(ref_value, tuple):
-         ref = tuple(_eval_expression(component_data, v) for v in ref_value)
-     else:
-         ref = (_eval_expression(component_data, ref_value),)
-     matches = compare_fn(component_data[field], *ref)
-     if matches.any():
-         if matches.ndim > 1:
-             matches = matches.any(axis=1)
-         ids = component_data["id"][matches].flatten().tolist()
-         return [error(component, field, ids, ref_value)]
-     return []
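
`none_match_comparison` is the extension point behind all rules above: each rule passes the *negation* of its condition, so NaN entries (which compare False against everything) never match. A hypothetical extra rule, to show how `compare_fn` plugs in (our sketch, not part of the package):

```python
import numpy as np

def all_nonzero(data, component, field):
    """Hypothetical rule: report records whose value is exactly zero."""

    def is_zero(val: np.ndarray, *ref: np.ndarray):
        # the *matching* (i.e. failing) condition; NaN != 0 never matches
        return np.equal(val, *ref)

    return none_match_comparison(data, component, field, is_zero, 0.0, ComparisonError)
```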
-
-
- def all_identical(data: SingleDataset, component: ComponentType, field: str) -> list[NotIdenticalError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are identical.
-
-     Args:
-         data (SingleDataset): The input/update data set for all components
-         component (ComponentType): The component of interest
-         field (str): The field of interest
-
-     Returns:
-         A list containing zero or one NotIdenticalError, listing all ids of that component if the value in the field
-         of interest was not identical across all components, all values for those ids, the set of unique values in
-         that field and the number of unique values in that field.
-     """
-     field_data = data[component][field]
-     if len(field_data) > 0:
-         first = field_data[0]
-         if np.any(field_data != first):
-             return [NotIdenticalError(component, field, data[component]["id"], list(field_data))]
-
-     return []
-
-
- def all_enabled_identical(
-     data: SingleDataset, component: ComponentType, field: str, status_field: str
- ) -> list[NotIdenticalError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are identical.
-     Only entries are checked where the 'status' field is not 0.
-
-     Args:
-         data (SingleDataset): The input/update data set for all components
-         component (ComponentType): The component of interest
-         field (str): The field of interest
-         status_field (str): The status field based on which to decide whether a component is enabled
-
-     Returns:
-         A list containing zero or one NotIdenticalError, listing:
-
-         - all ids of enabled components if the value in the field of interest was not identical across all enabled
-           components
-         - all values of the 'field' column for enabled components (including duplications)
-         - the set of unique such values
-         - the amount of unique such values.
-     """
-     return all_identical(
-         {key: (value if key is not component else value[value[status_field] != 0]) for key, value in data.items()},
-         component,
-         field,
-     )
-
-
- def all_unique(data: SingleDataset, component: ComponentType, field: str) -> list[NotUniqueError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are unique within
-     the 'field' column of that component.
-
-     Args:
-         data (SingleDataset): The input/update data set for all components
-         component (ComponentType): The component of interest
-         field (str): The field of interest
-
-     Returns:
-         A list containing zero or one NotUniqueError, listing all ids where the value in the field of interest was
-         not unique. If the field name was 'id' (a very common check), the id is added as many times as it occurred in
-         the 'id' column, to maintain object counts.
-     """
-     field_data = data[component][field]
-     _, inverse, counts = np.unique(field_data, return_inverse=True, return_counts=True)
-     if any(counts != 1):
-         ids = data[component]["id"][(counts != 1)[inverse]].flatten().tolist()
-         return [NotUniqueError(component, field, ids)]
-     return []
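
The duplicate detection in `all_unique` relies on broadcasting the per-value counts back to the original positions; in isolation (our snippet, not part of the diff):

```python
import numpy as np

ids = np.array([1, 2, 2, 3, 2])
_, inverse, counts = np.unique(ids, return_inverse=True, return_counts=True)
# counts of the unique values [1, 2, 3] are [1, 3, 1]; indexing with `inverse`
# marks every occurrence of a duplicated value, preserving object counts.
print((counts != 1)[inverse])  # [False  True  True False  True]
```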
-
-
- def all_cross_unique(
-     data: SingleDataset, fields: list[tuple[ComponentType, str]], cross_only=True
- ) -> list[MultiComponentNotUniqueError]:
-     """
-     Check that the values in the given (component, field) pairs are unique across all of those columns together.
-
-     Args:
-         data (SingleDataset): The input/update data set for all components
-         fields (list[tuple[ComponentType, str]]): The fields of interest, formatted as
-             [(component_1, field_1), (component_2, field_2)]
-         cross_only (bool, optional): Do not include duplicates within a single field. It is advised that you use
-             all_unique() to explicitly check uniqueness within a single field.
-
-     Returns:
-         A list containing zero or one MultiComponentNotUniqueError, listing all fields and ids where the value was
-         not unique between the fields.
-     """
-     all_values: dict[int, list[tuple[tuple[ComponentType, str], int]]] = {}
-     duplicate_ids = set()
-     for component, field in fields:
-         for obj_id, value in zip(data[component]["id"], data[component][field]):
-             component_id = ((component, field), obj_id)
-             if value not in all_values:
-                 all_values[value] = []
-             elif not cross_only or not all(f == (component, field) for f, _ in all_values[value]):
-                 duplicate_ids.update(all_values[value])
-                 duplicate_ids.add(component_id)
-             all_values[value].append(component_id)
-     if duplicate_ids:
-         fields_with_duplicated_ids = {f for f, _ in duplicate_ids}
-         ids_with_duplicated_ids = {(c, i) for (c, _), i in duplicate_ids}
-         return [MultiComponentNotUniqueError(list(fields_with_duplicated_ids), list(ids_with_duplicated_ids))]
-     return []
-
-
- def all_valid_enum_values(
-     data: SingleDataset, component: ComponentType, field: str, enum: Type[Enum] | list[Type[Enum]]
- ) -> list[InvalidEnumValueError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are valid values
-     for the supplied enum class. Returns an empty list on success, or a list containing a single error object on
-     failure.
-
-     Args:
-         data (SingleDataset): The input/update data set for all components
-         component (ComponentType): The component of interest
-         field (str): The field of interest
-         enum (Type[Enum] | list[Type[Enum]]): The enum type to validate against, or a list of such enum types
-
-     Returns:
-         A list containing zero or one InvalidEnumValueError, listing all ids where the value in the field of interest
-         was not a valid value in the supplied enum type.
-     """
-     enums: list[Type[Enum]] = enum if isinstance(enum, list) else [enum]
-
-     valid = {_nan_type(component, field)}
-     for enum_type in enums:
-         valid.update(list(enum_type))
-
-     invalid = np.isin(data[component][field], np.array(list(valid), dtype=np.int8), invert=True)
-     if invalid.any():
-         ids = data[component]["id"][invalid].flatten().tolist()
-         return [InvalidEnumValueError(component, field, ids, enum)]
-     return []
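
The membership test boils down to np.isin against the enum members plus the field's NaN sentinel; a standalone sketch (ours; the -128 sentinel for int8 enum fields is an assumption based on what `_nan_type` returns for such fields):

```python
import numpy as np
from power_grid_model.enum import WindingType

values = np.array([0, 1, 7, -128], dtype=np.int8)  # -128: assumed int8 NaN sentinel
valid = {-128, *WindingType}
print(np.isin(values, np.array(list(valid), dtype=np.int8), invert=True))
# 7 is not a WindingType member -> [False False  True False]
```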
-
-
- # pylint: disable=too-many-arguments
- def all_valid_associated_enum_values(  # pylint: disable=too-many-positional-arguments
-     data: SingleDataset,
-     component: ComponentType,
-     field: str,
-     ref_object_id_field: str,
-     ref_components: list[ComponentType],
-     enum: Type[Enum] | list[Type[Enum]],
-     **filters: Any,
- ) -> list[InvalidAssociatedEnumValueError]:
-     """
-     Check that the values in the 'field' column are valid values for the supplied enum class, but only for records
-     whose referenced object exists and which pass the given filters.
-
-     Args:
-         data (SingleDataset): The input/update data set for all components
-         component (ComponentType): The component of interest
-         field (str): The field of interest
-         ref_object_id_field (str): The field that contains the referenced component ids
-         ref_components (list[ComponentType]): The component or components in which we want to look for ids
-         enum (Type[Enum] | list[Type[Enum]]): The enum type to validate against, or a list of such enum types
-         **filters: One or more filters on the dataset. E.g. regulated_object="transformer".
-
-     Returns:
-         A list containing zero or one InvalidAssociatedEnumValueError, listing all ids where the value in the field
-         of interest was not a valid value in the supplied enum type.
-     """
-     enums: list[Type[Enum]] = enum if isinstance(enum, list) else [enum]
-
-     valid_ids = _get_valid_ids(data=data, ref_components=ref_components)
-     mask = np.logical_and(
-         _get_mask(data=data, component=component, field=field, **filters),
-         np.isin(data[component][ref_object_id_field], valid_ids),
-     )
-
-     valid = {_nan_type(component, field)}
-     for enum_type in enums:
-         valid.update(list(enum_type))
-
-     invalid = np.isin(data[component][field][mask], np.array(list(valid), dtype=np.int8), invert=True)
-     if invalid.any():
-         ids = data[component]["id"][mask][invalid].flatten().tolist()
-         return [InvalidAssociatedEnumValueError(component, [field, ref_object_id_field], ids, enum)]
-     return []
-
-
- def all_valid_ids(
-     data: SingleDataset,
-     component: ComponentType,
-     field: str,
-     ref_components: ComponentType | list[ComponentType],
-     **filters: Any,
- ) -> list[InvalidIdError]:
-     """
-     For a column which should contain object identifiers (ids), check if the id exists in the data, for a specific
-     set of reference component types. E.g. is the from_node field of each line referring to an existing node id?
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-         ref_components: The component or components in which we want to look for ids
-         **filters: One or more filters on the dataset. E.g. measured_terminal_type=MeasuredTerminalType.source.
-
-     Returns:
-         A list containing zero or one InvalidIdError, listing all ids where the value in the field of interest
-         was not a valid object identifier.
-     """
-     valid_ids = _get_valid_ids(data=data, ref_components=ref_components)
-     mask = _get_mask(data=data, component=component, field=field, **filters)
-
-     # Find any values that can't be found in the set of ids
-     invalid = np.logical_and(mask, np.isin(data[component][field], valid_ids, invert=True))
-     if invalid.any():
-         ids = data[component]["id"][invalid].flatten().tolist()
-         return [InvalidIdError(component, field, ids, ref_components, filters)]
-     return []
-
-
- def all_boolean(data: SingleDataset, component: ComponentType, field: str) -> list[NotBooleanError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field' column are valid boolean
-     values, i.e. 0 or 1. Returns an empty list on success, or a list containing a single error object on failure.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field: The field of interest
-
-     Returns:
-         A list containing zero or one NotBooleanError, listing all ids where the value in the field of interest was
-         not a valid boolean value.
-     """
-     invalid = np.isin(data[component][field], [0, 1], invert=True)
-     if invalid.any():
-         ids = data[component]["id"][invalid].flatten().tolist()
-         return [NotBooleanError(component, field, ids)]
-     return []
-
-
- def all_not_two_values_zero(
-     data: SingleDataset, component: ComponentType, field_1: str, field_2: str
- ) -> list[TwoValuesZeroError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field_1' and 'field_2' columns
-     are not both zero. Returns an empty list on success, or a list containing a single error object on failure.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field_1: The first field of interest
-         field_2: The second field of interest
-
-     Returns:
-         A list containing zero or one TwoValuesZeroError, listing all ids where the values in the two fields of
-         interest were both zero.
-     """
-     invalid = np.logical_and(np.equal(data[component][field_1], 0.0), np.equal(data[component][field_2], 0.0))
-     if invalid.any():
-         if invalid.ndim > 1:
-             invalid = invalid.any(axis=1)
-         ids = data[component]["id"][invalid].flatten().tolist()
-         return [TwoValuesZeroError(component, [field_1, field_2], ids)]
-     return []
-
-
- def all_not_two_values_equal(
-     data: SingleDataset, component: ComponentType, field_1: str, field_2: str
- ) -> list[SameValueError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'field_1' and 'field_2' columns
-     are not both the same value. E.g. from_node and to_node of a line. Returns an empty list on success, or a list
-     containing a single error object on failure.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         field_1: The first field of interest
-         field_2: The second field of interest
-
-     Returns:
-         A list containing zero or one SameValueError, listing all ids where the values in the two fields of interest
-         were both the same.
-     """
-     invalid = np.equal(data[component][field_1], data[component][field_2])
-     if invalid.any():
-         if invalid.ndim > 1:
-             invalid = invalid.any(axis=1)
-         ids = data[component]["id"][invalid].flatten().tolist()
-         return [SameValueError(component, [field_1, field_2], ids)]
-     return []
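
A minimal usage sketch for the pairwise rule above (ours, not part of the diff; it assumes the public `initialize_array` helper, and id 11 is a line connecting a node to itself):

```python
from power_grid_model import initialize_array

line = initialize_array("input", "line", 2)
line["id"] = [10, 11]
line["from_node"] = [1, 4]
line["to_node"] = [2, 4]  # id 11: from_node == to_node

print(all_not_two_values_equal({"line": line}, "line", "from_node", "to_node"))
# -> a single SameValueError listing id 11
```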
-
-
- def ids_valid_in_update_data_set(
-     update_data: SingleDataset, ref_data: SingleDataset, component: ComponentType, ref_name: str
- ) -> list[IdNotInDatasetError | InvalidIdError]:
-     """
-     Check for all records of a particular type of component whether the ids:
-     - exist and match those in the reference data set, or
-     - are not present, which qualifies for an optional id
-
-     Args:
-         update_data: The update data set for all components
-         ref_data: The reference (input) data set for all components
-         component: The component of interest
-         ref_name: The name of the reference data set, e.g. 'update_data'
-
-     Returns:
-         A list containing zero or one IdNotInDatasetError, listing all ids of the objects in the data set which do
-         not exist in the reference data set.
-     """
-     component_data = update_data[component]
-     component_ref_data = ref_data[component]
-     if component_ref_data["id"].size == 0:
-         return [InvalidIdError(component=component, field="id", ids=None)]
-     id_field_is_nan = np.array(is_nan_or_default(component_data["id"]))
-     # check whether the ids qualify for the optional-id case
-     if component_data["id"].size == 0 or np.all(id_field_is_nan):
-         # check if the dimension of the component_data is the same as the component_ref_data
-         if get_comp_size(component_data) != get_comp_size(component_ref_data):
-             return [InvalidIdError(component=component, field="id", ids=None)]
-         return []  # supported optional id
-
-     if np.all(id_field_is_nan) and not np.all(~id_field_is_nan):
-         return [InvalidIdError(component=component, field="id", ids=None)]
-
-     # normal check: exist and match with input
-     invalid = np.isin(component_data["id"], component_ref_data["id"], invert=True)
-     if invalid.any():
-         ids = component_data["id"][invalid].flatten().tolist()
-         return [IdNotInDatasetError(component, ids, ref_name)]
-     return []
-
-
- def all_finite(data: SingleDataset, exceptions: dict[ComponentType, list[str]] | None = None) -> list[InfinityError]:
-     """
-     Check that for all records in all components, the values in all columns are finite values, i.e. float values
-     other than inf or -inf. NaN values are ignored, as in all other comparison functions. You can use none_missing()
-     to check for missing/NaN values. Returns an empty list on success, or a list containing an error object for each
-     component/field combination where an infinite value was found.
-
-     Args:
-         data: The input/update data set for all components
-         exceptions:
-             A dictionary of fields per component type for which infinite values are supported. Defaults to empty.
-
-     Returns:
-         A list containing zero or more InfinityErrors, listing all ids where the value in the field of interest was
-         not a finite value.
-     """
-     errors = []
-     for component, array in data.items():
-         if not isinstance(array, np.ndarray):
-             raise NotImplementedError()  # TODO(mgovers): add support for columnar data
-
-         for field, (dtype, _) in array.dtype.fields.items():
-             if not np.issubdtype(dtype, np.floating):
-                 continue
-
-             if exceptions and field in exceptions.get(component, []):
-                 continue
-
-             invalid = np.isinf(array[field])
-             if invalid.any():
-                 ids = data[component]["id"][invalid].flatten().tolist()
-                 errors.append(InfinityError(component, field, ids))
-     return errors
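
As with the other rules, `all_finite` deliberately leaves NaN to `none_missing()`; the split is visible in plain numpy (our snippet, not part of the diff):

```python
import numpy as np

values = np.array([1.0, np.inf, -np.inf, np.nan])
print(np.isinf(values))  # [False  True  True False] -> flagged by all_finite
print(np.isnan(values))  # [False False False  True] -> left to none_missing()
```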
-
-
- def none_missing(data: SingleDataset, component: ComponentType, fields: str | list[str]) -> list[MissingValueError]:
-     """
-     Check that for all records of a particular type of component, the values in the 'fields' columns are not NaN.
-     Returns an empty list on success, or a list containing a single error object on failure.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         fields: The fields of interest
-
-     Returns:
-         A list containing zero or more MissingValueErrors; one for each field, listing all ids where the value in
-         the field was NaN.
-     """
-     errors = []
-     if isinstance(fields, str):
-         fields = [fields]
-     for field in fields:
-         nan = _nan_type(component, field)
-         if np.isnan(nan):
-             invalid = np.isnan(data[component][field])
-         else:
-             invalid = np.equal(data[component][field], nan)
-
-         if invalid.any():
-             # handle both symmetric and asymmetric values
-             invalid = np.any(invalid, axis=tuple(range(1, invalid.ndim)))
-             ids = data[component]["id"][invalid].flatten().tolist()
-             errors.append(MissingValueError(component, field, ids))
-     return errors
-
-
- def valid_p_q_sigma(data: SingleDataset, component: ComponentType) -> list[PQSigmaPairError]:
-     """
-     Check validity of the pair `(p_sigma, q_sigma)` for 'sym_power_sensor' and 'asym_power_sensor'.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest, in this case only 'sym_power_sensor' or 'asym_power_sensor'
-
-     Returns:
-         A list containing zero or one PQSigmaPairError, listing the p_sigma and q_sigma mismatch.
-         Note that with asymmetric power sensors, partial assignment of p_sigma and q_sigma is also considered a
-         mismatch.
-     """
-     errors = []
-     p_sigma = data[component]["p_sigma"]
-     q_sigma = data[component]["q_sigma"]
-     p_nan = np.isnan(p_sigma)
-     q_nan = np.isnan(q_sigma)
-     p_inf = np.isinf(p_sigma)
-     q_inf = np.isinf(q_sigma)
-     mis_match = p_nan != q_nan
-     mis_match |= np.logical_xor(p_inf, q_inf)  # infinite sigmas are supported if they are both infinite
-     if p_sigma.ndim > 1:  # if component == 'asym_power_sensor':
-         mis_match = mis_match.any(axis=-1)
-         mis_match |= np.logical_xor(p_nan.any(axis=-1), p_nan.all(axis=-1))
-         mis_match |= np.logical_xor(q_nan.any(axis=-1), q_nan.all(axis=-1))
-         mis_match |= np.logical_xor(p_inf.any(axis=-1), p_inf.all(axis=-1))
-         mis_match |= np.logical_xor(q_inf.any(axis=-1), q_inf.all(axis=-1))
-
-     if mis_match.any():
-         ids = data[component]["id"][mis_match].flatten().tolist()
-         errors.append(PQSigmaPairError(component, ["p_sigma", "q_sigma"], ids))
-     return errors
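
The symmetric part of the pairing rule reduces to two xors; a standalone illustration (ours, not part of the diff):

```python
import numpy as np

p_sigma = np.array([0.1, np.nan, 0.3, np.inf])
q_sigma = np.array([0.2, np.nan, np.nan, 0.4])

mismatch = np.isnan(p_sigma) != np.isnan(q_sigma)
mismatch |= np.logical_xor(np.isinf(p_sigma), np.isinf(q_sigma))
print(mismatch)  # [False False  True  True]: only both-set, both-NaN or
                 # both-inf pairs are consistent
```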
-
-
- def all_valid_clocks(
-     data: SingleDataset, component: ComponentType, clock_field: str, winding_from_field: str, winding_to_field: str
- ) -> list[TransformerClockError]:
-     """
-     Custom validation rule: Odd clock number is only allowed for Dy(n) or Y(N)d configuration.
-
-     Args:
-         data: The input/update data set for all components
-         component: The component of interest
-         clock_field: The clock field
-         winding_from_field: The winding from field
-         winding_to_field: The winding to field
-
-     Returns:
-         A list containing zero or more TransformerClockErrors; listing all the ids of transformers where the clock
-         was invalid, given the winding type.
-     """
-
-     clk = data[component][clock_field]
-     wfr = data[component][winding_from_field]
-     wto = data[component][winding_to_field]
-     wfr_is_wye = np.isin(wfr, [WindingType.wye, WindingType.wye_n])
-     wto_is_wye = np.isin(wto, [WindingType.wye, WindingType.wye_n])
-     odd = clk % 2 == 1
-     # an even number is not possible if one side is a wye winding and the other side is not a wye winding.
-     # an odd number is not possible if both sides are wye windings or both sides are not wye windings.
-     err = (~odd & (wfr_is_wye != wto_is_wye)) | (odd & (wfr_is_wye == wto_is_wye))
-     if err.any():
-         return [
-             TransformerClockError(
-                 component=component,
-                 fields=[clock_field, winding_from_field, winding_to_field],
-                 ids=data[component]["id"][err].flatten().tolist(),
-             )
-         ]
-     return []
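
In vector-group terms: Dyn11 (one wye side, odd clock) passes, while Dd1 (no wye side, odd clock) fails. A standalone check of the parity logic (ours, not part of the diff):

```python
import numpy as np
from power_grid_model.enum import WindingType

wfr = np.array([WindingType.delta, WindingType.delta])  # Dyn11, Dd1
wto = np.array([WindingType.wye_n, WindingType.delta])
clk = np.array([11, 1])

wfr_is_wye = np.isin(wfr, [WindingType.wye, WindingType.wye_n])
wto_is_wye = np.isin(wto, [WindingType.wye, WindingType.wye_n])
odd = clk % 2 == 1
err = (~odd & (wfr_is_wye != wto_is_wye)) | (odd & (wfr_is_wye == wto_is_wye))
print(err)  # [False  True]
```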
851
-
852
-
853
- def all_valid_fault_phases(
854
- data: SingleDataset, component: ComponentType, fault_type_field: str, fault_phase_field: str
855
- ) -> list[FaultPhaseError]:
856
- """
857
- Custom validation rule: Only a subset of fault_phases is supported for each fault type.
858
-
859
- Args:
860
- data (SingleDataset): The input/update data set for all components
861
- component (str): The component of interest
862
- fault_type_field (str): The fault type field
863
- fault_phase_field (str): The fault phase field
864
-
865
- Returns:
866
- A list containing zero or more FaultPhaseErrors; listing all the ids of faults where the fault phase was
867
- invalid, given the fault phase.
868
- """
869
- fault_types = data[component][fault_type_field]
870
- fault_phases = data[component][fault_phase_field]
871
-
872
- supported_combinations: dict[FaultType, list[FaultPhase]] = {
873
- FaultType.three_phase: [FaultPhase.abc, FaultPhase.default_value, FaultPhase.nan],
874
- FaultType.single_phase_to_ground: [
875
- FaultPhase.a,
876
- FaultPhase.b,
877
- FaultPhase.c,
878
- FaultPhase.default_value,
879
- FaultPhase.nan,
880
- ],
881
- FaultType.two_phase: [FaultPhase.ab, FaultPhase.ac, FaultPhase.bc, FaultPhase.default_value, FaultPhase.nan],
882
- FaultType.two_phase_to_ground: [
883
- FaultPhase.ab,
884
- FaultPhase.ac,
885
- FaultPhase.bc,
886
- FaultPhase.default_value,
887
- FaultPhase.nan,
888
- ],
889
- FaultType.nan: [],
890
- }
891
-
892
- def _fault_phase_supported(fault_type: FaultType, fault_phase: FaultPhase):
893
- return fault_phase not in supported_combinations.get(fault_type, [])
894
-
895
- err = np.vectorize(_fault_phase_supported)(fault_type=fault_types, fault_phase=fault_phases)
896
- if err.any():
897
- return [
898
- FaultPhaseError(
899
- component=component,
900
- fields=[fault_type_field, fault_phase_field],
901
- ids=data[component]["id"][err].flatten().tolist(),
902
- )
903
- ]
904
- return []
+ # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
+ #
+ # SPDX-License-Identifier: MPL-2.0
+
+ """
+ This module contains a set of comparison rules. They all share the same (or similar) logic and interface.
+
+ In general, each function checks the values in a single 'column' (i.e. field) of a numpy structured array and it
+ returns an error object containing the component, the field and the ids of the records that did not match the rule.
+ E.g. all_greater_than_zero(data, 'node', 'u_rated') returns a NotGreaterThanError if any of the nodes' `u_rated`
+ values are 0 or less.
+
+ In general, the rules are designed to ignore NaN values, except for none_missing() which explicitly checks for NaN
+ values in the entire data set. It is important to understand that np.less_equal(x) yields different results than
+ np.logical_not(np.greater(x)), as a NaN comparison always results in False. The most extreme example is that even
+ np.nan == np.nan yields False.
+
+     np.less_equal(            [0.1, 0.2, 0.3, np.nan], 0.0)  = [False, False, False, False] -> OK
+     np.logical_not(np.greater([0.1, 0.2, 0.3, np.nan], 0.0)) = [False, False, False, True]  -> Error (false positive)
+
+ Input data:
+
+     data: SingleDataset
+         The entire input/update data set
+
+     component: ComponentType
+         The name of the component, which should be an existing key in the data
+
+     field: str
+         The name of the column, which should be a field in the component data (numpy structured array)
+
+ Output data:
+     errors: list[ValidationError]
+         A list containing errors; in case of success, `errors` is the empty list: [].
+
+ """
+
+ from collections.abc import Callable
+ from enum import Enum
+ from typing import Any
+
+ import numpy as np
+
+ from power_grid_model._core.dataset_definitions import ComponentType, DatasetType
+ from power_grid_model._core.enum import AngleMeasurementType, FaultPhase, FaultType, WindingType
+ from power_grid_model._core.utils import get_comp_size, is_nan_or_default
+ from power_grid_model.data_types import SingleDataset
+ from power_grid_model.validation.errors import (
+     ComparisonError,
+     FaultPhaseError,
+     IdNotInDatasetError,
+     InfinityError,
+     InvalidAssociatedEnumValueError,
+     InvalidEnumValueError,
+     InvalidIdError,
+     MissingValueError,
+     MissingVoltageAngleMeasurementError,
+     MixedCurrentAngleMeasurementTypeError,
+     MixedPowerCurrentSensorError,
+     MultiComponentNotUniqueError,
+     MultiFieldValidationError,
+     NotBetweenError,
+     NotBetweenOrAtError,
+     NotBooleanError,
+     NotGreaterOrEqualError,
+     NotGreaterThanError,
+     NotIdenticalError,
+     NotLessOrEqualError,
+     NotLessThanError,
+     NotUniqueError,
+     PQSigmaPairError,
+     SameValueError,
+     TransformerClockError,
+     TwoValuesZeroError,
+     UnsupportedMeasuredTerminalType,
+ )
+ from power_grid_model.validation.utils import _eval_expression, _get_mask, _get_valid_ids, _nan_type, _set_default_value
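
Two things stand out in the new import block: `Callable` now comes from `collections.abc` (the `typing` alias is soft-deprecated since Python 3.9), and the module now pulls enums and helpers from the private `power_grid_model._core` namespace instead of the removed `power_grid_model._utils`. Several current-sensor-related error classes (`MixedCurrentAngleMeasurementTypeError`, `MixedPowerCurrentSensorError`, `UnsupportedMeasuredTerminalType`) are new in 1.12.x. A side-by-side sketch (ours):

```python
# 1.10.x
from typing import Any, Callable, Type, TypeVar

# 1.12.x, as in the hunk above; Type/TypeVar are no longer needed here
from collections.abc import Callable
from typing import Any
```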
78
+
79
+
80
+ def all_greater_than_zero(data: SingleDataset, component: ComponentType, field: str) -> list[NotGreaterThanError]:
81
+ """
82
+ Check that for all records of a particular type of component, the values in the 'field' column are greater than
83
+ zero. Returns an empty list on success, or a list containing a single error object on failure.
84
+
85
+ Args:
86
+ data (SingleDataset): The input/update data set for all components
87
+ component (ComponentType): The component of interest
88
+ field (str): The field of interest
89
+
90
+ Returns:
91
+ A list containing zero or one NotGreaterThanErrors, listing all ids where the value in the field of interest
92
+ was zero or less.
93
+ """
94
+ return all_greater_than(data, component, field, 0.0)
95
+
96
+
97
+ def all_greater_than_or_equal_to_zero(
98
+ data: SingleDataset,
99
+ component: ComponentType,
100
+ field: str,
101
+ default_value: np.ndarray | int | float | None = None,
102
+ ) -> list[NotGreaterOrEqualError]:
103
+ """
104
+ Check that for all records of a particular type of component, the values in the 'field' column are greater than,
105
+ or equal to zero. Returns an empty list on success, or a list containing a single error object on failure.
106
+
107
+ Args:
108
+ data (SingleDataset): The input/update data set for all components
109
+ component (ComponentType) The component of interest
110
+ field (str): The field of interest
111
+ default_value (np.ndarray | int | float | None, optional): Some values are not required, but will
112
+ receive a default value in the C++ core. To do a proper input validation, these default values should be
113
+ included in the validation. It can be a fixed value for the entire column (int/float) or be different for
114
+ each element (np.ndarray).
115
+
116
+ Returns:
117
+ A list containing zero or one NotGreaterOrEqualErrors, listing all ids where the value in the field of
118
+ interest was less than zero.
119
+ """
120
+ return all_greater_or_equal(data, component, field, 0.0, default_value)
121
+
122
+
123
+ def all_greater_than(
124
+ data: SingleDataset, component: ComponentType, field: str, ref_value: int | float | str
125
+ ) -> list[NotGreaterThanError]:
126
+ """
127
+ Check that for all records of a particular type of component, the values in the 'field' column are greater than
128
+ the reference value. Returns an empty list on success, or a list containing a single error object on failure.
129
+
130
+ Args:
131
+ data: The input/update data set for all components
132
+ component: The component of interest
133
+ field: The field of interest
134
+ ref_value: The reference value against which all values in the 'field' column are compared. If the reference
135
+ value is a string, it is assumed to be another field (e.g. 'field_x') of the same component, or a ratio between
136
+ two fields (e.g. 'field_x / field_y')
137
+
138
+ Returns:
139
+ A list containing zero or one NotGreaterThanErrors, listing all ids where the value in the field of interest
140
+ was less than, or equal to, the ref_value.
141
+ """
142
+
143
+ def not_greater(val: np.ndarray, *ref: np.ndarray):
144
+ return np.less_equal(val, *ref)
145
+
146
+ return none_match_comparison(data, component, field, not_greater, ref_value, NotGreaterThanError)
147
+
148
+
149
+ def all_greater_or_equal(
150
+ data: SingleDataset,
151
+ component: ComponentType,
152
+ field: str,
153
+ ref_value: int | float | str,
154
+ default_value: np.ndarray | int | float | None = None,
155
+ ) -> list[NotGreaterOrEqualError]:
156
+ """
157
+ Check that for all records of a particular type of component, the values in the 'field' column are greater than,
158
+ or equal to the reference value. Returns an empty list on success, or a list containing a single error object on
159
+ failure.
160
+
161
+ Args:
162
+ data: The input/update data set for all components
163
+ component: The component of interest
164
+ field: The field of interest
165
+ ref_value: The reference value against which all values in the 'field' column are compared. If the reference
166
+ value is a string, it is assumed to be another field (e.g. 'field_x') of the same component, or a ratio between
167
+ two fields (e.g. 'field_x / field_y')
168
+ default_value: Some values are not required, but will receive a default value in the C++ core. To do a proper
169
+ input validation, these default values should be included in the validation. It can be a fixed value for the
170
+ entire column (int/float) or be different for each element (np.ndarray).
171
+
172
+ Returns:
173
+ A list containing zero or one NotGreaterOrEqualErrors, listing all ids where the value in the field of
174
+ interest was less than the ref_value.
175
+
176
+ """
177
+
178
+ def not_greater_or_equal(val: np.ndarray, *ref: np.ndarray):
179
+ return np.less(val, *ref)
180
+
181
+ return none_match_comparison(
182
+ data, component, field, not_greater_or_equal, ref_value, NotGreaterOrEqualError, default_value
183
+ )
184
+
185
+
186
+ def all_less_than(
187
+ data: SingleDataset, component: ComponentType, field: str, ref_value: int | float | str
188
+ ) -> list[NotLessThanError]:
189
+ """
190
+ Check that for all records of a particular type of component, the values in the 'field' column are less than the
191
+ reference value. Returns an empty list on success, or a list containing a single error object on failure.
192
+
193
+ Args:
194
+ data: The input/update data set for all components
195
+ component: The component of interest
196
+ field: The field of interest
197
+ ref_value: The reference value against which all values in the 'field' column are compared. If the reference
198
+ value is a string, it is assumed to be another field (e.g. 'field_x') of the same component, or a ratio between
199
+ two fields (e.g. 'field_x / field_y')
200
+
201
+ Returns:
202
+ A list containing zero or one NotLessThanErrors, listing all ids where the value in the field of interest was
203
+ greater than, or equal to, the ref_value.
204
+ """
205
+
206
+ def not_less(val: np.ndarray, *ref: np.ndarray):
207
+ return np.greater_equal(val, *ref)
208
+
209
+ return none_match_comparison(data, component, field, not_less, ref_value, NotLessThanError)
210
+
211
+
+def all_less_or_equal(
+    data: SingleDataset, component: ComponentType, field: str, ref_value: int | float | str
+) -> list[NotLessOrEqualError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field' column are less than or
+    equal to the reference value. Returns an empty list on success, or a list containing a single error object on
+    failure.
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest
+        field: The field of interest
+        ref_value: The reference value against which all values in the 'field' column are compared. If the reference
+            value is a string, it is assumed to be another field (e.g. 'field_x') of the same component, or a ratio
+            between two fields (e.g. 'field_x / field_y')
+
+    Returns:
+        A list containing zero or one NotLessOrEqualError, listing all ids where the value in the field of interest
+        was greater than the ref_value.
+    """
+
+    def not_less_or_equal(val: np.ndarray, *ref: np.ndarray):
+        return np.greater(val, *ref)
+
+    return none_match_comparison(data, component, field, not_less_or_equal, ref_value, NotLessOrEqualError)
+
+
+def all_between(  # noqa: PLR0913
+    data: SingleDataset,
+    component: ComponentType,
+    field: str,
+    ref_value_1: int | float | str,
+    ref_value_2: int | float | str,
+    default_value: np.ndarray | int | float | None = None,
+) -> list[NotBetweenError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field' column are (exclusively)
+    between reference values 1 and 2. Value 1 may be smaller or larger than value 2. Returns an empty list on
+    success, or a list containing a single error object on failure.
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest
+        field: The field of interest
+        ref_value_1: The first reference value against which all values in the 'field' column are compared. If the
+            reference value is a string, it is assumed to be another field (e.g. 'field_x') of the same component,
+            or a ratio between two fields (e.g. 'field_x / field_y')
+        ref_value_2: The second reference value against which all values in the 'field' column are compared. If the
+            reference value is a string, it is assumed to be another field (e.g. 'field_x') of the same component,
+            or a ratio between two fields (e.g. 'field_x / field_y')
+        default_value: Some values are not required, but will receive a default value in the C++ core. To do a proper
+            input validation, these default values should be included in the validation. It can be a fixed value for
+            the entire column (int/float) or be different for each element (np.ndarray).
+
+    Returns:
+        A list containing zero or one NotBetweenError, listing all ids where the value in the field of interest was
+        outside the range defined by the reference values.
+    """
+
+    def outside(val: np.ndarray, *ref: np.ndarray) -> np.ndarray:
+        return np.logical_or(np.less_equal(val, np.minimum(*ref)), np.greater_equal(val, np.maximum(*ref)))
+
+    return none_match_comparison(
+        data, component, field, outside, (ref_value_1, ref_value_2), NotBetweenError, default_value
+    )
+
+
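A small sketch (illustrative only) of the exclusive range test used by `all_between`, showing how swapped bounds are normalized with `np.minimum`/`np.maximum`:

```python
import numpy as np

val = np.array([0.5, 1.0, 2.0])
ref = (2.0, 0.5)  # bounds may arrive in either order
outside = np.logical_or(
    np.less_equal(val, np.minimum(*ref)),
    np.greater_equal(val, np.maximum(*ref)),
)
# outside -> [ True, False,  True]: only 1.0 lies strictly between 0.5 and 2.0
```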
+def all_between_or_at(  # noqa: PLR0913
+    data: SingleDataset,
+    component: ComponentType,
+    field: str,
+    ref_value_1: int | float | str,
+    ref_value_2: int | float | str,
+    default_value_1: np.ndarray | int | float | None = None,
+    default_value_2: np.ndarray | int | float | None = None,
+) -> list[NotBetweenOrAtError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field' column are inclusively
+    between reference values 1 and 2. Value 1 may be smaller or larger than value 2. Returns an empty list on
+    success, or a list containing a single error object on failure.
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest
+        field: The field of interest
+        ref_value_1: The first reference value against which all values in the 'field' column are compared. If the
+            reference value is a string, it is assumed to be another field (e.g. 'field_x') of the same component,
+            or a ratio between two fields (e.g. 'field_x / field_y')
+        ref_value_2: The second reference value against which all values in the 'field' column are compared. If the
+            reference value is a string, it is assumed to be another field (e.g. 'field_x') of the same component,
+            or a ratio between two fields (e.g. 'field_x / field_y')
+        default_value_1: Some values are not required, but will receive a default value in the C++ core. To do a
+            proper input validation, these default values should be included in the validation. It can be a fixed
+            value for the entire column (int/float) or be different for each element (np.ndarray).
+        default_value_2: Some values can have a double default: the default will be set to another attribute of the
+            component, but if that attribute is missing, the default will be set to a fixed value.
+
+    Returns:
+        A list containing zero or one NotBetweenOrAtError, listing all ids where the value in the field of interest
+        was outside the range defined by the reference values.
+    """
+
+    def outside(val: np.ndarray, *ref: np.ndarray) -> np.ndarray:
+        return np.logical_or(np.less(val, np.minimum(*ref)), np.greater(val, np.maximum(*ref)))
+
+    return none_match_comparison(
+        data,
+        component,
+        field,
+        outside,
+        (ref_value_1, ref_value_2),
+        NotBetweenOrAtError,
+        default_value_1,
+        default_value_2,
+    )
+
+
+def none_match_comparison[ErrorType: ComparisonError](  # noqa: PLR0913
+    data: SingleDataset,
+    component: ComponentType,
+    field: str,
+    compare_fn: Callable,
+    ref_value: ComparisonError.RefType,
+    error: type[ErrorType] = ComparisonError,  # type: ignore
+    default_value_1: np.ndarray | int | float | None = None,
+    default_value_2: np.ndarray | int | float | None = None,
+) -> list[ErrorType]:
+    """
+    For all records of a particular type of component, check if the values in the 'field' column match the
+    comparison. Returns an empty list if none of the values match the comparison, or a list containing a single
+    error object when the value in 'field' of at least one record matches the comparison.
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest
+        field: The field of interest
+        compare_fn: A function that takes the data in the 'field' column, and any number of reference values
+        ref_value: A reference value, or a tuple of reference values, against which all values in the 'field' column
+            are compared using the compare_fn. If a reference value is a string, it is assumed to be another field
+            (e.g. 'field_x') of the same component, or a ratio between two fields (e.g. 'field_x / field_y')
+        error: The type (class) of error that should be returned in case any of the values match the comparison.
+        default_value_1: Some values are not required, but will receive a default value in the C++ core. To do a
+            proper input validation, these default values should be included in the validation. It can be a fixed
+            value for the entire column (int/float) or be different for each element (np.ndarray).
+        default_value_2: Some values can have a double default: the default will be set to another attribute of the
+            component, but if that attribute is missing, the default will be set to a fixed value.
+
+    Returns:
+        A list containing zero or one comparison errors (a subclass of ComparisonError), listing all ids where the
+        value in the field of interest matched the comparison.
+    """
+    if default_value_1 is not None:
+        _set_default_value(data=data, component=component, field=field, default_value=default_value_1)
+    if default_value_2 is not None:
+        _set_default_value(data=data, component=component, field=field, default_value=default_value_2)
+    component_data = data[component]
+    if not isinstance(component_data, np.ndarray):
+        raise NotImplementedError  # TODO(mgovers): add support for columnar data
+
+    if isinstance(ref_value, tuple):
+        ref = tuple(_eval_expression(component_data, v) for v in ref_value)
+    else:
+        ref = (_eval_expression(component_data, ref_value),)
+    matches = compare_fn(component_data[field], *ref)
+    if matches.any():
+        if matches.ndim > 1:
+            matches = matches.any(axis=1)
+        ids = component_data["id"][matches].flatten().tolist()
+        return [error(component, field, ids, ref_value)]
+    return []
+
+
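A sketch of the core flow for a plain numeric reference value (names are illustrative; string references such as `'field_x / field_y'` are resolved against other columns of the same component by the `_eval_expression` helper referenced above):

```python
import numpy as np

transformer = np.array([(10, 0.4), (11, 1.2)], dtype=[("id", "i4"), ("uk", "f8")])
ref = (1.0,)                                   # already-evaluated reference value(s)
matches = np.greater(transformer["uk"], *ref)  # compare_fn for "not less or equal"
ids = transformer["id"][matches].flatten().tolist()  # -> [11], reported in the error
```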
+def all_identical(data: SingleDataset, component: ComponentType, field: str) -> list[NotIdenticalError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field' column are identical.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        component (ComponentType): The component of interest
+        field (str): The field of interest
+
+    Returns:
+        A list containing zero or one NotIdenticalError. If the values in the field of interest are not identical
+        across all records, the error lists all ids of that component, all values for those ids, the set of unique
+        values in that field, and the number of unique values.
+    """
+    field_data = data[component][field]
+    if len(field_data) > 0:
+        first = field_data[0]
+        if np.any(field_data != first):
+            return [NotIdenticalError(component, field, data[component]["id"], list(field_data))]
+
+    return []
+
+
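A minimal sketch of the identity check behind `all_identical` (component and field names are made up):

```python
import numpy as np

node = np.array(
    [(1, 150e3), (2, 150e3), (3, 10.5e3)],
    dtype=[("id", "i4"), ("u_rated", "f8")],
)
field_data = node["u_rated"]
if len(field_data) > 0 and np.any(field_data != field_data[0]):
    ids = node["id"].tolist()  # all ids are reported -> [1, 2, 3]
```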
+def all_enabled_identical(
+    data: SingleDataset, component: ComponentType, field: str, status_field: str
+) -> list[NotIdenticalError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field' column are identical.
+    Only entries where the 'status' field is not 0 are checked.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        component (ComponentType): The component of interest
+        field (str): The field of interest
+        status_field (str): The status field based on which to decide whether a component is enabled
+
+    Returns:
+        A list containing zero or one NotIdenticalError, listing:
+
+        - all ids of enabled components if the value in the field of interest was not identical across all enabled
+          components
+        - all values of the 'field' column for enabled components (including duplicates)
+        - the set of unique such values
+        - the number of unique such values.
+    """
+    return all_identical(
+        {key: (value if key is not component else value[value[status_field] != 0]) for key, value in data.items()},
+        component,
+        field,
+    )
+
+
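The status masking in `all_enabled_identical` can be pictured as follows (a sketch; field names are illustrative):

```python
import numpy as np

source = np.array(
    [(1, 1, 1.05), (2, 0, 0.97), (3, 1, 1.05)],
    dtype=[("id", "i4"), ("status", "i1"), ("u_ref", "f8")],
)
enabled = source[source["status"] != 0]  # drops the disabled record with id 2
# all_identical then runs on the filtered data: u_ref is identical -> no error
```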
+def all_unique(data: SingleDataset, component: ComponentType, field: str) -> list[NotUniqueError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field' column are unique within
+    the 'field' column of that component.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        component (ComponentType): The component of interest
+        field (str): The field of interest
+
+    Returns:
+        A list containing zero or one NotUniqueError, listing all ids where the value in the field of interest was
+        not unique. If the field name was 'id' (a very common check), the id is added as many times as it occurred in
+        the 'id' column, to maintain object counts.
+    """
+    field_data = data[component][field]
+    _, inverse, counts = np.unique(field_data, return_inverse=True, return_counts=True)
+    if any(counts != 1):
+        ids = data[component]["id"][(counts != 1)[inverse]].flatten().tolist()
+        return [NotUniqueError(component, field, ids)]
+    return []
+
+
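A sketch of the duplicate detection in `all_unique`, built on `np.unique` with `return_inverse` and `return_counts`:

```python
import numpy as np

ids = np.array([1, 2, 2, 3])
_, inverse, counts = np.unique(ids, return_inverse=True, return_counts=True)
duplicated = (counts != 1)[inverse]  # -> [False, True, True, False]
# Both occurrences of 2 are flagged, preserving object counts when field == "id".
```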
+def all_cross_unique(
+    data: SingleDataset, fields: list[tuple[ComponentType, str]], cross_only=True
+) -> list[MultiComponentNotUniqueError]:
+    """
+    Check that the values in all supplied (component, field) combinations are unique across those columns combined.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        fields (list[tuple[ComponentType, str]]): The fields of interest, formatted as
+            [(component_1, field_1), (component_2, field_2)]
+        cross_only (bool, optional): Do not include duplicates within a single field. It is advised that you use
+            all_unique() to explicitly check uniqueness within a single field.
+
+    Returns:
+        A list containing zero or one MultiComponentNotUniqueError, listing all fields and ids where the value was
+        not unique between the fields.
+    """
+    all_values: dict[int, list[tuple[tuple[ComponentType, str], int]]] = {}
+    duplicate_ids = set()
+    for component, field in fields:
+        for obj_id, value in zip(data[component]["id"], data[component][field]):
+            component_id = ((component, field), obj_id)
+            if value not in all_values:
+                all_values[value] = []
+            elif not cross_only or not all(f == (component, field) for f, _ in all_values[value]):
+                duplicate_ids.update(all_values[value])
+                duplicate_ids.add(component_id)
+            all_values[value].append(component_id)
+    if duplicate_ids:
+        fields_with_duplicated_ids = {f for f, _ in duplicate_ids}
+        ids_with_duplicated_ids = {(c, i) for (c, _), i in duplicate_ids}
+        return [MultiComponentNotUniqueError(list(fields_with_duplicated_ids), list(ids_with_duplicated_ids))]
+    return []
+
+
+def all_in_valid_values(
+    data: SingleDataset, component: ComponentType, field: str, values: list
+) -> list[UnsupportedMeasuredTerminalType]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field' column are in the
+    supplied list of valid values. Returns an empty list on success, or a list containing a single error object on
+    failure.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        component (ComponentType): The component of interest
+        field (str): The field of interest
+        values (list): The values to validate against
+
+    Returns:
+        A list containing zero or one UnsupportedMeasuredTerminalType, listing all ids where the value in the field
+        of interest was not a valid value, and the sequence of supported values.
+    """
+    valid = {_nan_type(component, field)}
+    valid.update(values)
+
+    invalid = np.isin(data[component][field], np.array(list(valid)), invert=True)
+    if invalid.any():
+        ids = data[component]["id"][invalid].flatten().tolist()
+        return [UnsupportedMeasuredTerminalType(component, field, ids, values)]
+    return []
+
+
+def all_valid_enum_values(
+    data: SingleDataset, component: ComponentType, field: str, enum: type[Enum] | list[type[Enum]]
+) -> list[InvalidEnumValueError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field' column are valid values
+    for the supplied enum class. Returns an empty list on success, or a list containing a single error object on
+    failure.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        component (ComponentType): The component of interest
+        field (str): The field of interest
+        enum (type[Enum] | list[type[Enum]]): The enum type to validate against, or a list of such enum types
+
+    Returns:
+        A list containing zero or one InvalidEnumValueError, listing all ids where the value in the field of interest
+        was not a valid value in the supplied enum type.
+    """
+    enums: list[type[Enum]] = enum if isinstance(enum, list) else [enum]
+
+    valid = {_nan_type(component, field)}
+    for enum_type in enums:
+        valid.update(list(enum_type))
+
+    invalid = np.isin(data[component][field], np.array(list(valid), dtype=np.int8), invert=True)
+    if invalid.any():
+        ids = data[component]["id"][invalid].flatten().tolist()
+        return [InvalidEnumValueError(component, field, ids, enum)]
+    return []
+
+
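A sketch of the membership test behind `all_valid_enum_values`, using a stand-in `IntEnum` (the package's real enums live in `power_grid_model/_core/enum.py`; the real check also treats the component's NaN sentinel as valid):

```python
import numpy as np
from enum import IntEnum

class WindingType(IntEnum):  # stand-in; values are illustrative
    wye = 0
    wye_n = 1
    delta = 2

values = np.array([0, 2, 7], dtype=np.int8)
valid = np.array(list(WindingType), dtype=np.int8)
invalid = np.isin(values, valid, invert=True)  # -> [False, False, True]
```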
+def all_valid_associated_enum_values(  # noqa: PLR0913
+    data: SingleDataset,
+    component: ComponentType,
+    field: str,
+    ref_object_id_field: str,
+    ref_components: list[ComponentType],
+    enum: type[Enum] | list[type[Enum]],
+    **filters: Any,
+) -> list[InvalidAssociatedEnumValueError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field' column are valid values
+    for the supplied enum class. Only records whose referenced object (in the 'ref_object_id_field' column) exists
+    in one of the reference components, and that match the supplied filters, are checked.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        component (ComponentType): The component of interest
+        field (str): The field of interest
+        ref_object_id_field (str): The field that contains the referenced component ids
+        ref_components (list[ComponentType]): The component or components in which we want to look for ids
+        enum (type[Enum] | list[type[Enum]]): The enum type to validate against, or a list of such enum types
+        **filters: One or more filters on the dataset. E.g. regulated_object="transformer".
+
+    Returns:
+        A list containing zero or one InvalidAssociatedEnumValueError, listing all ids where the value in the field
+        of interest was not a valid value in the supplied enum type.
+    """
+    enums: list[type[Enum]] = enum if isinstance(enum, list) else [enum]
+
+    valid_ids = _get_valid_ids(data=data, ref_components=ref_components)
+    mask = np.logical_and(
+        _get_mask(data=data, component=component, field=field, **filters),
+        np.isin(data[component][ref_object_id_field], valid_ids),
+    )
+
+    valid = {_nan_type(component, field)}
+    for enum_type in enums:
+        valid.update(list(enum_type))
+
+    invalid = np.isin(data[component][field][mask], np.array(list(valid), dtype=np.int8), invert=True)
+    if invalid.any():
+        ids = data[component]["id"][mask][invalid].flatten().tolist()
+        return [InvalidAssociatedEnumValueError(component, [field, ref_object_id_field], ids, enum)]
+    return []
+
+
+def all_valid_ids(
+    data: SingleDataset,
+    component: ComponentType,
+    field: str,
+    ref_components: ComponentType | list[ComponentType],
+    **filters: Any,
+) -> list[InvalidIdError]:
+    """
+    For a column which should contain object identifiers (ids), check if the id exists in the data, for a specific
+    set of reference component types. E.g. is the from_node field of each line referring to an existing node id?
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest
+        field: The field of interest
+        ref_components: The component or components in which we want to look for ids
+        **filters: One or more filters on the dataset. E.g. measured_terminal_type=MeasuredTerminalType.source.
+
+    Returns:
+        A list containing zero or one InvalidIdError, listing all ids where the value in the field of interest
+        was not a valid object identifier.
+    """
+    valid_ids = _get_valid_ids(data=data, ref_components=ref_components)
+    mask = _get_mask(data=data, component=component, field=field, **filters)
+
+    # Find any values that can't be found in the set of ids
+    invalid = np.logical_and(mask, np.isin(data[component][field], valid_ids, invert=True))
+    if invalid.any():
+        ids = data[component]["id"][invalid].flatten().tolist()
+        return [InvalidIdError(component, field, ids, ref_components, filters)]
+    return []
+
+
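A sketch of the reference-id check that `all_valid_ids` performs, e.g. "does every line's to_node point at an existing node?" (the tables are made up):

```python
import numpy as np

node = np.array([(1,), (2,)], dtype=[("id", "i4")])
line = np.array(
    [(10, 1, 2), (11, 2, 5)],
    dtype=[("id", "i4"), ("from_node", "i4"), ("to_node", "i4")],
)

valid_ids = node["id"]
invalid = np.isin(line["to_node"], valid_ids, invert=True)  # id 11 -> missing node 5
ids = line["id"][invalid].flatten().tolist()                # -> [11]
```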
+def all_boolean(data: SingleDataset, component: ComponentType, field: str) -> list[NotBooleanError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field' column are valid boolean
+    values, i.e. 0 or 1. Returns an empty list on success, or a list containing a single error object on failure.
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest
+        field: The field of interest
+
+    Returns:
+        A list containing zero or one NotBooleanError, listing all ids where the value in the field of interest was
+        not a valid boolean value.
+    """
+    invalid = np.isin(data[component][field], [0, 1], invert=True)
+    if invalid.any():
+        ids = data[component]["id"][invalid].flatten().tolist()
+        return [NotBooleanError(component, field, ids)]
+    return []
+
+
+def all_not_two_values_zero(
+    data: SingleDataset, component: ComponentType, field_1: str, field_2: str
+) -> list[TwoValuesZeroError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field_1' and 'field_2' columns
+    are not both zero. Returns an empty list on success, or a list containing a single error object on failure.
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest
+        field_1: The first field of interest
+        field_2: The second field of interest
+
+    Returns:
+        A list containing zero or one TwoValuesZeroError, listing all ids where the values in the two fields of
+        interest were both zero.
+    """
+    invalid = np.logical_and(np.equal(data[component][field_1], 0.0), np.equal(data[component][field_2], 0.0))
+    if invalid.any():
+        if invalid.ndim > 1:
+            invalid = invalid.any(axis=1)
+        ids = data[component]["id"][invalid].flatten().tolist()
+        return [TwoValuesZeroError(component, [field_1, field_2], ids)]
+    return []
+
+
+def all_not_two_values_equal(
+    data: SingleDataset, component: ComponentType, field_1: str, field_2: str
+) -> list[SameValueError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'field_1' and 'field_2' columns
+    are not the same. E.g. from_node and to_node of a line. Returns an empty list on success, or a list containing
+    a single error object on failure.
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest
+        field_1: The first field of interest
+        field_2: The second field of interest
+
+    Returns:
+        A list containing zero or one SameValueError, listing all ids where the values in the two fields of interest
+        were the same.
+    """
+    invalid = np.equal(data[component][field_1], data[component][field_2])
+    if invalid.any():
+        if invalid.ndim > 1:
+            invalid = invalid.any(axis=1)
+        ids = data[component]["id"][invalid].flatten().tolist()
+        return [SameValueError(component, [field_1, field_2], ids)]
+    return []
+
+
+def ids_valid_in_update_data_set(
+    update_data: SingleDataset, ref_data: SingleDataset, component: ComponentType, ref_name: DatasetType
+) -> list[IdNotInDatasetError | InvalidIdError]:
+    """
+    Check for all records of a particular type of component whether the ids:
+    - exist and match those in the reference data set
+    - are not present but qualify for an optional id
+
+    Args:
+        update_data: The update data set for all components
+        ref_data: The reference (input) data set for all components
+        component: The component of interest
+        ref_name: The name of the reference data set type
+
+    Returns:
+        A list containing zero or one IdNotInDatasetError, listing all ids of the objects in the data set which do
+        not exist in the reference data set.
+    """
+    component_data = update_data[component]
+    component_ref_data = ref_data[component]
+    if component_ref_data["id"].size == 0:
+        return [InvalidIdError(component=component, field="id", ids=None)]
+    id_field_is_nan = np.array(is_nan_or_default(component_data["id"]))
+    # check whether the ids qualify for optional
+    if component_data["id"].size == 0 or np.all(id_field_is_nan):
+        # check if the dimension of the component_data is the same as the component_ref_data
+        if get_comp_size(component_data) != get_comp_size(component_ref_data):
+            return [InvalidIdError(component=component, field="id", ids=None)]
+        return []  # supported optional id
+
+    if np.all(id_field_is_nan) and not np.all(~id_field_is_nan):
+        return [InvalidIdError(component=component, field="id", ids=None)]
+
+    # normal check: exist and match with input
+    invalid = np.isin(component_data["id"], component_ref_data["id"], invert=True)
+    if invalid.any():
+        ids = component_data["id"][invalid].flatten().tolist()
+        return [IdNotInDatasetError(component, ids, ref_name)]
+    return []
+
+
+def all_finite(data: SingleDataset, exceptions: dict[ComponentType, list[str]] | None = None) -> list[InfinityError]:
+    """
+    Check that for all records in all components, the values in all columns are finite values, i.e. float values
+    other than inf or -inf. NaN values are ignored, as in all other comparison functions; you can use none_missing()
+    to check for missing/NaN values. Returns an empty list on success, or a list containing an error object for each
+    component/field combination where infinite values were found.
+
+    Args:
+        data: The input/update data set for all components
+        exceptions:
+            A dictionary of fields per component type for which infinite values are supported. Defaults to empty.
+
+    Returns:
+        A list containing zero or more InfinityErrors, listing all ids where the value in the field of interest was
+        not finite.
+    """
+    errors = []
+    for component, array in data.items():
+        if not isinstance(array, np.ndarray):
+            raise NotImplementedError  # TODO(mgovers): add support for columnar data
+
+        for field, (dtype, _) in array.dtype.fields.items():
+            if not np.issubdtype(dtype, np.floating):
+                continue
+
+            if exceptions and field in exceptions.get(component, []):
+                continue
+
+            invalid = np.isinf(array[field])
+            if invalid.any():
+                ids = array["id"][invalid].flatten().tolist()
+                errors.append(InfinityError(component, field, ids))
+    return errors
+
+
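A sketch of the per-field scan in `all_finite` over the float columns of one structured array (component and field names are illustrative):

```python
import numpy as np

sym_load = np.array(
    [(1, 1e6), (2, np.inf)],
    dtype=[("id", "i4"), ("p_specified", "f8")],
)
for field, (dtype, _) in sym_load.dtype.fields.items():
    if not np.issubdtype(dtype, np.floating):
        continue  # skips the integer "id" column
    invalid = np.isinf(sym_load[field])
    # "p_specified": invalid -> [False, True], so id 2 would be reported
```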
+def no_strict_subset_missing(data: SingleDataset, fields: list[str], component_type: ComponentType):
+    """
+    Helper function that generates a multi-field validation error if a strict subset of the supplied fields is
+    missing. If, for an instance of the component type, all fields are missing or all fields are present, no error
+    is returned for that instance. In any other case, an error for that id is returned.
+
+    Args:
+        data: SingleDataset, pgm data
+        fields: List of fields
+        component_type: Component type to check
+    """
+    errors = []
+    if component_type in data:
+        component_data = data[component_type]
+        instances_with_nan_data = np.full_like([], fill_value=False, shape=(len(component_data),), dtype=bool)
+        instances_with_non_nan_data = np.full_like([], fill_value=False, shape=(len(component_data),), dtype=bool)
+        for field in fields:
+            nan_value = _nan_type(component_type, field)
+            asym_axes = tuple(range(component_data.ndim, component_data[field].ndim))
+            instances_with_nan_data = np.logical_or(
+                instances_with_nan_data,
+                np.any(
+                    (
+                        np.isnan(component_data[field])
+                        if np.any(np.isnan(nan_value))
+                        else np.equal(component_data[field], nan_value)
+                    ),
+                    axis=asym_axes,
+                ),
+            )
+            instances_with_non_nan_data = np.logical_or(
+                instances_with_non_nan_data,
+                np.any(
+                    (
+                        np.logical_not(np.isnan(component_data[field]))
+                        if np.any(np.isnan(nan_value))
+                        else np.logical_not(np.equal(component_data[field], nan_value))
+                    ),
+                    axis=asym_axes,
+                ),
+            )
+
+        instances_with_invalid_data = np.logical_and(instances_with_nan_data, instances_with_non_nan_data)
+
+        ids = component_data["id"][instances_with_invalid_data]
+        if len(ids) > 0:
+            errors.append(MultiFieldValidationError(component_type, fields, ids))
+
+    return errors
+
+
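The all-or-nothing rule enforced by `no_strict_subset_missing` in miniature (a sketch; `p_sigma`/`q_sigma` are just example fields whose missing-value sentinel is assumed to be NaN):

```python
import numpy as np

sensor = np.array(
    [(1, 1.0, 2.0), (2, np.nan, np.nan), (3, 1.0, np.nan)],
    dtype=[("id", "i4"), ("p_sigma", "f8"), ("q_sigma", "f8")],
)
has_nan = np.isnan(sensor["p_sigma"]) | np.isnan(sensor["q_sigma"])
has_val = ~np.isnan(sensor["p_sigma"]) | ~np.isnan(sensor["q_sigma"])
invalid = has_nan & has_val  # -> [False, False, True]: only id 3 mixes missing/supplied
```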
+def not_all_missing(data: SingleDataset, fields: list[str], component_type: ComponentType):
+    """
+    Helper function that generates a multi-field validation error if all values specified by the fields parameter
+    are missing.
+
+    Args:
+        data: SingleDataset, pgm data
+        fields: List of fields
+        component_type: Component type to check
+    """
+    min_fields = 2
+    if len(fields) < min_fields:
+        raise ValueError(
+            "The fields parameter must contain at least 2 fields. Otherwise use the none_missing function."
+        )
+
+    errors = []
+    if component_type in data:
+        component_data = data[component_type]
+        instances_with_all_nan_data = np.full_like([], fill_value=True, shape=(len(component_data),), dtype=bool)
+
+        for field in fields:
+            nan_value = _nan_type(component_type, field)
+            asym_axes = tuple(range(component_data.ndim, component_data[field].ndim))
+            instances_with_all_nan_data = np.logical_and(
+                instances_with_all_nan_data,
+                np.any(
+                    (
+                        np.isnan(component_data[field])
+                        if np.any(np.isnan(nan_value))
+                        else np.equal(component_data[field], nan_value)
+                    ),
+                    axis=asym_axes,
+                ),
+            )
+
+        ids = component_data["id"][instances_with_all_nan_data].flatten().tolist()
+        if len(ids) > 0:
+            errors.append(MultiFieldValidationError(component_type, fields, ids))
+
+    return errors
+
+
+def none_missing(data: SingleDataset, component: ComponentType, fields: str | list[str]) -> list[MissingValueError]:
+    """
+    Check that for all records of a particular type of component, the values in the 'fields' columns are not NaN.
+    Returns an empty list on success, or a list containing an error object for each field with missing values on
+    failure.
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest
+        fields: The fields of interest
+
+    Returns:
+        A list containing zero or more MissingValueErrors; one for each field, listing all ids where the value in
+        the field was NaN.
+    """
+    errors = []
+    if isinstance(fields, str):
+        fields = [fields]
+    for field in fields:
+        nan = _nan_type(component, field)
+        invalid = np.isnan(data[component][field]) if np.isnan(nan) else np.equal(data[component][field], nan)
+
+        if invalid.any():
+            # handle both symmetric and asymmetric values
+            invalid = np.any(invalid, axis=tuple(range(1, invalid.ndim)))
+            ids = data[component]["id"][invalid].flatten().tolist()
+            errors.append(MissingValueError(component, field, ids))
+    return errors
+
+
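A sketch of the symmetric/asymmetric reduction used in `none_missing`: an asymmetric field stores one value per phase, and a record is flagged if any phase is missing:

```python
import numpy as np

invalid = np.array([[False, True, False], [False, False, False]])  # 2 records x 3 phases
per_record = np.any(invalid, axis=tuple(range(1, invalid.ndim)))   # -> [ True, False]
```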
+def valid_p_q_sigma(data: SingleDataset, component: ComponentType) -> list[PQSigmaPairError]:
+    """
+    Check the validity of the pair `(p_sigma, q_sigma)` for 'sym_power_sensor' and 'asym_power_sensor'.
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest, in this case only 'sym_power_sensor' or 'asym_power_sensor'
+
+    Returns:
+        A list containing zero or one PQSigmaPairError, listing the p_sigma and q_sigma mismatches.
+        Note that for asymmetric power sensors, partial assignment of p_sigma and q_sigma is also considered a
+        mismatch.
+    """
+    errors = []
+    p_sigma = data[component]["p_sigma"]
+    q_sigma = data[component]["q_sigma"]
+    p_nan = np.isnan(p_sigma)
+    q_nan = np.isnan(q_sigma)
+    mis_match = p_nan != q_nan
+    if p_sigma.ndim > 1:  # if component == 'asym_power_sensor':
+        mis_match = mis_match.any(axis=-1)
+        mis_match |= np.logical_xor(p_nan.any(axis=-1), p_nan.all(axis=-1))
+        mis_match |= np.logical_xor(q_nan.any(axis=-1), q_nan.all(axis=-1))
+
+    if mis_match.any():
+        ids = data[component]["id"][mis_match].flatten().tolist()
+        errors.append(PQSigmaPairError(component, ["p_sigma", "q_sigma"], ids))
+    return errors
+
+
+def all_valid_clocks(
+    data: SingleDataset, component: ComponentType, clock_field: str, winding_from_field: str, winding_to_field: str
+) -> list[TransformerClockError]:
+    """
+    Custom validation rule: an odd clock number is only allowed for Dy(n) or Y(N)d configurations.
+
+    Args:
+        data: The input/update data set for all components
+        component: The component of interest
+        clock_field: The clock field
+        winding_from_field: The winding from field
+        winding_to_field: The winding to field
+
+    Returns:
+        A list containing zero or more TransformerClockErrors, listing all the ids of transformers where the clock
+        was invalid, given the winding type.
+    """
+
+    clk = data[component][clock_field]
+    wfr = data[component][winding_from_field]
+    wto = data[component][winding_to_field]
+    wfr_is_wye = np.isin(wfr, [WindingType.wye, WindingType.wye_n])
+    wto_is_wye = np.isin(wto, [WindingType.wye, WindingType.wye_n])
+    odd = clk % 2 == 1
+    # An even number is not possible if one side is a wye winding and the other side is not.
+    # An odd number is not possible if both sides are wye windings or neither side is.
+    err = (~odd & (wfr_is_wye != wto_is_wye)) | (odd & (wfr_is_wye == wto_is_wye))
+    if err.any():
+        return [
+            TransformerClockError(
+                component=component,
+                fields=[clock_field, winding_from_field, winding_to_field],
+                ids=data[component]["id"][err].flatten().tolist(),
+            )
+        ]
+    return []
+
+
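A sketch of the clock/winding parity rule: an odd clock number needs exactly one wye side. The boolean arrays stand in for the `np.isin(..., [WindingType.wye, WindingType.wye_n])` results:

```python
import numpy as np

clk = np.array([1, 2, 5])
wfr_is_wye = np.array([True, True, True])
wto_is_wye = np.array([False, False, True])
odd = clk % 2 == 1
err = (~odd & (wfr_is_wye != wto_is_wye)) | (odd & (wfr_is_wye == wto_is_wye))
# err -> [False, True, True]: clock 2 on a Yd transformer and clock 5 on YNy are invalid
```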
+def all_same_current_angle_measurement_type_on_terminal(
+    data: SingleDataset,
+    component: ComponentType,
+    measured_object_field: str,
+    measured_terminal_type_field: str,
+    angle_measurement_type_field: str,
+) -> list[MixedCurrentAngleMeasurementTypeError]:
+    """
+    Custom validation rule: all current angle measurement types on a terminal must be the same.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        component (ComponentType): The component of interest
+        measured_object_field (str): The measured object field
+        measured_terminal_type_field (str): The measured terminal type field
+        angle_measurement_type_field (str): The angle measurement type field
+
+    Returns:
+        A list containing zero or more MixedCurrentAngleMeasurementTypeErrors, listing all the ids of components
+        where the current angle measurement type was not the same for the same terminal.
+    """
+    sorted_indices = np.argsort(data[component][[measured_object_field, measured_terminal_type_field]])
+    sorted_values = data[component][sorted_indices]
+
+    unique_current_measurements, measurement_sorted_indices = np.unique(
+        sorted_values[[measured_object_field, measured_terminal_type_field, angle_measurement_type_field]],
+        return_inverse=True,
+    )
+    _, terminal_sorted_indices = np.unique(
+        unique_current_measurements[[measured_object_field, measured_terminal_type_field]], return_inverse=True
+    )
+
+    mixed_sorted_indices = np.setdiff1d(measurement_sorted_indices, terminal_sorted_indices)
+    mixed_terminals = np.unique(
+        sorted_values[mixed_sorted_indices][[measured_object_field, measured_terminal_type_field]]
+    )
+
+    err = np.isin(data[component][[measured_object_field, measured_terminal_type_field]], mixed_terminals)
+    if err.any():
+        return [
+            MixedCurrentAngleMeasurementTypeError(
+                component=component,
+                fields=[measured_object_field, measured_terminal_type_field, angle_measurement_type_field],
+                ids=data[component]["id"][err].flatten().tolist(),
+            )
+        ]
+    return []
+
+
+def all_same_sensor_type_on_same_terminal(
+    data: SingleDataset,
+    power_sensor_type: ComponentType,
+    current_sensor_type: ComponentType,
+    measured_object_field: str,
+    measured_terminal_type_field: str,
+) -> list[MixedPowerCurrentSensorError]:
+    """
+    Custom validation rule: all sensors on a terminal must be of the same type.
+
+    E.g. mixing sym_power_sensor and asym_power_sensor on the same terminal is allowed, but mixing
+    sym_power_sensor and sym_current_sensor is not allowed.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        power_sensor_type (ComponentType): The power sensor component
+        current_sensor_type (ComponentType): The current sensor component
+        measured_object_field (str): The measured object field
+        measured_terminal_type_field (str): The measured terminal type field
+
+    Returns:
+        A list containing zero or more MixedPowerCurrentSensorErrors, listing all the ids of components that measure
+        the same terminal of the same component in different, unsupported ways.
+    """
+    power_sensor_data = data[power_sensor_type]
+    current_sensor_data = data[current_sensor_type]
+    power_sensor_measured_terminals = power_sensor_data[[measured_object_field, measured_terminal_type_field]]
+    current_sensor_measured_terminals = current_sensor_data[[measured_object_field, measured_terminal_type_field]]
+
+    mixed_terminals = np.intersect1d(power_sensor_measured_terminals, current_sensor_measured_terminals)
+    if mixed_terminals.size != 0:
+        mixed_power_sensor_ids = power_sensor_data["id"][np.isin(power_sensor_measured_terminals, mixed_terminals)]
+        mixed_current_sensor_ids = current_sensor_data["id"][
+            np.isin(current_sensor_measured_terminals, mixed_terminals)
+        ]
+
+        return [
+            MixedPowerCurrentSensorError(
+                fields=[
+                    (power_sensor_type, measured_object_field),
+                    (power_sensor_type, measured_terminal_type_field),
+                    (current_sensor_type, measured_object_field),
+                    (current_sensor_type, measured_terminal_type_field),
+                ],
+                ids=[(power_sensor_type, s) for s in mixed_power_sensor_ids.flatten().tolist()]
+                + [(current_sensor_type, s) for s in mixed_current_sensor_ids.flatten().tolist()],
+            )
+        ]
+    return []
+
+
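A sketch of the terminal-overlap detection: a set intersection over (measured_object, measured_terminal_type) pairs, using standalone structured arrays in place of the multi-field views taken in the real code:

```python
import numpy as np

dt = [("measured_object", "i4"), ("measured_terminal_type", "i1")]
power = np.array([(5, 0), (6, 1)], dtype=dt)
current = np.array([(5, 0), (7, 0)], dtype=dt)
mixed = np.intersect1d(power, current)  # -> [(5, 0)]: one terminal is measured both ways
```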
+def all_valid_fault_phases(
+    data: SingleDataset, component: ComponentType, fault_type_field: str, fault_phase_field: str
+) -> list[FaultPhaseError]:
+    """
+    Custom validation rule: only a subset of fault_phases is supported for each fault type.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        component (ComponentType): The component of interest
+        fault_type_field (str): The fault type field
+        fault_phase_field (str): The fault phase field
+
+    Returns:
+        A list containing zero or more FaultPhaseErrors, listing all the ids of faults where the fault phase was
+        invalid, given the fault type.
+    """
+    fault_types = data[component][fault_type_field]
+    fault_phases = data[component][fault_phase_field]
+
+    supported_combinations: dict[FaultType, list[FaultPhase]] = {
+        FaultType.three_phase: [FaultPhase.abc, FaultPhase.default_value, FaultPhase.nan],
+        FaultType.single_phase_to_ground: [
+            FaultPhase.a,
+            FaultPhase.b,
+            FaultPhase.c,
+            FaultPhase.default_value,
+            FaultPhase.nan,
+        ],
+        FaultType.two_phase: [FaultPhase.ab, FaultPhase.ac, FaultPhase.bc, FaultPhase.default_value, FaultPhase.nan],
+        FaultType.two_phase_to_ground: [
+            FaultPhase.ab,
+            FaultPhase.ac,
+            FaultPhase.bc,
+            FaultPhase.default_value,
+            FaultPhase.nan,
+        ],
+        FaultType.nan: [],
+    }
+
+    def _fault_phase_unsupported(fault_type: FaultType, fault_phase: FaultPhase):
+        return fault_phase not in supported_combinations.get(fault_type, [])
+
+    err = np.vectorize(_fault_phase_unsupported)(fault_type=fault_types, fault_phase=fault_phases)
+    if err.any():
+        return [
+            FaultPhaseError(
+                component=component,
+                fields=[fault_type_field, fault_phase_field],
+                ids=data[component]["id"][err].flatten().tolist(),
+            )
+        ]
+    return []
+
+
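A sketch of the `np.vectorize`-based pairwise lookup, with stand-in enums (the real `FaultType`/`FaultPhase` come from the package and the supported table is larger):

```python
import numpy as np
from enum import IntEnum

class FaultType(IntEnum):   # stand-in
    three_phase = 0
    two_phase = 1

class FaultPhase(IntEnum):  # stand-in
    abc = 0
    ab = 1

supported = {FaultType.three_phase: [FaultPhase.abc], FaultType.two_phase: [FaultPhase.ab]}

def unsupported(fault_type, fault_phase):
    return fault_phase not in supported.get(fault_type, [])

err = np.vectorize(unsupported)(np.array([0, 1]), np.array([1, 1]))  # -> [ True, False]
```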
+def any_voltage_angle_measurement_if_global_current_measurement(
+    data: SingleDataset,
+    component: ComponentType,
+    angle_measurement_type_filter: tuple[str, AngleMeasurementType],
+    voltage_sensor_u_angle_measured: dict[ComponentType, str],
+) -> list[MissingVoltageAngleMeasurementError]:
+    """Require a voltage angle measurement if a global angle current measurement is present.
+
+    Args:
+        data (SingleDataset): The input/update data set for all components
+        component (ComponentType): The component of interest
+        angle_measurement_type_filter (tuple[str, AngleMeasurementType]):
+            The angle measurement type field and value to check for
+        voltage_sensor_u_angle_measured (dict[ComponentType, str]):
+            The voltage angle measured field for each voltage sensor type
+
+    Returns:
+        A list containing zero or more MissingVoltageAngleMeasurementErrors, listing all the ids of global angle
+        current sensors that require at least one voltage angle measurement.
+    """
+    angle_measurement_type_field, angle_measurement_type = angle_measurement_type_filter
+
+    current_sensors = data[component]
+    if np.all(current_sensors[angle_measurement_type_field] != angle_measurement_type):
+        return []
+
+    for voltage_sensor_type, voltage_angle_field in voltage_sensor_u_angle_measured.items():
+        if (np.isfinite(data[voltage_sensor_type][voltage_angle_field])).any():
+            return []
+
+    voltage_and_current_sensor_ids = {sensor: data[sensor]["id"] for sensor in voltage_sensor_u_angle_measured}
+    voltage_and_current_sensor_ids[component] = current_sensors[
+        current_sensors[angle_measurement_type_field] == angle_measurement_type
+    ]["id"]
+
+    return [
+        MissingVoltageAngleMeasurementError(
+            fields=[(component, angle_measurement_type_field), *list(voltage_sensor_u_angle_measured.items())],
+            ids=[
+                (sensor_type, id_)
+                for sensor_type, sensor_data in voltage_and_current_sensor_ids.items()
+                for id_ in sensor_data.flatten().tolist()
+            ],
+        )
+    ]