power-grid-model 1.11.23__py3-none-win_amd64.whl → 1.12.70__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. power_grid_model/__init__.py +54 -52
  2. power_grid_model/_core/__init__.py +3 -3
  3. power_grid_model/_core/buffer_handling.py +493 -478
  4. power_grid_model/_core/data_handling.py +195 -141
  5. power_grid_model/_core/data_types.py +143 -132
  6. power_grid_model/_core/dataset_definitions.py +109 -108
  7. power_grid_model/_core/enum.py +226 -226
  8. power_grid_model/_core/error_handling.py +206 -205
  9. power_grid_model/_core/errors.py +130 -126
  10. power_grid_model/_core/index_integer.py +17 -17
  11. power_grid_model/_core/options.py +71 -70
  12. power_grid_model/_core/power_grid_core.py +563 -581
  13. power_grid_model/_core/power_grid_dataset.py +535 -534
  14. power_grid_model/_core/power_grid_meta.py +257 -243
  15. power_grid_model/_core/power_grid_model.py +969 -687
  16. power_grid_model/_core/power_grid_model_c/__init__.py +3 -0
  17. power_grid_model/_core/power_grid_model_c/bin/power_grid_model_c.dll +0 -0
  18. power_grid_model/_core/power_grid_model_c/get_pgm_dll_path.py +63 -0
  19. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/basics.h +255 -0
  20. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/buffer.h +108 -0
  21. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset.h +316 -0
  22. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/dataset_definitions.h +1052 -0
  23. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/handle.h +99 -0
  24. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/meta_data.h +189 -0
  25. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/model.h +125 -0
  26. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/options.h +142 -0
  27. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c/serialization.h +118 -0
  28. power_grid_model/_core/power_grid_model_c/include/power_grid_model_c.h +36 -0
  29. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/basics.hpp +65 -0
  30. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/buffer.hpp +61 -0
  31. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/dataset.hpp +220 -0
  32. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/handle.hpp +108 -0
  33. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/meta_data.hpp +84 -0
  34. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/model.hpp +63 -0
  35. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/options.hpp +52 -0
  36. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/serialization.hpp +124 -0
  37. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp/utils.hpp +81 -0
  38. power_grid_model/_core/power_grid_model_c/include/power_grid_model_cpp.hpp +19 -0
  39. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfig.cmake +37 -0
  40. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelConfigVersion.cmake +65 -0
  41. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets-release.cmake +19 -0
  42. power_grid_model/_core/power_grid_model_c/lib/cmake/power_grid_model/power_grid_modelTargets.cmake +144 -0
  43. power_grid_model/_core/power_grid_model_c/lib/power_grid_model_c.lib +0 -0
  44. power_grid_model/_core/power_grid_model_c/share/LICENSE +292 -0
  45. power_grid_model/_core/power_grid_model_c/share/README.md +15 -0
  46. power_grid_model/_core/serialization.py +317 -317
  47. power_grid_model/_core/typing.py +20 -20
  48. power_grid_model/_core/utils.py +798 -789
  49. power_grid_model/data_types.py +321 -307
  50. power_grid_model/enum.py +27 -27
  51. power_grid_model/errors.py +37 -37
  52. power_grid_model/typing.py +43 -43
  53. power_grid_model/utils.py +473 -465
  54. power_grid_model/validation/__init__.py +25 -25
  55. power_grid_model/validation/_rules.py +1171 -1175
  56. power_grid_model/validation/_validation.py +1172 -1158
  57. power_grid_model/validation/assertions.py +93 -93
  58. power_grid_model/validation/errors.py +602 -588
  59. power_grid_model/validation/utils.py +313 -321
  60. {power_grid_model-1.11.23.dist-info → power_grid_model-1.12.70.dist-info}/METADATA +178 -180
  61. power_grid_model-1.12.70.dist-info/RECORD +65 -0
  62. {power_grid_model-1.11.23.dist-info → power_grid_model-1.12.70.dist-info}/WHEEL +1 -1
  63. power_grid_model-1.12.70.dist-info/entry_points.txt +3 -0
  64. power_grid_model/_core/_power_grid_core.dll +0 -0
  65. power_grid_model-1.11.23.dist-info/RECORD +0 -36
  66. power_grid_model-1.11.23.dist-info/top_level.txt +0 -1
  67. {power_grid_model-1.11.23.dist-info → power_grid_model-1.12.70.dist-info}/licenses/LICENSE +0 -0
power_grid_model/_core/buffer_handling.py
@@ -1,478 +1,493 @@
- # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
- #
- # SPDX-License-Identifier: MPL-2.0
-
- """
- Power grid model buffer handler
- """
-
- from dataclasses import dataclass
- from typing import cast
-
- import numpy as np
-
- from power_grid_model._core.data_types import (
-     AttributeType,
-     ComponentData,
-     DenseBatchData,
-     IndexPointer,
-     SingleComponentData,
-     SparseBatchArray,
-     SparseBatchData,
- )
- from power_grid_model._core.error_handling import VALIDATOR_MSG
- from power_grid_model._core.index_integer import IdxC, IdxNp
- from power_grid_model._core.power_grid_core import IdxPtr, VoidPtr
- from power_grid_model._core.power_grid_meta import ComponentMetaData
- from power_grid_model._core.utils import (
-     _extract_data_from_component_data,
-     _extract_indptr,
-     check_indptr_consistency,
-     is_columnar,
-     is_sparse,
- )
-
-
- @dataclass
- class BufferProperties:
-     """
-     Helper class to collect info on the dataset.
-     """
-
-     is_sparse: bool
-     is_batch: bool
-     batch_size: int
-     n_elements_per_scenario: int
-     n_total_elements: int
-     columns: list[AttributeType] | None
-
-
- # prepared attribute data for c api
- @dataclass
- class CAttributeBuffer:
-     """
-     Buffer for a single attribute.
-     """
-
-     data: VoidPtr  # type: ignore
-
-
- # prepared component data for c api
- @dataclass
- class CBuffer:
-     """
-     Buffer for a single component.
-     """
-
-     data: VoidPtr | None
-     indptr: IdxPtr | None  # type: ignore
-     n_elements_per_scenario: int
-     batch_size: int
-     total_elements: int
-     attribute_data: dict[AttributeType, CAttributeBuffer]
-
-
- def _get_raw_data_view(data: np.ndarray, dtype: np.dtype) -> VoidPtr:
-     """
-     Get a raw view on the data.
-
-     Args:
-         data: the data.
-         dtype: the dtype the raw buffer should obey.
-
-     Returns:
-         a raw view on the data set.
-     """
-     if data.dtype != dtype:
-         raise ValueError(f"Data type does not match schema. {VALIDATOR_MSG}")
-     return np.ascontiguousarray(data, dtype=dtype).ctypes.data_as(VoidPtr)
-
-
- def _get_raw_component_data_view(
-     data: np.ndarray | dict[AttributeType, np.ndarray], schema: ComponentMetaData
- ) -> VoidPtr | None:
-     """
-     Get a raw view on the data.
-
-     Args:
-         data: the data.
-         schema: the schema the raw buffer should obey.
-
-     Returns:
-         a raw view on the data set.
-     """
-     if isinstance(data, np.ndarray):
-         return _get_raw_data_view(data, dtype=schema.dtype)
-     return None
-
-
- def _get_raw_attribute_data_view(data: np.ndarray, schema: ComponentMetaData, attribute: AttributeType) -> VoidPtr:
-     """
-     Get a raw view on the data.
-
-     Args:
-         data: the data.
-         schema: the schema the raw buffer should obey.
-
-     Returns:
-         a raw view on the data set.
-     """
-     if schema.dtype[attribute].shape == (3,) and data.shape[-1] != 3:
-         raise ValueError("Given data has a different schema than supported.")
-     return _get_raw_data_view(data, dtype=schema.dtype[attribute].base)
-
-
- def _get_indptr_view(indptr: np.ndarray) -> IdxPtr:  # type: ignore[valid-type]
-     """
-     Get a raw view on the index pointer.
-
-     Args:
-         indptr: the index pointer.
-
-     Returns:
-         a raw view on the index pointer.
-     """
-     return np.ascontiguousarray(indptr, dtype=IdxNp).ctypes.data_as(IdxPtr)
-
-
- def _get_dense_buffer_properties(
-     data: ComponentData,
-     schema: ComponentMetaData,
-     is_batch: bool | None,
-     batch_size: int | None,
- ) -> BufferProperties:
-     """
-     Extract the properties of the uniform batch dataset component.
-
-     Args:
-         data (ComponentData): the dataset component.
-         schema (ComponentMetaData): the dataset type.
-         is_batch (bool | None): whether the data is a batch dataset.
-         batch_size (int | None): the batch size.
-
-     Raises:
-         KeyError: if the dataset component is not sparse.
-         ValueError: if the dataset component contains conflicting or bad data.
-
-     Returns:
-         the properties of the dataset component.
-     """
-     if is_batch is not None and batch_size is not None and batch_size != 1 and not is_batch:
-         raise ValueError(f"Inconsistent 'is batch' and 'batch size'. {VALIDATOR_MSG}")
-
-     is_sparse_property = False
-
-     sub_data = _extract_data_from_component_data(data)
-     if not is_columnar(data):
-         actual_ndim = sub_data.ndim
-         shape: tuple[int] = sub_data.shape
-         columns = None
-     else:
-         if not sub_data:
-             raise ValueError(f"Empty columnar buffer is ambiguous. {VALIDATOR_MSG}")
-         attribute, attribute_data = next(iter(sub_data.items()))
-         actual_ndim = attribute_data.ndim - schema.dtype[attribute].ndim
-         shape = attribute_data.shape[:actual_ndim]
-         columns = list(sub_data)
-
-         for attribute, attribute_data in sub_data.items():
-             if (
-                 attribute_data.ndim != actual_ndim + schema.dtype[attribute].ndim
-                 or attribute_data.shape[:actual_ndim] != shape
-             ):
-                 raise ValueError(f"Data buffers must be consistent. {VALIDATOR_MSG}")
-
-     if actual_ndim not in (1, 2):
-         raise ValueError(f"Array can only be 1D or 2D. {VALIDATOR_MSG}")
-
-     actual_is_batch = actual_ndim == 2
-     actual_batch_size = shape[0] if actual_is_batch else 1
-     n_elements_per_scenario = shape[-1]
-     n_total_elements = actual_batch_size * n_elements_per_scenario
-
-     if is_batch is not None and is_batch != actual_is_batch:
-         raise ValueError(f"Provided 'is batch' is incorrect for the provided data. {VALIDATOR_MSG}")
-     if batch_size is not None and batch_size != actual_batch_size:
-         raise ValueError(f"Provided 'batch size' is incorrect for the provided data. {VALIDATOR_MSG}")
-
-     return BufferProperties(
-         is_sparse=is_sparse_property,
-         is_batch=actual_is_batch,
-         batch_size=actual_batch_size,
-         n_elements_per_scenario=n_elements_per_scenario,
-         n_total_elements=n_total_elements,
-         columns=columns,
-     )
-
-
- def _get_sparse_buffer_properties(
-     data: ComponentData,
-     schema: ComponentMetaData,
-     batch_size: int | None,
- ) -> BufferProperties:
-     """
-     Extract the properties of the sparse batch dataset component.
-
-     Args:
-         data (ComponentData): the sparse dataset component.
-         schema (ComponentMetaData | None): the dataset type.
-         batch_size (int | None): the batch size.
-
-     Raises:
-         KeyError: if the dataset component is not sparse.
-         ValueError: if the dataset component contains conflicting or bad data.
-
-     Returns:
-         the properties of the dataset component.
-     """
-     is_sparse_property = True
-
-     contents = _extract_data_from_component_data(data)
-     indptr = _extract_indptr(data)
-
-     ndim = 1
-     columns: list[AttributeType] | None = None
-     if not is_columnar(data):
-         shape: tuple[int, ...] = contents.shape
-     else:
-         if not contents:
-             raise ValueError(f"Empty columnar buffer is ambiguous. {VALIDATOR_MSG}")
-         attribute_data = next(iter(contents.values()))
-         shape = attribute_data.shape[:ndim]
-         columns = list(contents)
-         for attribute, attribute_data in contents.items():
-             if attribute_data.ndim != ndim + schema.dtype[attribute].ndim or attribute_data.shape[:ndim] != shape:
-                 raise ValueError(f"Data buffers must be consistent. {VALIDATOR_MSG}")
-
-     contents_size = shape[0]
-     check_indptr_consistency(indptr, batch_size, contents_size)
-
-     is_batch = True
-     n_elements_per_scenario = -1
-     n_total_elements = contents_size
-
-     return BufferProperties(
-         is_sparse=is_sparse_property,
-         is_batch=is_batch,
-         batch_size=indptr.size - 1,
-         n_elements_per_scenario=n_elements_per_scenario,
-         n_total_elements=n_total_elements,
-         columns=columns,
-     )
-
-
- def get_buffer_properties(
-     data: ComponentData,
-     schema: ComponentMetaData,
-     is_batch: bool | None = None,
-     batch_size: int | None = None,
- ) -> BufferProperties:
-     """
-     Extract the properties of the dataset component
-
-     Args:
-         data (ComponentData): the dataset component.
-         schema (ComponentMetaData | None): the dataset type [optional if data is not columnar]
-         is_batch (bool | None): whether the data is a batch dataset. [optional]
-         batch_size (int | None): the batch size. [optional]
-
-     Raises:
-         ValueError: if the dataset component contains conflicting or bad data.
-
-     Returns:
-         the properties of the dataset component.
-     """
-     if not is_sparse(data):
-         return _get_dense_buffer_properties(data=data, schema=schema, is_batch=is_batch, batch_size=batch_size)
-
-     if is_batch is not None and not is_batch:
-         raise ValueError("Sparse data must be batch data")
-
-     return _get_sparse_buffer_properties(data=cast(SparseBatchArray, data), schema=schema, batch_size=batch_size)
-
-
- def _get_attribute_buffer_views(
-     data: np.ndarray | dict[AttributeType, np.ndarray], schema: ComponentMetaData
- ) -> dict[AttributeType, CAttributeBuffer]:
-     """
-     Get C API compatible views on attribute buffers.
-
-     Args:
-         data (dict[AttributeType, np.ndarray]): the data.
-         schema (ComponentMetaData): the schema that the data should obey.
-
-     Returns:
-         dict[AttributeType, CAttributeBuffer]: the C API attribute buffer view per attribute.
-     """
-     if isinstance(data, np.ndarray):
-         return {}
-
-     return {
-         attribute: CAttributeBuffer(
-             data=_get_raw_attribute_data_view(data=attribute_data, schema=schema, attribute=attribute)
-         )
-         for attribute, attribute_data in data.items()
-     }
-
-
- def _get_uniform_buffer_view(
-     data: DenseBatchData,
-     schema: ComponentMetaData,
-     is_batch: bool | None,
-     batch_size: int | None,
- ) -> CBuffer:
-     """
-     Get a C API compatible view on a uniform buffer.
-
-     Args:
-         data: the data.
-         schema: the schema that the data should obey.
-         is_batch (bool | None): whether the data is a batch dataset.
-         batch_size (int | None): the batch size.
-
-     Returns:
-         the C API buffer view.
-     """
-     properties = _get_dense_buffer_properties(data, schema=schema, is_batch=is_batch, batch_size=batch_size)
-
-     return CBuffer(
-         data=_get_raw_component_data_view(data=data, schema=schema),
-         indptr=IdxPtr(),
-         n_elements_per_scenario=properties.n_elements_per_scenario,
-         batch_size=properties.batch_size,
-         total_elements=properties.n_total_elements,
-         attribute_data=_get_attribute_buffer_views(data=data, schema=schema),
-     )
-
-
- def _get_sparse_buffer_view(
-     data: SparseBatchArray,
-     schema: ComponentMetaData,
-     batch_size: int | None,
- ) -> CBuffer:
-     """
-     Get a C API compatible view on a sparse buffer.
-
-     Args:
-         data: the data.
-         schema: the schema that the data should obey.
-         batch_size (int | None): the batch size.
-
-     Returns:
-         the C API buffer view.
-     """
-     contents = data["data"]
-     indptr = data["indptr"]
-
-     properties = _get_sparse_buffer_properties(data, schema=schema, batch_size=batch_size)
-
-     return CBuffer(
-         data=_get_raw_component_data_view(data=contents, schema=schema),
-         indptr=_get_indptr_view(indptr),
-         n_elements_per_scenario=properties.n_elements_per_scenario,
-         batch_size=properties.batch_size,
-         total_elements=properties.n_total_elements,
-         attribute_data=_get_attribute_buffer_views(data=contents, schema=schema),
-     )
-
-
- def get_buffer_view(
-     data: ComponentData,
-     schema: ComponentMetaData,
-     is_batch: bool | None = None,
-     batch_size: int | None = None,
- ) -> CBuffer:
-     """
-     Get a C API compatible view on a buffer.
-
-     Args:
-         data: the data.
-         schema: the schema that the data should obey.
-         is_batch (bool | None): whether the data is a batch dataset. [optional]
-         batch_size (int | None): the batch size. [optional]
-
-     Returns:
-         the C API buffer view.
-     """
-     if not is_sparse(data):
-         return _get_uniform_buffer_view(cast(DenseBatchData, data), schema, is_batch, batch_size)
-
-     if is_batch is not None and not is_batch:
-         raise ValueError("Sparse data must be batch data")
-
-     return _get_sparse_buffer_view(cast(SparseBatchArray, data), schema, batch_size)
-
-
- def create_buffer(properties: BufferProperties, schema: ComponentMetaData) -> ComponentData:
-     """
-     Create a buffer with the provided properties and type.
-
-     Args:
-         properties: the desired buffer properties.
-         schema: the data type of the buffer.
-
-     Raises:
-         ValueError: if the buffer properties are not consistent.
-
-     Returns:
-         np.ndarray | dict[str, np.ndarray]: a buffer with the correct properties.
-     """
-     if properties.is_sparse:
-         return _create_sparse_buffer(properties=properties, schema=schema)
-
-     return _create_uniform_buffer(properties=properties, schema=schema)
-
-
- def _create_uniform_buffer(properties: BufferProperties, schema: ComponentMetaData) -> DenseBatchData:
-     """
-     Create a uniform buffer with the provided properties and type.
-
-     Args:
-         properties: the desired buffer properties.
-         schema: the data type of the buffer.
-
-     Raises:
-         ValueError: if the buffer properties are not uniform.
-
-     Returns:
-         A uniform buffer with the correct properties.
-     """
-     if properties.is_sparse:
-         raise ValueError(f"A uniform buffer cannot be sparse. {VALIDATOR_MSG}")
-
-     shape: int | tuple[int, int] = (
-         (properties.batch_size, properties.n_elements_per_scenario)
-         if properties.is_batch
-         else properties.n_elements_per_scenario
-     )
-     return _create_contents_buffer(shape=shape, dtype=schema.dtype, columns=properties.columns)
-
-
- def _create_sparse_buffer(properties: BufferProperties, schema: ComponentMetaData) -> SparseBatchData:
-     """
-     Create a sparse buffer with the provided properties and type.
-
-     Args:
-         properties: the desired buffer properties.
-         schema: the data type of the buffer.
-
-     Raises:
-         ValueError: if the buffer properties are not sparse.
-
-     Returns:
-         A sparse buffer with the correct properties.
-     """
-     data: SingleComponentData = _create_contents_buffer(
-         shape=properties.n_total_elements,
-         dtype=schema.dtype,
-         columns=properties.columns,
-     )
-     indptr: IndexPointer = np.array([0] * properties.batch_size + [properties.n_total_elements], dtype=IdxC)
-     return cast(SparseBatchData, {"data": data, "indptr": indptr})
-
-
- def _create_contents_buffer(shape, dtype, columns: list[AttributeType] | None) -> SingleComponentData | DenseBatchData:
-     if columns is None:
-         return np.empty(shape=shape, dtype=dtype)
-
-     return {attribute: np.empty(shape=shape, dtype=dtype[attribute]) for attribute in columns}
+ # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
+ #
+ # SPDX-License-Identifier: MPL-2.0
+
+ """
+ Power grid model buffer handler
+ """
+
+ from dataclasses import dataclass
+ from typing import cast
+
+ import numpy as np
+
+ from power_grid_model._core.data_types import (
+     AttributeType,
+     ComponentData,
+     DenseBatchData,
+     IndexPointer,
+     SingleComponentData,
+     SparseBatchArray,
+     SparseBatchData,
+ )
+ from power_grid_model._core.error_handling import VALIDATOR_MSG
+ from power_grid_model._core.index_integer import IdxC, IdxNp
+ from power_grid_model._core.power_grid_core import IdxPtr, VoidPtr
+ from power_grid_model._core.power_grid_meta import ComponentMetaData
+ from power_grid_model._core.utils import (
+     _extract_data_from_component_data,
+     _extract_indptr,
+     check_indptr_consistency,
+     is_columnar,
+     is_sparse,
+ )
+
+
+ @dataclass
+ class BufferProperties:
+     """
+     Helper class to collect info on the dataset.
+     """
+
+     is_sparse: bool
+     is_batch: bool
+     batch_size: int
+     n_elements_per_scenario: int
+     n_total_elements: int
+     columns: list[AttributeType] | None
+
+
+ # prepared attribute data for c api
+ @dataclass
+ class CAttributeBuffer:
+     """
+     Buffer for a single attribute.
+     """
+
+     data: VoidPtr  # type: ignore
+
+
+ # prepared component data for c api
+ @dataclass
+ class CBuffer:
+     """
+     Buffer for a single component.
+     """
+
+     data: VoidPtr | None
+     indptr: IdxPtr | None  # type: ignore
+     n_elements_per_scenario: int
+     batch_size: int
+     total_elements: int
+     attribute_data: dict[AttributeType, CAttributeBuffer]
+
+
+ def _get_raw_data_view(data: np.ndarray, dtype: np.dtype) -> VoidPtr:
+     """
+     Get a raw view on the data.
+
+     Args:
+         data: the data.
+         dtype: the dtype the raw buffer should obey.
+
+     Returns:
+         a raw view on the data set.
+     """
+     if data.dtype != dtype:
+         raise ValueError(f"Data type does not match schema. {VALIDATOR_MSG}")
+     return np.ascontiguousarray(data, dtype=dtype).ctypes.data_as(VoidPtr)
+
+
+ def _get_raw_component_data_view(
+     data: np.ndarray | dict[AttributeType, np.ndarray], schema: ComponentMetaData
+ ) -> VoidPtr | None:
+     """
+     Get a raw view on the data.
+
+     Args:
+         data: the data.
+         schema: the schema the raw buffer should obey.
+
+     Returns:
+         a raw view on the data set.
+     """
+     if isinstance(data, np.ndarray):
+         return _get_raw_data_view(data, dtype=schema.dtype)
+     return None
+
+
+ def _get_raw_attribute_data_view(data: np.ndarray, schema: ComponentMetaData, attribute: AttributeType) -> VoidPtr:
+     """
+     Get a raw view on the data.
+
+     Args:
+         data: the data.
+         schema: the schema the raw buffer should obey.
+
+     Returns:
+         a raw view on the data set.
+     """
+     dense_batch_ndim = 2
+
+     attr_schema = schema.dtype[attribute]
+     attr_shape_start = data.ndim - attr_schema.ndim
+     dataset_shape = data.shape[:attr_shape_start]
+     attr_shape = data.shape[attr_shape_start:]
+     if len(dataset_shape) <= dense_batch_ndim and attr_shape == attr_schema.shape:
+         return _get_raw_data_view(data, dtype=schema.dtype[attribute].base)
+     raise ValueError("Given data has a different schema than supported.")
+
+
+ def _get_indptr_view(indptr: np.ndarray) -> IdxPtr:  # type: ignore[valid-type]
+     """
+     Get a raw view on the index pointer.
+
+     Args:
+         indptr: the index pointer.
+
+     Returns:
+         a raw view on the index pointer.
+     """
+     return np.ascontiguousarray(indptr, dtype=IdxNp).ctypes.data_as(IdxPtr)
+
+
+ def _get_dense_buffer_properties(
+     data: ComponentData,
+     schema: ComponentMetaData,
+     is_batch: bool | None,
+     batch_size: int | None,
+ ) -> BufferProperties:
+     """
+     Extract the properties of the uniform batch dataset component.
+
+     Args:
+         data (ComponentData): the dataset component.
+         schema (ComponentMetaData): the dataset type.
+         is_batch (bool | None): whether the data is a batch dataset.
+         batch_size (int | None): the batch size.
+
+     Raises:
+         KeyError: if the dataset component is not sparse.
+         ValueError: if the dataset component contains conflicting or bad data.
+
+     Returns:
+         the properties of the dataset component.
+     """
+     if is_batch is not None and batch_size is not None and batch_size != 1 and not is_batch:
+         raise ValueError(f"Inconsistent 'is batch' and 'batch size'. {VALIDATOR_MSG}")
+
+     is_sparse_property = False
+
+     sub_data = _extract_data_from_component_data(data)
+     if not is_columnar(data):
+         actual_ndim = sub_data.ndim
+         shape: tuple[int] = sub_data.shape
+         columns = None
+     else:
+         if not sub_data:
+             raise ValueError(f"Empty columnar buffer is ambiguous. {VALIDATOR_MSG}")
+         attribute, attribute_data = next(iter(sub_data.items()))
+         actual_ndim = attribute_data.ndim - schema.dtype[attribute].ndim
+         shape = attribute_data.shape[:actual_ndim]
+         columns = list(sub_data)
+
+         for attribute, attribute_data in sub_data.items():
+             if (
+                 attribute_data.ndim != actual_ndim + schema.dtype[attribute].ndim
+                 or attribute_data.shape[:actual_ndim] != shape
+             ):
+                 raise ValueError(f"Data buffers must be consistent. {VALIDATOR_MSG}")
+
+     if actual_ndim not in (1, 2):
+         raise ValueError(f"Array can only be 1D or 2D. {VALIDATOR_MSG}")
+
+     single_dataset_ndim = 1
+     batch_dataset_ndim = 2
+
+     actual_is_batch = actual_ndim == batch_dataset_ndim
+     actual_batch_size = shape[0] if actual_is_batch else single_dataset_ndim
+     n_elements_per_scenario = shape[-1]
+     n_total_elements = actual_batch_size * n_elements_per_scenario
+
+     if is_batch is not None and is_batch != actual_is_batch:
+         raise ValueError(
+             f"Incorrect/inconsistent data provided: {'batch' if actual_is_batch else 'single'} "
+             f"data provided but {'batch' if is_batch else 'single'} data expected. {VALIDATOR_MSG}"
+         )
+     if batch_size is not None and batch_size != actual_batch_size:
+         raise ValueError(
+             f"Incorrect/inconsistent batch size provided: {actual_batch_size} scenarios provided "
+             f"but {batch_size} scenarios expected. {VALIDATOR_MSG}"
+         )
+
+     return BufferProperties(
+         is_sparse=is_sparse_property,
+         is_batch=actual_is_batch,
+         batch_size=actual_batch_size,
+         n_elements_per_scenario=n_elements_per_scenario,
+         n_total_elements=n_total_elements,
+         columns=columns,
+     )
+
+
+ def _get_sparse_buffer_properties(
+     data: ComponentData,
+     schema: ComponentMetaData,
+     batch_size: int | None,
+ ) -> BufferProperties:
+     """
+     Extract the properties of the sparse batch dataset component.
+
+     Args:
+         data (ComponentData): the sparse dataset component.
+         schema (ComponentMetaData | None): the dataset type.
+         batch_size (int | None): the batch size.
+
+     Raises:
+         KeyError: if the dataset component is not sparse.
+         ValueError: if the dataset component contains conflicting or bad data.
+
+     Returns:
+         the properties of the dataset component.
+     """
+     is_sparse_property = True
+
+     contents = _extract_data_from_component_data(data)
+     indptr = _extract_indptr(data)
+
+     ndim = 1
+     columns: list[AttributeType] | None = None
+     if not is_columnar(data):
+         shape: tuple[int, ...] = contents.shape
+     else:
+         if not contents:
+             raise ValueError(f"Empty columnar buffer is ambiguous. {VALIDATOR_MSG}")
+         attribute_data = next(iter(contents.values()))
+         shape = attribute_data.shape[:ndim]
+         columns = list(contents)
+         for attribute, attribute_data in contents.items():
+             if attribute_data.ndim != ndim + schema.dtype[attribute].ndim or attribute_data.shape[:ndim] != shape:
+                 raise ValueError(f"Data buffers must be consistent. {VALIDATOR_MSG}")
+
+     contents_size = shape[0]
+     check_indptr_consistency(indptr, batch_size, contents_size)
+
+     is_batch = True
+     n_elements_per_scenario = -1
+     n_total_elements = contents_size
+
+     return BufferProperties(
+         is_sparse=is_sparse_property,
+         is_batch=is_batch,
+         batch_size=indptr.size - 1,
+         n_elements_per_scenario=n_elements_per_scenario,
+         n_total_elements=n_total_elements,
+         columns=columns,
+     )
+
+
+ def get_buffer_properties(
+     data: ComponentData,
+     schema: ComponentMetaData,
+     is_batch: bool | None = None,
+     batch_size: int | None = None,
+ ) -> BufferProperties:
+     """
+     Extract the properties of the dataset component
+
+     Args:
+         data (ComponentData): the dataset component.
+         schema (ComponentMetaData | None): the dataset type [optional if data is not columnar]
+         is_batch (bool | None): whether the data is a batch dataset. [optional]
+         batch_size (int | None): the batch size. [optional]
+
+     Raises:
+         ValueError: if the dataset component contains conflicting or bad data.
+
+     Returns:
+         the properties of the dataset component.
+     """
+     if not is_sparse(data):
+         return _get_dense_buffer_properties(data=data, schema=schema, is_batch=is_batch, batch_size=batch_size)
+
+     if is_batch is not None and not is_batch:
+         raise ValueError("Sparse data must be batch data")
+
+     return _get_sparse_buffer_properties(data=cast(SparseBatchArray, data), schema=schema, batch_size=batch_size)
+
+
+ def _get_attribute_buffer_views(
+     data: np.ndarray | dict[AttributeType, np.ndarray], schema: ComponentMetaData
+ ) -> dict[AttributeType, CAttributeBuffer]:
+     """
+     Get C API compatible views on attribute buffers.
+
+     Args:
+         data (dict[AttributeType, np.ndarray]): the data.
+         schema (ComponentMetaData): the schema that the data should obey.
+
+     Returns:
+         dict[AttributeType, CAttributeBuffer]: the C API attribute buffer view per attribute.
+     """
+     if isinstance(data, np.ndarray):
+         return {}
+
+     return {
+         attribute: CAttributeBuffer(
+             data=_get_raw_attribute_data_view(data=attribute_data, schema=schema, attribute=attribute)
+         )
+         for attribute, attribute_data in data.items()
+     }
+
+
+ def _get_uniform_buffer_view(
+     data: DenseBatchData,
+     schema: ComponentMetaData,
+     is_batch: bool | None,
+     batch_size: int | None,
+ ) -> CBuffer:
+     """
+     Get a C API compatible view on a uniform buffer.
+
+     Args:
+         data: the data.
+         schema: the schema that the data should obey.
+         is_batch (bool | None): whether the data is a batch dataset.
+         batch_size (int | None): the batch size.
+
+     Returns:
+         the C API buffer view.
+     """
+     properties = _get_dense_buffer_properties(data, schema=schema, is_batch=is_batch, batch_size=batch_size)
+
+     return CBuffer(
+         data=_get_raw_component_data_view(data=data, schema=schema),
+         indptr=IdxPtr(),
+         n_elements_per_scenario=properties.n_elements_per_scenario,
+         batch_size=properties.batch_size,
+         total_elements=properties.n_total_elements,
+         attribute_data=_get_attribute_buffer_views(data=data, schema=schema),
+     )
+
+
+ def _get_sparse_buffer_view(
+     data: SparseBatchArray,
+     schema: ComponentMetaData,
+     batch_size: int | None,
+ ) -> CBuffer:
+     """
+     Get a C API compatible view on a sparse buffer.
+
+     Args:
+         data: the data.
+         schema: the schema that the data should obey.
+         batch_size (int | None): the batch size.
+
+     Returns:
+         the C API buffer view.
+     """
+     contents = data["data"]
+     indptr = data["indptr"]
+
+     properties = _get_sparse_buffer_properties(data, schema=schema, batch_size=batch_size)
+
+     return CBuffer(
+         data=_get_raw_component_data_view(data=contents, schema=schema),
+         indptr=_get_indptr_view(indptr),
+         n_elements_per_scenario=properties.n_elements_per_scenario,
+         batch_size=properties.batch_size,
+         total_elements=properties.n_total_elements,
+         attribute_data=_get_attribute_buffer_views(data=contents, schema=schema),
+     )
+
+
+ def get_buffer_view(
+     data: ComponentData,
+     schema: ComponentMetaData,
+     is_batch: bool | None = None,
+     batch_size: int | None = None,
+ ) -> CBuffer:
+     """
+     Get a C API compatible view on a buffer.
+
+     Args:
+         data: the data.
+         schema: the schema that the data should obey.
+         is_batch (bool | None): whether the data is a batch dataset. [optional]
+         batch_size (int | None): the batch size. [optional]
+
+     Returns:
+         the C API buffer view.
+     """
+     if not is_sparse(data):
+         return _get_uniform_buffer_view(cast(DenseBatchData, data), schema, is_batch, batch_size)
+
+     if is_batch is not None and not is_batch:
+         raise ValueError("Sparse data must be batch data")
+
+     return _get_sparse_buffer_view(cast(SparseBatchArray, data), schema, batch_size)
+
+
+ def create_buffer(properties: BufferProperties, schema: ComponentMetaData) -> ComponentData:
+     """
+     Create a buffer with the provided properties and type.
+
+     Args:
+         properties: the desired buffer properties.
+         schema: the data type of the buffer.
+
+     Raises:
+         ValueError: if the buffer properties are not consistent.
+
+     Returns:
+         np.ndarray | dict[str, np.ndarray]: a buffer with the correct properties.
+     """
+     if properties.is_sparse:
+         return _create_sparse_buffer(properties=properties, schema=schema)
+
+     return _create_uniform_buffer(properties=properties, schema=schema)
+
+
+ def _create_uniform_buffer(properties: BufferProperties, schema: ComponentMetaData) -> DenseBatchData:
+     """
+     Create a uniform buffer with the provided properties and type.
+
+     Args:
+         properties: the desired buffer properties.
+         schema: the data type of the buffer.
+
+     Raises:
+         ValueError: if the buffer properties are not uniform.
+
+     Returns:
+         A uniform buffer with the correct properties.
+     """
+     if properties.is_sparse:
+         raise ValueError(f"A uniform buffer cannot be sparse. {VALIDATOR_MSG}")
+
+     shape: int | tuple[int, int] = (
+         (properties.batch_size, properties.n_elements_per_scenario)
+         if properties.is_batch
+         else properties.n_elements_per_scenario
+     )
+     return _create_contents_buffer(shape=shape, dtype=schema.dtype, columns=properties.columns)
+
+
+ def _create_sparse_buffer(properties: BufferProperties, schema: ComponentMetaData) -> SparseBatchData:
+     """
+     Create a sparse buffer with the provided properties and type.
+
+     Args:
+         properties: the desired buffer properties.
+         schema: the data type of the buffer.
+
+     Raises:
+         ValueError: if the buffer properties are not sparse.
+
+     Returns:
+         A sparse buffer with the correct properties.
+     """
+     data: SingleComponentData = _create_contents_buffer(
+         shape=properties.n_total_elements,
+         dtype=schema.dtype,
+         columns=properties.columns,
+     )
+     indptr: IndexPointer = np.array([0] * properties.batch_size + [properties.n_total_elements], dtype=IdxC)
+     return cast(SparseBatchData, {"data": data, "indptr": indptr})
+
+
+ def _create_contents_buffer(shape, dtype, columns: list[AttributeType] | None) -> SingleComponentData | DenseBatchData:
+     if columns is None:
+         return np.empty(shape=shape, dtype=dtype)
+
+     return {attribute: np.empty(shape=shape, dtype=dtype[attribute]) for attribute in columns}
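
Below is a minimal usage sketch, not part of the diff, illustrating the buffer-property helpers changed above. It assumes power-grid-model 1.12.x is installed; initialize_array and power_grid_meta_data are public API, while get_buffer_properties is an internal helper shown in this diff, so its import path may change between releases.

import numpy as np
import power_grid_model as pgm
from power_grid_model._core.buffer_handling import get_buffer_properties

# A row-based dense batch buffer: 2 scenarios, 3 sym_load updates each.
update_data = pgm.initialize_array("update", "sym_load", (2, 3))
schema = pgm.power_grid_meta_data["update"]["sym_load"]

props = get_buffer_properties(update_data, schema=schema)
print(props.is_batch, props.batch_size, props.n_elements_per_scenario)
# expected: True 2 3

# A conflicting batch size now raises the more descriptive
# "Incorrect/inconsistent batch size provided: ..." message introduced
# in this version, instead of the old generic one.
try:
    get_buffer_properties(update_data, schema=schema, batch_size=5)
except ValueError as exc:
    print(exc)

# Sparse batch data is a dict of a flat data buffer and an index pointer;
# scenario i owns the slice data[indptr[i]:indptr[i + 1]].
sparse_update = {
    "indptr": np.array([0, 1, 3], dtype=np.int64),  # scenario 0: 1 update, scenario 1: 2
    "data": pgm.initialize_array("update", "sym_load", 3),
}
sparse_props = get_buffer_properties(sparse_update, schema=schema)
print(sparse_props.is_sparse, sparse_props.batch_size)
# expected: True 2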