legend-pydataobj 1.5.1__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/METADATA +1 -1
  2. legend_pydataobj-1.6.0.dist-info/RECORD +54 -0
  3. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/WHEEL +1 -1
  4. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/entry_points.txt +1 -0
  5. lgdo/__init__.py +7 -4
  6. lgdo/_version.py +2 -2
  7. lgdo/cli.py +237 -12
  8. lgdo/compression/__init__.py +1 -0
  9. lgdo/lh5/__init__.py +9 -1
  10. lgdo/lh5/_serializers/__init__.py +43 -0
  11. lgdo/lh5/_serializers/read/__init__.py +0 -0
  12. lgdo/lh5/_serializers/read/array.py +34 -0
  13. lgdo/lh5/_serializers/read/composite.py +405 -0
  14. lgdo/lh5/_serializers/read/encoded.py +129 -0
  15. lgdo/lh5/_serializers/read/ndarray.py +104 -0
  16. lgdo/lh5/_serializers/read/scalar.py +34 -0
  17. lgdo/lh5/_serializers/read/utils.py +12 -0
  18. lgdo/lh5/_serializers/read/vector_of_vectors.py +195 -0
  19. lgdo/lh5/_serializers/write/__init__.py +0 -0
  20. lgdo/lh5/_serializers/write/array.py +92 -0
  21. lgdo/lh5/_serializers/write/composite.py +259 -0
  22. lgdo/lh5/_serializers/write/scalar.py +23 -0
  23. lgdo/lh5/_serializers/write/vector_of_vectors.py +95 -0
  24. lgdo/lh5/core.py +272 -0
  25. lgdo/lh5/datatype.py +46 -0
  26. lgdo/lh5/exceptions.py +34 -0
  27. lgdo/lh5/iterator.py +1 -1
  28. lgdo/lh5/store.py +69 -1160
  29. lgdo/lh5/tools.py +27 -53
  30. lgdo/lh5/utils.py +130 -27
  31. lgdo/lh5_store.py +11 -2
  32. lgdo/logging.py +1 -0
  33. lgdo/types/__init__.py +1 -0
  34. lgdo/types/array.py +1 -0
  35. lgdo/types/arrayofequalsizedarrays.py +1 -0
  36. lgdo/types/encoded.py +3 -8
  37. lgdo/types/fixedsizearray.py +1 -0
  38. lgdo/types/struct.py +1 -0
  39. lgdo/types/table.py +37 -5
  40. lgdo/types/vectorofvectors.py +314 -458
  41. lgdo/types/vovutils.py +320 -0
  42. lgdo/types/waveformtable.py +1 -0
  43. lgdo/utils.py +1 -32
  44. legend_pydataobj-1.5.1.dist-info/RECORD +0 -36
  45. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/LICENSE +0 -0
  46. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/top_level.txt +0 -0
lgdo/types/vovutils.py ADDED
@@ -0,0 +1,320 @@
+ """:class:`~.lgdo.types.vectorofvectors.VectorOfVectors` utilities."""
+
+ from __future__ import annotations
+
+ import logging
+ from collections.abc import Sequence
+
+ import awkward as ak
+ import numba
+ import numpy as np
+ from numpy.typing import NDArray
+
+ from ..utils import numba_defaults_kwargs as nb_kwargs
+ from .array import Array
+
+ log = logging.getLogger(__name__)
+
+
+ def build_cl(
+     sorted_array_in: NDArray, cumulative_length_out: NDArray | None = None
+ ) -> NDArray:
+     """Build a cumulative length array from an array of sorted data.
+
+     Examples
+     --------
+     >>> build_cl(np.array([3, 3, 3, 4]))
+     array([3., 4.])
+
+     For a `sorted_array_in` of indices, this is the inverse of
+     :func:`.explode_cl`, in the sense that doing
+     ``build_cl(explode_cl(cumulative_length))`` would recover the original
+     `cumulative_length`.
+
+     Parameters
+     ----------
+     sorted_array_in
+         array of data already sorted; each N matching contiguous entries will
+         be converted into a new row of `cumulative_length_out`.
+     cumulative_length_out
+         a pre-allocated array for the output `cumulative_length`. It will
+         always have length <= `sorted_array_in`, so giving them the same length
+         is safe if there is not a better guess.
+
+     Returns
+     -------
+     cumulative_length_out
+         the output cumulative length array. If the user provides a
+         `cumulative_length_out` that is too long, this return value is sliced
+         to contain only the used portion of the allocated memory.
+     """
+     if len(sorted_array_in) == 0:
+         return None
+     sorted_array_in = np.asarray(sorted_array_in)
+     if cumulative_length_out is None:
+         cumulative_length_out = np.zeros(len(sorted_array_in), dtype=np.uint64)
+     else:
+         cumulative_length_out.fill(0)
+     if len(cumulative_length_out) == 0 and len(sorted_array_in) > 0:
+         msg = f"cumulative_length_out too short ({len(cumulative_length_out)})"
+         raise ValueError(msg)
+     return _nb_build_cl(sorted_array_in, cumulative_length_out)
+
+
+ @numba.njit(**nb_kwargs)
+ def _nb_build_cl(sorted_array_in: NDArray, cumulative_length_out: NDArray) -> NDArray:
+     """numbified inner loop for build_cl"""
+     ii = 0
+     last_val = sorted_array_in[0]
+     for val in sorted_array_in:
+         if val != last_val:
+             ii += 1
+             cumulative_length_out[ii] = cumulative_length_out[ii - 1]
+             if ii >= len(cumulative_length_out):
+                 msg = "cumulative_length_out too short"
+                 raise RuntimeError(msg)
+             last_val = val
+         cumulative_length_out[ii] += 1
+     ii += 1
+     return cumulative_length_out[:ii]
+
+
+ @numba.guvectorize(
+     [
+         f"{data_type}[:,:],{size_type}[:],{data_type}[:]"
+         for data_type in [
+             "b1",
+             "i1",
+             "i2",
+             "i4",
+             "i8",
+             "u1",
+             "u2",
+             "u4",
+             "u8",
+             "f4",
+             "f8",
+             "c8",
+             "c16",
+         ]
+         for size_type in ["i4", "i8", "u4", "u8"]
+     ],
+     "(l,m),(l),(n)",
+     **nb_kwargs,
+ )
+ def _nb_fill(aoa_in: NDArray, len_in: NDArray, flattened_array_out: NDArray):
+     """Vectorized function to fill flattened array from array of arrays and
+     lengths. Values in aoa_in past lengths will not be copied.
+
+     Parameters
+     ----------
+     aoa_in
+         array of arrays containing values to be copied
+     len_in
+         array of vector lengths for each row of aoa_in
+     flattened_array_out
+         flattened array to copy values into. Must be longer than sum of
+         lengths in len_in
+     """
+
+     if len(flattened_array_out) < len_in.sum():
+         msg = "flattened array not large enough to hold values"
+         raise ValueError(msg)
+
+     start = 0
+     for i, ll in enumerate(len_in):
+         stop = start + ll
+         flattened_array_out[start:stop] = aoa_in[i, :ll]
+         start = stop
+
+
+ def explode_cl(cumulative_length: NDArray, array_out: NDArray | None = None) -> NDArray:
+     """Explode a `cumulative_length` array.
+
+     Examples
+     --------
+     >>> explode_cl(np.array([2, 3]))
+     array([0., 0., 1.])
+
+     This is the inverse of :func:`.build_cl`, in the sense that doing
+     ``build_cl(explode_cl(cumulative_length))`` would recover the original
+     `cumulative_length`.
+
+     Parameters
+     ----------
+     cumulative_length
+         the cumulative length array to be exploded.
+     array_out
+         a pre-allocated array to hold the exploded cumulative length array.
+         The length should be equal to ``cumulative_length[-1]``.
+
+     Returns
+     -------
+     array_out
+         the exploded cumulative length array.
+     """
+     cumulative_length = np.asarray(cumulative_length)
+     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+     if array_out is None:
+         array_out = np.empty(int(out_len), dtype=np.uint64)
+     if len(array_out) != out_len:
+         msg = f"bad lengths: cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})"
+         raise ValueError(msg)
+     return _nb_explode_cl(cumulative_length, array_out)
+
+
+ @numba.njit(**nb_kwargs)
+ def _nb_explode_cl(cumulative_length: NDArray, array_out: NDArray) -> NDArray:
+     """numbified inner loop for explode_cl"""
+     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+     if len(array_out) != out_len:
+         msg = "bad lengths"
+         raise ValueError(msg)
+     start = 0
+     for ii in range(len(cumulative_length)):
+         nn = int(cumulative_length[ii] - start)
+         for jj in range(nn):
+             array_out[int(start + jj)] = ii
+         start = cumulative_length[ii]
+     return array_out
+
+
+ def explode(
+     cumulative_length: NDArray, array_in: NDArray, array_out: NDArray | None = None
+ ) -> NDArray:
+     """Explode a data array using a `cumulative_length` array.
+
+     This is identical to :func:`.explode_cl`, except `array_in` gets exploded
+     instead of `cumulative_length`.
+
+     Examples
+     --------
+     >>> explode(np.array([2, 3]), np.array([3, 4]))
+     array([3., 3., 4.])
+
+     Parameters
+     ----------
+     cumulative_length
+         the cumulative length array to use for exploding.
+     array_in
+         the data to be exploded. Must have same length as `cumulative_length`.
+     array_out
+         a pre-allocated array to hold the exploded data. The length should be
+         equal to ``cumulative_length[-1]``.
+
+     Returns
+     -------
+     array_out
+         the exploded data array.
+     """
+     cumulative_length = np.asarray(cumulative_length)
+     array_in = np.asarray(array_in)
+     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+     if array_out is None:
+         array_out = np.empty(out_len, dtype=array_in.dtype)
+     if len(cumulative_length) != len(array_in) or len(array_out) != out_len:
+         msg = (
+             f"bad lengths: cl ({len(cumulative_length)}) != in ({len(array_in)}) "
+             f"and cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})"
+         )
+         raise ValueError(msg)
+     return _nb_explode(cumulative_length, array_in, array_out)
+
+
+ @numba.njit(**nb_kwargs)
+ def _nb_explode(
+     cumulative_length: NDArray, array_in: NDArray, array_out: NDArray
+ ) -> NDArray:
+     """Numbified inner loop for :func:`.explode`."""
+     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+     if len(cumulative_length) != len(array_in) or len(array_out) != out_len:
+         msg = "bad lengths"
+         raise ValueError(msg)
+     ii = 0
+     for jj in range(len(array_out)):
+         while ii < len(cumulative_length) and jj >= cumulative_length[ii]:
+             ii += 1
+         array_out[jj] = array_in[ii]
+     return array_out
+
+
+ def explode_arrays(
+     cumulative_length: Array,
+     arrays: Sequence[NDArray],
+     arrays_out: Sequence[NDArray] | None = None,
+ ) -> list:
+     """Explode a set of arrays using a `cumulative_length` array.
+
+     Parameters
+     ----------
+     cumulative_length
+         the cumulative length array to use for exploding.
+     arrays
+         the data arrays to be exploded. Each array must have same length as
+         `cumulative_length`.
+     arrays_out
+         a list of pre-allocated arrays to hold the exploded data. The length of
+         the list should be equal to the length of `arrays`, and each entry in
+         arrays_out should have length ``cumulative_length[-1]``. If not
+         provided, output arrays are allocated for the user.
+
+     Returns
+     -------
+     arrays_out
+         the list of exploded data arrays.
+     """
+     cumulative_length = np.asarray(cumulative_length)
+     for ii in range(len(arrays)):
+         arrays[ii] = np.asarray(arrays[ii])
+     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+     if arrays_out is None:
+         arrays_out = []
+         for array in arrays:
+             arrays_out.append(np.empty(out_len, dtype=array.dtype))
+     for ii in range(len(arrays)):
+         explode(cumulative_length, arrays[ii], arrays_out[ii])
+     return arrays_out
+
+
+ def _ak_is_jagged(type_: ak.types.Type) -> bool:
+     """Returns ``True`` if :class:`ak.Array` is jagged at all axes.
+
+     This assures that :func:`ak.to_buffers` returns the expected data
+     structures.
+     """
+     if isinstance(type_, ak.Array):
+         return _ak_is_jagged(type_.type)
+
+     if isinstance(type_, (ak.types.ArrayType, ak.types.ListType)):
+         return _ak_is_jagged(type_.content)
+
+     if isinstance(type_, ak.types.ScalarType):
+         msg = "Expected ArrayType or its content"
+         raise TypeError(msg)
+
+     return not isinstance(type_, ak.types.RegularType)
+
+
+ # https://github.com/scikit-hep/awkward/discussions/3049
+ def _ak_is_valid(type_: ak.types.Type) -> bool:
+     """Returns ``True`` if :class:`ak.Array` contains only elements we can serialize to LH5."""
+     if isinstance(type_, ak.Array):
+         return _ak_is_valid(type_.type)
+
+     if isinstance(type_, (ak.types.ArrayType, ak.types.ListType)):
+         return _ak_is_valid(type_.content)
+
+     if isinstance(type_, ak.types.ScalarType):
+         msg = "Expected ArrayType or its content"
+         raise TypeError(msg)
+
+     return not isinstance(
+         type_,
+         (
+             ak.types.OptionType,
+             ak.types.UnionType,
+             ak.types.RecordType,
+         ),
+     )
+
+     return isinstance(type_, ak.types.NumpyType)
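Usage sketch (editor's illustration, not part of the diff): how the new cumulative-length helpers fit together, assuming they are imported from the lgdo.types.vovutils module added above. The expected outputs in the comments follow the implementations shown in the diff.

# round-tripping the cumulative-length helpers from lgdo.types.vovutils
import numpy as np

from lgdo.types.vovutils import build_cl, explode, explode_arrays, explode_cl

# cumulative_length [2, 5, 6] describes three rows of lengths 2, 3 and 1
cumulative_length = np.array([2, 5, 6])

# explode_cl assigns to each flattened element the index of its row
row_ids = explode_cl(cumulative_length)
print(row_ids)  # [0 0 1 1 1 2]

# build_cl is the inverse on a sorted array of row indices
print(build_cl(row_ids))  # [2 5 6]

# explode broadcasts one value per row across the flattened layout
energies = np.array([10.5, 3.2, 7.7])
print(explode(cumulative_length, energies))  # [10.5 10.5 3.2 3.2 3.2 7.7]

# explode_arrays does the same for several per-row arrays at once
flat_e, flat_id = explode_arrays(cumulative_length, [energies, np.array([0, 1, 2])])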
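A second illustration, also not part of the diff, of what the private awkward-type helpers check; calling _ak_is_jagged and _ak_is_valid directly is done here only for demonstration and relies on the implementations shown above.

# what counts as "jagged" and "valid" for the LH5 serialization helpers
import awkward as ak
import numpy as np

from lgdo.types.vovutils import _ak_is_jagged, _ak_is_valid

jagged = ak.Array([[1, 2], [3]])          # ListType at every inner axis
regular = ak.from_numpy(np.ones((2, 3)))  # RegularType (rectangular) inner axis
masked = ak.Array([[1, None], [3]])       # contains an OptionType

print(_ak_is_jagged(jagged))   # True:  ak.to_buffers yields the expected offsets + flat data
print(_ak_is_jagged(regular))  # False: the inner axis is rectangular, not jagged
print(_ak_is_valid(masked))    # False: option/union/record types are rejected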
lgdo/types/waveformtable.py CHANGED
@@ -3,6 +3,7 @@ Implements a LEGEND Data Object representing a special
  :class:`~.lgdo.table.Table` to store blocks of one-dimensional time-series
  data.
  """
+
  from __future__ import annotations
 
  import logging
lgdo/utils.py CHANGED
@@ -1,4 +1,5 @@
  """Implements utilities for LEGEND Data Objects."""
+
  from __future__ import annotations
 
  import logging
@@ -8,8 +9,6 @@ from typing import Any
 
  import numpy as np
 
- from . import types as lgdo
-
  log = logging.getLogger(__name__)
 
 
@@ -56,36 +55,6 @@ def get_element_type(obj: object) -> str:
      raise ValueError(msg, type(obj).__name__)
 
 
- def copy(obj: lgdo.LGDO, dtype: np.dtype = None) -> lgdo.LGDO:
-     """Return a copy of an LGDO.
-
-     Parameters
-     ----------
-     obj
-         the LGDO to be copied.
-     dtype
-         NumPy dtype to be used for the copied object.
-
-     """
-     if dtype is None:
-         dtype = obj.dtype
-
-     if isinstance(obj, lgdo.Array):
-         return lgdo.Array(
-             np.array(obj.nda, dtype=dtype, copy=True), attrs=dict(obj.attrs)
-         )
-
-     if isinstance(obj, lgdo.VectorOfVectors):
-         return lgdo.VectorOfVectors(
-             flattened_data=copy(obj.flattened_data, dtype=dtype),
-             cumulative_length=copy(obj.cumulative_length),
-             attrs=dict(obj.attrs),
-         )
-
-     msg = f"copy of {type(obj)} not supported"
-     raise ValueError(msg)
-
-
  def getenv_bool(name: str, default: bool = False) -> bool:
      """Get environment value as a boolean, returning True for 1, t and true
      (caps-insensitive), and False for any other value and default if undefined.
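Editor's note: since lgdo.utils.copy is removed in this release, here is a minimal user-side sketch of an equivalent deep copy for the Array case, modeled on the deleted implementation above. copy_array is a hypothetical helper name, not something the package provides.

# user-side replacement for the removed lgdo.utils.copy (Array case only)
import numpy as np

from lgdo.types import Array


def copy_array(obj: Array, dtype=None) -> Array:
    """Deep-copy an Array, optionally casting to `dtype`, mirroring the removed helper."""
    if dtype is None:
        dtype = obj.dtype
    return Array(np.array(obj.nda, dtype=dtype, copy=True), attrs=dict(obj.attrs))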
legend_pydataobj-1.5.1.dist-info/RECORD DELETED
@@ -1,36 +0,0 @@
- lgdo/__init__.py,sha256=qPZZxzGMSt0Y5609kcwRiCW9qswCUIhRnGhIUHlH3uU,2913
- lgdo/_version.py,sha256=W6YuN1JOd6M-rSt9HDXK91AutRDYXTjJT_LQg3rCsjk,411
- lgdo/cli.py,sha256=5H-8LoVq-_Q7ufelDno0Sd9TsfOyJPs5O_BAGs4sG8k,1638
- lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
- lgdo/lh5_store.py,sha256=56TbTMfVdvb3yewZIGW2kZAdhdSLndDjxHMXJKpQuZI,8585
- lgdo/logging.py,sha256=nPNxXg553r1ItI9IS1M-PE8kGvi_tGI-Uoq8GK000Rw,1002
- lgdo/units.py,sha256=nbJ0JTNqlhHUXiBXT3k6qhRpSfMk5_9yW7EeC0dhMuQ,151
- lgdo/utils.py,sha256=N42E50vTXq8qZ0sqJCQq3tr3PCq97Ugb4zgYQCk_uLg,4457
- lgdo/compression/__init__.py,sha256=MaJ0G0cuXn2AVx1a9chIIH2F5cQG40J7aRa-xrPw5SI,1126
- lgdo/compression/base.py,sha256=82cQJujfvoAOKBFx761dEcx_xM02TBCBBuBo6i78tuI,838
- lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2515
- lgdo/compression/radware.py,sha256=VbKAvi18h48Fz-ZxMEg64yD1ezaw1NkMZazxurdyMmc,24015
- lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
- lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
- lgdo/lh5/__init__.py,sha256=8ouXSwosLsYc4T-KfKzN5TIe27RKvAxG0tZQ6cMdwFY,616
- lgdo/lh5/iterator.py,sha256=x6hJ85xWmAXkDrqVALQwJxYsuphxM6CrobBU3v4Ikmo,12369
- lgdo/lh5/store.py,sha256=7gTP4m9kb-Nhk9uaoOR-kF440YfOvzeSF4qQkjj-eAE,55673
- lgdo/lh5/tools.py,sha256=Bk1O6m45ArlxptWgovvE7AkFvS3RIsmr67l9txxKzo0,9057
- lgdo/lh5/utils.py,sha256=rPLS3QfvTfjrY4ApvSE720AgzaPi7NuDzOIbe13ZXrA,3658
- lgdo/types/__init__.py,sha256=WjMO-sEqxBlvpeACRiq78VufmvAXWrI9zgR2H0mhJY8,770
- lgdo/types/array.py,sha256=pHlkxhPlyigKa__ai7bYatVaGJszfvd90Y_en6H-A48,6536
- lgdo/types/arrayofequalsizedarrays.py,sha256=FP6z4_QaJGCcQ5fc5yKolOvp5E7GM7RydV9eOCD8Nxs,4948
- lgdo/types/encoded.py,sha256=KTs0NzZo6LEZyIUdxxYAm7IKlZR10ln-65yjilcxaLw,15304
- lgdo/types/fixedsizearray.py,sha256=6Pvkp3OC6bAF37id9p1vy_NnYPRwsuc22UAtrgs4qlU,1524
- lgdo/types/lgdo.py,sha256=UnJDi1emQYVgH_H29Vipfs4LelPopxG5pgZUu1eKOlw,2761
- lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
- lgdo/types/struct.py,sha256=qAIzxrypcIOlWAfNb4K2wDguI9moVXfBMprkAaoMCGY,3978
- lgdo/types/table.py,sha256=5MWLLOCgASwK9X2cVaHAszbdBdNvUZGX0d4x38mFb6U,15538
- lgdo/types/vectorofvectors.py,sha256=0ukvEhU_AaiVIDBiiYBeFNlScCZSQWttjKVwk8L9wok,28228
- lgdo/types/waveformtable.py,sha256=pXoXpy8uZqGPonFjD-VTNBFc5lMMKBrw2JgYsjhk8bc,9900
- legend_pydataobj-1.5.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- legend_pydataobj-1.5.1.dist-info/METADATA,sha256=QnKaqJxYPtNOEQKfiJRpykNMpMdmYKhYrLnQQ6ZV_4k,44353
- legend_pydataobj-1.5.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- legend_pydataobj-1.5.1.dist-info/entry_points.txt,sha256=j22HoS-1cVhTtKJkDnKB49uNH0nEVER2Tpw-lVh1aws,41
- legend_pydataobj-1.5.1.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
- legend_pydataobj-1.5.1.dist-info/RECORD,,