legend-pydataobj 1.6.2__tar.gz → 1.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/PKG-INFO +2 -2
  2. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/pyproject.toml +1 -1
  3. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/legend_pydataobj.egg-info/PKG-INFO +2 -2
  4. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/legend_pydataobj.egg-info/requires.txt +1 -1
  5. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/_version.py +2 -2
  6. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/cli.py +12 -1
  7. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/read/composite.py +3 -3
  8. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/write/array.py +1 -1
  9. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +24 -5
  10. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/tools.py +38 -1
  11. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/utils.py +10 -0
  12. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/fixedsizearray.py +1 -1
  13. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/struct.py +22 -4
  14. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/table.py +26 -9
  15. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/vectorofvectors.py +18 -21
  16. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/units.py +1 -1
  17. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/compression/test_uleb128_zigzag_diff.py +1 -1
  18. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_struct.py +21 -0
  19. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_table.py +43 -1
  20. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_vectorofvectors.py +30 -13
  21. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/LICENSE +0 -0
  22. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/README.md +0 -0
  23. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/setup.cfg +0 -0
  24. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/legend_pydataobj.egg-info/SOURCES.txt +0 -0
  25. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
  26. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
  27. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
  28. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
  29. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/__init__.py +0 -0
  30. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/compression/__init__.py +0 -0
  31. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/compression/base.py +0 -0
  32. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/compression/generic.py +0 -0
  33. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/compression/radware.py +0 -0
  34. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/compression/utils.py +0 -0
  35. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/compression/varlen.py +0 -0
  36. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lgdo_utils.py +0 -0
  37. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/__init__.py +0 -0
  38. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/__init__.py +0 -0
  39. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
  40. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/read/array.py +0 -0
  41. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/read/encoded.py +0 -0
  42. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/read/ndarray.py +0 -0
  43. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/read/scalar.py +0 -0
  44. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/read/utils.py +0 -0
  45. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +0 -0
  46. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
  47. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/write/composite.py +0 -0
  48. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/_serializers/write/scalar.py +0 -0
  49. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/core.py +0 -0
  50. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/datatype.py +0 -0
  51. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/exceptions.py +0 -0
  52. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/iterator.py +0 -0
  53. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5/store.py +0 -0
  54. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/lh5_store.py +0 -0
  55. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/logging.py +0 -0
  56. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/__init__.py +0 -0
  57. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/array.py +0 -0
  58. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
  59. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/encoded.py +0 -0
  60. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/lgdo.py +0 -0
  61. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/scalar.py +0 -0
  62. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/vovutils.py +0 -0
  63. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/types/waveformtable.py +0 -0
  64. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/src/lgdo/utils.py +0 -0
  65. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/compression/conftest.py +0 -0
  66. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
  67. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
  68. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/compression/test_compression.py +0 -0
  69. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/compression/test_radware_sigcompress.py +0 -0
  70. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/compression/test_str2wfcodec.py +0 -0
  71. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/conftest.py +0 -0
  72. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/lh5/conftest.py +0 -0
  73. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/lh5/test_core.py +0 -0
  74. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/lh5/test_lh5_datatype.py +0 -0
  75. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/lh5/test_lh5_iterator.py +0 -0
  76. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/lh5/test_lh5_store.py +0 -0
  77. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/lh5/test_lh5_tools.py +0 -0
  78. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/lh5/test_lh5_utils.py +0 -0
  79. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/lh5/test_lh5_write.py +0 -0
  80. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/test_cli.py +0 -0
  81. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/test_lgdo_utils.py +0 -0
  82. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_array.py +0 -0
  83. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_arrayofequalsizedarrays.py +0 -0
  84. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_encoded.py +0 -0
  85. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_fixedsizearray.py +0 -0
  86. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_representations.py +0 -0
  87. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_scalar.py +0 -0
  88. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_table_eval.py +0 -0
  89. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_vovutils.py +0 -0
  90. {legend_pydataobj-1.6.2 → legend_pydataobj-1.7.1}/tests/types/test_waveformtable.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: legend_pydataobj
3
- Version: 1.6.2
3
+ Version: 1.7.1
4
4
  Summary: LEGEND Python Data Objects
5
5
  Author: The LEGEND Collaboration
6
6
  Maintainer: The LEGEND Collaboration
@@ -707,7 +707,7 @@ Requires-Dist: numexpr
707
707
  Requires-Dist: numpy>=1.21
708
708
  Requires-Dist: pandas>=1.4.4
709
709
  Requires-Dist: parse
710
- Requires-Dist: pint
710
+ Requires-Dist: pint!=0.24
711
711
  Requires-Dist: pint-pandas
712
712
  Provides-Extra: all
713
713
  Requires-Dist: legend-pydataobj[docs,test]; extra == "all"
@@ -41,7 +41,7 @@ dependencies = [
41
41
  "numpy>=1.21",
42
42
  "pandas>=1.4.4",
43
43
  "parse",
44
- "pint",
44
+ "pint!=0.24",
45
45
  "pint-pandas",
46
46
  ]
47
47
  dynamic = [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: legend_pydataobj
3
- Version: 1.6.2
3
+ Version: 1.7.1
4
4
  Summary: LEGEND Python Data Objects
5
5
  Author: The LEGEND Collaboration
6
6
  Maintainer: The LEGEND Collaboration
@@ -707,7 +707,7 @@ Requires-Dist: numexpr
707
707
  Requires-Dist: numpy>=1.21
708
708
  Requires-Dist: pandas>=1.4.4
709
709
  Requires-Dist: parse
710
- Requires-Dist: pint
710
+ Requires-Dist: pint!=0.24
711
711
  Requires-Dist: pint-pandas
712
712
  Provides-Extra: all
713
713
  Requires-Dist: legend-pydataobj[docs,test]; extra == "all"
@@ -8,7 +8,7 @@ numexpr
8
8
  numpy>=1.21
9
9
  pandas>=1.4.4
10
10
  parse
11
- pint
11
+ pint!=0.24
12
12
  pint-pandas
13
13
 
14
14
  [all]
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '1.6.2'
16
- __version_tuple__ = version_tuple = (1, 6, 2)
15
+ __version__ = version = '1.7.1'
16
+ __version_tuple__ = version_tuple = (1, 7, 1)
@@ -52,6 +52,11 @@ def lh5ls(args=None):
52
52
  default=None,
53
53
  help="""Maximum tree depth of groups to print""",
54
54
  )
55
+ parser.add_argument(
56
+ "--detail",
57
+ action="store_true",
58
+ help="""Print details about datasets""",
59
+ )
55
60
 
56
61
  args = parser.parse_args(args)
57
62
 
@@ -66,7 +71,13 @@ def lh5ls(args=None):
66
71
  print(__version__) # noqa: T201
67
72
  sys.exit()
68
73
 
69
- lh5.show(args.lh5_file, args.lh5_group, attrs=args.attributes, depth=args.depth)
74
+ lh5.show(
75
+ args.lh5_file,
76
+ args.lh5_group,
77
+ attrs=args.attributes,
78
+ depth=args.depth,
79
+ detail=args.detail,
80
+ )
70
81
 
71
82
 
72
83
  def lh5concat(args=None):
@@ -56,7 +56,7 @@ def _h5_read_lgdo(
56
56
  lh5_file = list(h5f)
57
57
  n_rows_read = 0
58
58
 
59
- for i, h5f in enumerate(lh5_file):
59
+ for i, _h5f in enumerate(lh5_file):
60
60
  if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]):
61
61
  # a list of lists: must be one per file
62
62
  idx_i = idx[i]
@@ -65,7 +65,7 @@ def _h5_read_lgdo(
65
65
  if not (isinstance(idx, tuple) and len(idx) == 1):
66
66
  idx = (idx,)
67
67
  # idx is a long continuous array
68
- n_rows_i = read_n_rows(name, h5f)
68
+ n_rows_i = read_n_rows(name, _h5f)
69
69
  # find the length of the subset of idx that contains indices
70
70
  # that are less than n_rows_i
71
71
  n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
@@ -78,7 +78,7 @@ def _h5_read_lgdo(
78
78
 
79
79
  obj_buf, n_rows_read_i = _h5_read_lgdo(
80
80
  name,
81
- h5f,
81
+ _h5f,
82
82
  start_row=start_row,
83
83
  n_rows=n_rows_i,
84
84
  idx=idx_i,
@@ -10,7 +10,7 @@ from ...exceptions import LH5EncodeError
10
10
 
11
11
  log = logging.getLogger(__name__)
12
12
 
13
- DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "lzf"}
13
+ DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}
14
14
 
15
15
 
16
16
  def _h5_write_array(
@@ -2,6 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
 
5
+ import numpy as np
6
+
5
7
  from .... import types
6
8
  from ... import utils
7
9
  from ...exceptions import LH5EncodeError
@@ -31,12 +33,15 @@ def _h5_write_vector_of_vectors(
31
33
 
32
34
  # if appending we need to add an appropriate offset to the
33
35
  # cumulative lengths as appropriate for the in-file object
34
- offset = 0 # declare here because we have to subtract it off at the end
36
+ # declare here because we have to subtract it off at the end
37
+ offset = np.int64(0)
35
38
  if (wo_mode in ("a", "o")) and "cumulative_length" in group:
36
39
  len_cl = len(group["cumulative_length"])
40
+ # if append, ignore write_start and set it to total number of vectors
37
41
  if wo_mode == "a":
38
42
  write_start = len_cl
39
43
  if len_cl > 0:
44
+ # set offset to correct number of elements in flattened_data until write_start
40
45
  offset = group["cumulative_length"][write_start - 1]
41
46
 
42
47
  # First write flattened_data array. Only write rows with data.
@@ -71,15 +76,23 @@ def _h5_write_vector_of_vectors(
71
76
  )
72
77
 
73
78
  # now offset is used to give appropriate in-file values for
74
- # cumulative_length. Need to adjust it for start_row
79
+ # cumulative_length. Need to adjust it for start_row, if different from zero
75
80
  if start_row > 0:
76
81
  offset -= obj.cumulative_length.nda[start_row - 1]
77
82
 
78
83
  # Add offset to obj.cumulative_length itself to avoid memory allocation.
79
84
  # Then subtract it off after writing! (otherwise it will be changed
80
85
  # upon return)
81
- cl_dtype = obj.cumulative_length.nda.dtype.type
82
- obj.cumulative_length.nda += cl_dtype(offset)
86
+
87
+ # NOTE: this operation is not numerically safe (uint overflow in the lower
88
+ # part of the array), but this is not a problem because those values are
89
+ # not written to disk and we are going to restore the offset at the end
90
+ np.add(
91
+ obj.cumulative_length.nda,
92
+ offset,
93
+ out=obj.cumulative_length.nda,
94
+ casting="unsafe",
95
+ )
83
96
 
84
97
  _h5_write_array(
85
98
  obj.cumulative_length,
@@ -92,4 +105,10 @@ def _h5_write_vector_of_vectors(
92
105
  write_start=write_start,
93
106
  **h5py_kwargs,
94
107
  )
95
- obj.cumulative_length.nda -= cl_dtype(offset)
108
+
109
+ np.subtract(
110
+ obj.cumulative_length.nda,
111
+ offset,
112
+ out=obj.cumulative_length.nda,
113
+ casting="unsafe",
114
+ )
@@ -87,6 +87,7 @@ def show(
87
87
  indent: str = "",
88
88
  header: bool = True,
89
89
  depth: int | None = None,
90
+ detail: bool = False,
90
91
  ) -> None:
91
92
  """Print a tree of LH5 file contents with LGDO datatype.
92
93
 
@@ -104,6 +105,8 @@ def show(
104
105
  print `lh5_group` at the top of the diagram.
105
106
  depth
106
107
  maximum tree depth of groups to print
108
+ detail
109
+ whether to print additional information about how the data is stored
107
110
 
108
111
  Examples
109
112
  --------
@@ -171,6 +174,39 @@ def show(
171
174
 
172
175
  print(f"{indent}{char} \033[1m{key}\033[0m · {dtype} {_attrs}") # noqa: T201
173
176
 
177
+ if detail and isinstance(val, h5py.Dataset):
178
+ char = "| "
179
+ if killme:
180
+ char = " "
181
+ toprint = f"{indent}{char}"
182
+ try:
183
+ toprint += f"\033[3mdtype\033[0m={val.dtype}"
184
+ toprint += f", \033[3mshape\033[0m={val.shape}"
185
+ toprint += f", \033[3mnbytes\033[0m={utils.fmtbytes(val.nbytes)}"
186
+ if (chunkshape := val.chunks) is None:
187
+ toprint += ", \033[3mnumchunks\033[0m=contiguous"
188
+ else:
189
+ toprint += f", \033[3mnumchunks\033[0m={val.id.get_num_chunks()}"
190
+ toprint += f", \033[3mchunkshape\033[0m={chunkshape}"
191
+ toprint += ", \033[3mfilters\033[0m="
192
+
193
+ numfilters = val.id.get_create_plist().get_nfilters()
194
+ if numfilters == 0:
195
+ toprint += "None"
196
+ else:
197
+ toprint += "("
198
+ for i in range(numfilters):
199
+ thisfilter = val.id.get_create_plist().get_filter(i)[3].decode()
200
+ if "lz4" in thisfilter:
201
+ thisfilter = "lz4"
202
+ toprint += f"{thisfilter},"
203
+ toprint += ")"
204
+
205
+ except TypeError:
206
+ toprint += "(scalar)"
207
+
208
+ print(toprint) # noqa: T201
209
+
174
210
  # if it's a group, call this function recursively
175
211
  if isinstance(val, h5py.Group):
176
212
  show(
@@ -179,6 +215,7 @@ def show(
179
215
  header=False,
180
216
  attrs=attrs,
181
217
  depth=depth - 1 if depth else None,
218
+ detail=detail,
182
219
  )
183
220
 
184
221
  # break or move to next key
@@ -244,7 +281,7 @@ def load_nda(
244
281
  f = sto.gimme_file(ff, "r")
245
282
  for par in par_list:
246
283
  if f"{lh5_group}/{par}" not in f:
247
- msg = f"'{lh5_group}/{par}' not in file {f_list[ii]}"
284
+ msg = f"'{lh5_group}/{par}' not in file {ff}"
248
285
  raise RuntimeError(msg)
249
286
 
250
287
  if idx_list is None:
@@ -221,3 +221,13 @@ def expand_path(
221
221
  return paths[0]
222
222
 
223
223
  return paths
224
+
225
+
226
+ # https://stackoverflow.com/a/1094933
227
+ def fmtbytes(num, suffix="B"):
228
+ """Returns formatted f-string for printing human-readable number of bytes."""
229
+ for unit in ("", "k", "M", "G", "T", "P", "E", "Z"):
230
+ if abs(num) < 1024.0:
231
+ return f"{num:3.1f} {unit}{suffix}"
232
+ num /= 1024.0
233
+ return f"{num:.1f} Y{suffix}"
@@ -50,4 +50,4 @@ class FixedSizeArray(Array):
50
50
  --------
51
51
  .LGDO.view_as
52
52
  """
53
- return super.view_as(library, with_units=with_units)
53
+ return super().view_as(library, with_units=with_units)
@@ -6,6 +6,7 @@ utilities.
6
6
  from __future__ import annotations
7
7
 
8
8
  import logging
9
+ from collections.abc import Mapping
9
10
  from typing import Any
10
11
 
11
12
  import numpy as np
@@ -24,8 +25,8 @@ class Struct(LGDO, dict):
24
25
 
25
26
  def __init__(
26
27
  self,
27
- obj_dict: dict[str, LGDO] | None = None,
28
- attrs: dict[str, Any] | None = None,
28
+ obj_dict: Mapping[str, LGDO] | None = None,
29
+ attrs: Mapping[str, Any] | None = None,
29
30
  ) -> None:
30
31
  """
31
32
  Parameters
@@ -37,8 +38,25 @@ class Struct(LGDO, dict):
37
38
  a set of user attributes to be carried along with this LGDO.
38
39
  """
39
40
  if obj_dict is not None:
40
- self.update(obj_dict)
41
-
41
+ for k, v in obj_dict.items():
42
+ # check if value is another mapping-like object
43
+ # initialize another struct (or derived class) in such a case
44
+ if not isinstance(v, LGDO) and isinstance(v, Mapping):
45
+ # NOTE: calling self.__new__() and then self.__init__() allows for polymorphism
46
+ # but is there a better way?
47
+ nested = self.__new__(type(self), v)
48
+ nested.__init__(v)
49
+ super().update({k: nested})
50
+ else:
51
+ # otherwise object must be an LGDO
52
+ if not isinstance(v, LGDO):
53
+ msg = f"value of '{k}' ({v!r}) is not an LGDO or a dictionary"
54
+ raise ValueError(msg)
55
+
56
+ # assign
57
+ super().update({k: v})
58
+
59
+ # call LGDO constructor to setup attributes
42
60
  super().__init__(attrs)
43
61
 
44
62
  def datatype_name(self) -> str:
@@ -42,9 +42,9 @@ class Table(Struct):
42
42
 
43
43
  def __init__(
44
44
  self,
45
+ col_dict: Mapping[str, LGDO] | pd.DataFrame | ak.Array | None = None,
45
46
  size: int | None = None,
46
- col_dict: dict[str, LGDO] | None = None,
47
- attrs: dict[str, Any] | None = None,
47
+ attrs: Mapping[str, Any] | None = None,
48
48
  ) -> None:
49
49
  r"""
50
50
  Parameters
@@ -56,11 +56,14 @@ class Table(Struct):
56
56
  determined from the length of the first array in `col_dict`. If
57
57
  neither is provided, a default length of 1024 is used.
58
58
  col_dict
59
- instantiate this table using the supplied named array-like LGDO's.
60
- Note 1: no copy is performed, the objects are used directly.
61
- Note 2: if `size` is not ``None``, all arrays will be resized to
62
- match it. Note 3: if the arrays have different lengths, all will
63
- be resized to match the length of the first array.
59
+ instantiate this table using the supplied mapping of column names
60
+ and array-like objects. Supported input types are: mapping of
61
+ strings to LGDOs, :class:`pd.DataFrame` and :class:`ak.Array`.
62
+ Note 1: no copy is performed, the objects are used directly (unless
63
+ :class:`ak.Array` is provided). Note 2: if `size` is not ``None``,
64
+ all arrays will be resized to match it. Note 3: if the arrays have
65
+ different lengths, all will be resized to match the length of the
66
+ first array.
64
67
  attrs
65
68
  A set of user attributes to be carried along with this LGDO.
66
69
 
@@ -68,14 +71,20 @@ class Table(Struct):
68
71
  -----
69
72
  the :attr:`loc` attribute is initialized to 0.
70
73
  """
74
+ if isinstance(col_dict, pd.DataFrame):
75
+ col_dict = {k: Array(v) for k, v in col_dict.items()}
76
+
77
+ if isinstance(col_dict, ak.Array):
78
+ col_dict = _ak_to_lgdo_or_col_dict(col_dict)
79
+
80
+ # call Struct constructor
71
81
  super().__init__(obj_dict=col_dict, attrs=attrs)
72
82
 
73
83
  # if col_dict is not empty, set size according to it
74
84
  # if size is also supplied, resize all fields to match it
75
85
  # otherwise, warn if the supplied fields have varying size
76
86
  if col_dict is not None and len(col_dict) > 0:
77
- do_warn = size is None
78
- self.resize(new_size=size, do_warn=do_warn)
87
+ self.resize(new_size=size, do_warn=(size is None))
79
88
 
80
89
  # if no col_dict, just set the size (default to 1024)
81
90
  else:
@@ -479,3 +488,11 @@ class Table(Struct):
479
488
 
480
489
  msg = f"{library!r} is not a supported third-party format."
481
490
  raise TypeError(msg)
491
+
492
+
493
+ def _ak_to_lgdo_or_col_dict(array: ak.Array):
494
+ if isinstance(array.type.content, ak.types.RecordType):
495
+ return {field: _ak_to_lgdo_or_col_dict(array[field]) for field in array.fields}
496
+ if isinstance(array.type.content, ak.types.NumpyType):
497
+ return Array(ak.to_numpy(array))
498
+ return VectorOfVectors(array)
@@ -302,26 +302,23 @@ class VectorOfVectors(LGDO):
302
302
  [3],
303
303
  ]
304
304
  """
305
- if self.ndim == 2:
306
- vidx = self.cumulative_length
307
- old_s = len(self)
308
- dlen = new_size - old_s
309
- csum = vidx[-1] if len(self) > 0 else 0
310
-
311
- # first resize the cumulative length
312
- self.cumulative_length.resize(new_size)
313
-
314
- # if new_size > size, new elements are filled with zeros, let's fix
315
- # that
316
- if dlen > 0:
317
- self.cumulative_length[old_s:] = csum
318
-
319
- # then resize the data array
320
- # if dlen > 0 this has no effect
321
- if len(self.cumulative_length) > 0:
322
- self.flattened_data.resize(self.cumulative_length[-1])
323
- else:
324
- raise NotImplementedError
305
+ vidx = self.cumulative_length
306
+ old_s = len(self)
307
+ dlen = new_size - old_s
308
+ csum = vidx[-1] if len(self) > 0 else 0
309
+
310
+ # first resize the cumulative length
311
+ self.cumulative_length.resize(new_size)
312
+
313
+ # if new_size > size, new elements are filled with zeros, let's fix
314
+ # that
315
+ if dlen > 0:
316
+ self.cumulative_length[old_s:] = csum
317
+
318
+ # then resize the data array
319
+ # if dlen > 0 this has no effect
320
+ if len(self.cumulative_length) > 0:
321
+ self.flattened_data.resize(self.cumulative_length[-1])
325
322
 
326
323
  def append(self, new: NDArray) -> None:
327
324
  """Append a 1D vector `new` at the end.
@@ -483,7 +480,7 @@ class VectorOfVectors(LGDO):
483
480
  lens = np.array([lens], dtype="u4")
484
481
 
485
482
  # calculate stop index in flattened_data
486
- cum_lens = start + lens.cumsum()
483
+ cum_lens = np.add(start, lens.cumsum(), dtype=int)
487
484
 
488
485
  # fill with fast vectorized routine
489
486
  vovutils._nb_fill(vec, lens, self.flattened_data.nda[start : cum_lens[-1]])
@@ -3,4 +3,4 @@ from __future__ import annotations
3
3
  import pint
4
4
 
5
5
  default_units_registry = pint.get_application_registry()
6
- default_units_registry.default_format = "~P"
6
+ default_units_registry.formatter.default_format = "~P"
@@ -68,7 +68,7 @@ def test_uleb128zzdiff_encode_decode_equality():
68
68
  pos = varlen.uleb128_encode(varlen.zigzag_encode(int(s) - last), encx)
69
69
  assert np.array_equal(sig_out[offset : offset + pos], encx[:pos])
70
70
  offset += pos
71
- last = s
71
+ last = int(s)
72
72
 
73
73
  sig_in_dec = np.empty(100, dtype="uint32")
74
74
  siglen = np.empty(1, dtype="uint32")
@@ -1,5 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import pytest
4
+
3
5
  import lgdo
4
6
 
5
7
 
@@ -26,6 +28,25 @@ def test_init():
26
28
  assert dict(struct) == obj_dict
27
29
  assert struct.attrs == attrs | {"datatype": "struct{scalar1}"}
28
30
 
31
+ with pytest.raises(ValueError):
32
+ lgdo.Struct(obj_dict={"scalar1": 1})
33
+
34
+ with pytest.raises(ValueError):
35
+ lgdo.Struct(obj_dict={"scalar1": lgdo.Scalar(value=10), "thing": int})
36
+
37
+
38
+ def test_init_nested():
39
+ obj_dict = {
40
+ "scalar1": lgdo.Scalar(10),
41
+ "struct1": {"field1": lgdo.Scalar(11), "field2": lgdo.Array([1, 2, 3, 4])},
42
+ }
43
+ struct = lgdo.Struct(obj_dict)
44
+ assert isinstance(struct.struct1, lgdo.Struct)
45
+ assert isinstance(struct.struct1.field1, lgdo.Scalar)
46
+ assert struct.struct1.field1.value == 11
47
+ assert isinstance(struct.struct1.field2, lgdo.Array)
48
+ assert struct.struct1.field2 == lgdo.Array([1, 2, 3, 4])
49
+
29
50
 
30
51
  def test_add_field():
31
52
  struct = lgdo.Struct()
@@ -31,6 +31,48 @@ def test_init():
31
31
  assert tbl.size == 3
32
32
 
33
33
 
34
+ def test_init_nested():
35
+ col_dict = {
36
+ "a": lgdo.Array(nda=np.array([1, 2, 3, 4])),
37
+ "b": lgdo.Array(nda=np.array([5, 6, 7, 8])),
38
+ "c": {
39
+ "f1": lgdo.Array([1, 2, 3, 4]),
40
+ "f2": lgdo.Array([1, 2, 3, 4]),
41
+ },
42
+ }
43
+
44
+ tbl = Table(col_dict=col_dict)
45
+ assert isinstance(tbl.c, Table)
46
+ assert isinstance(tbl.c.f1, lgdo.Array)
47
+ assert tbl.c.f1 == lgdo.Array([1, 2, 3, 4])
48
+
49
+
50
+ def test_pandas_df_init():
51
+ df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
52
+ tbl = Table(col_dict=df)
53
+ assert sorted(tbl.keys()) == ["a", "b"]
54
+ assert isinstance(tbl.a, lgdo.Array)
55
+ assert isinstance(tbl.b, lgdo.Array)
56
+ assert tbl.a == lgdo.Array([1, 2, 3, 4])
57
+ assert tbl.b == lgdo.Array([5, 6, 7, 8])
58
+
59
+
60
+ def test_ak_array_init():
61
+ array = ak.Array(
62
+ {
63
+ "a": [1, 2, 3, 4],
64
+ "b": [[1, 2], [3], [4], [5, 6, 7]],
65
+ "c": {"f1": [[], [5], [3, 7, 6], []], "f2": [5, 6, 7, 8]},
66
+ }
67
+ )
68
+ tbl = Table(array)
69
+ assert isinstance(tbl.a, lgdo.Array)
70
+ assert isinstance(tbl.b, lgdo.VectorOfVectors)
71
+ assert isinstance(tbl.c, Table)
72
+ assert isinstance(tbl.c.f1, lgdo.VectorOfVectors)
73
+ assert isinstance(tbl.c.f2, lgdo.Array)
74
+
75
+
34
76
  def test_datatype_name():
35
77
  tbl = Table()
36
78
  assert tbl.datatype_name() == "table"
@@ -95,7 +137,7 @@ def test_join():
95
137
 
96
138
 
97
139
  def test_view_as():
98
- tbl = Table(3)
140
+ tbl = Table(size=3)
99
141
  tbl.add_column("a", lgdo.Array(np.array([1, 2, 3]), attrs={"units": "m"}))
100
142
  tbl.add_column("b", lgdo.Array(np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])))
101
143
  tbl.add_column(
@@ -174,20 +174,37 @@ def test_getitem(testvov):
174
174
 
175
175
 
176
176
  def test_resize(testvov):
177
- testvov = testvov.v2d
178
-
179
- testvov.resize(3)
180
- assert ak.is_valid(testvov.view_as("ak"))
181
- assert len(testvov.cumulative_length) == 3
182
- assert len(testvov.flattened_data) == testvov.cumulative_length[-1]
183
- assert testvov == VectorOfVectors([[1, 2], [3, 4, 5], [2]])
177
+ vov = testvov.v2d
178
+
179
+ vov.resize(3)
180
+ assert ak.is_valid(vov.view_as("ak"))
181
+ assert len(vov.cumulative_length) == 3
182
+ assert len(vov.flattened_data) == vov.cumulative_length[-1]
183
+ assert vov == VectorOfVectors([[1, 2], [3, 4, 5], [2]])
184
+
185
+ vov.resize(5)
186
+ assert ak.is_valid(vov.view_as("ak"))
187
+ assert len(vov) == 5
188
+ assert len(vov[3]) == 0
189
+ assert len(vov[4]) == 0
190
+ assert vov == VectorOfVectors([[1, 2], [3, 4, 5], [2], [], []])
191
+
192
+ vov = testvov.v3d
193
+
194
+ vov.resize(3)
195
+ assert ak.is_valid(vov.view_as("ak"))
196
+ assert len(vov.cumulative_length) == 3
197
+ assert len(vov.flattened_data) == vov.cumulative_length[-1]
198
+ assert vov == VectorOfVectors(
199
+ [[[1, 2], [3, 4, 5]], [[2], [4, 8, 9, 7]], [[5, 3, 1]]]
200
+ )
184
201
 
185
- testvov.resize(5)
186
- assert ak.is_valid(testvov.view_as("ak"))
187
- assert len(testvov) == 5
188
- assert len(testvov[3]) == 0
189
- assert len(testvov[4]) == 0
190
- assert testvov == VectorOfVectors([[1, 2], [3, 4, 5], [2], [], []])
202
+ vov.resize(5)
203
+ assert ak.is_valid(vov.view_as("ak"))
204
+ assert len(vov) == 5
205
+ assert vov == VectorOfVectors(
206
+ [[[1, 2], [3, 4, 5]], [[2], [4, 8, 9, 7]], [[5, 3, 1]], [], []]
207
+ )
191
208
 
192
209
  v = VectorOfVectors(dtype="i")
193
210
  v.resize(3)