legend-pydataobj 1.12.0a2__tar.gz → 1.12.0a4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/PKG-INFO +1 -1
  2. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/pyproject.toml +2 -2
  3. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/legend_pydataobj.egg-info/PKG-INFO +1 -1
  4. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/_version.py +2 -2
  5. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/write/composite.py +2 -2
  6. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/core.py +5 -1
  7. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/store.py +7 -7
  8. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/utils.py +6 -4
  9. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/vectorofvectors.py +60 -18
  10. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/compression/test_radware_sigcompress.py +2 -2
  11. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/conftest.py +3 -3
  12. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/lh5/test_lh5_utils.py +9 -1
  13. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/lh5/test_lh5_write.py +21 -21
  14. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_vectorofvectors.py +44 -0
  15. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/LICENSE +0 -0
  16. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/README.md +0 -0
  17. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/setup.cfg +0 -0
  18. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/legend_pydataobj.egg-info/SOURCES.txt +0 -0
  19. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
  20. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
  21. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
  22. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/legend_pydataobj.egg-info/requires.txt +0 -0
  23. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
  24. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/__init__.py +0 -0
  25. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/cli.py +0 -0
  26. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/compression/__init__.py +0 -0
  27. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/compression/base.py +0 -0
  28. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/compression/generic.py +0 -0
  29. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/compression/radware.py +0 -0
  30. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/compression/utils.py +0 -0
  31. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/compression/varlen.py +0 -0
  32. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lgdo_utils.py +0 -0
  33. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/__init__.py +0 -0
  34. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/__init__.py +0 -0
  35. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
  36. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/read/array.py +0 -0
  37. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/read/composite.py +0 -0
  38. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/read/encoded.py +0 -0
  39. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/read/ndarray.py +0 -0
  40. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/read/scalar.py +0 -0
  41. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/read/utils.py +0 -0
  42. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +0 -0
  43. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
  44. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/write/array.py +0 -0
  45. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/write/scalar.py +0 -0
  46. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
  47. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/concat.py +0 -0
  48. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/datatype.py +0 -0
  49. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/exceptions.py +0 -0
  50. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/iterator.py +0 -0
  51. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/lh5/tools.py +0 -0
  52. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/logging.py +0 -0
  53. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/__init__.py +0 -0
  54. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/array.py +0 -0
  55. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
  56. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/encoded.py +0 -0
  57. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/fixedsizearray.py +0 -0
  58. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/histogram.py +0 -0
  59. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/lgdo.py +0 -0
  60. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/scalar.py +0 -0
  61. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/struct.py +0 -0
  62. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/table.py +0 -0
  63. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/vovutils.py +0 -0
  64. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/types/waveformtable.py +0 -0
  65. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/units.py +0 -0
  66. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/src/lgdo/utils.py +0 -0
  67. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/compression/conftest.py +0 -0
  68. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
  69. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
  70. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/compression/test_compression.py +0 -0
  71. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/compression/test_str2wfcodec.py +0 -0
  72. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
  73. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/lh5/conftest.py +0 -0
  74. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/lh5/test_concat.py +0 -0
  75. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/lh5/test_core.py +0 -0
  76. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/lh5/test_exceptions.py +0 -0
  77. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/lh5/test_lh5_datatype.py +0 -0
  78. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/lh5/test_lh5_iterator.py +0 -0
  79. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/lh5/test_lh5_store.py +0 -0
  80. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/lh5/test_lh5_tools.py +0 -0
  81. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/test_cli.py +0 -0
  82. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/test_lgdo_utils.py +0 -0
  83. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_array.py +0 -0
  84. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_arrayofequalsizedarrays.py +0 -0
  85. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_encoded.py +0 -0
  86. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_fixedsizearray.py +0 -0
  87. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_histogram.py +0 -0
  88. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_representations.py +0 -0
  89. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_scalar.py +0 -0
  90. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_struct.py +0 -0
  91. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_table.py +0 -0
  92. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_table_eval.py +0 -0
  93. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_vovutils.py +0 -0
  94. {legend_pydataobj-1.12.0a2 → legend_pydataobj-1.12.0a4}/tests/types/test_waveformtable.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.12.0a2
+Version: 1.12.0a4
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
pyproject.toml
@@ -122,7 +122,7 @@ extend-select = [
   "PIE", # flake8-pie
   "PL", # pylint
   "PT", # flake8-pytest-style
-  #"PTH", # flake8-use-pathlib
+  "PTH", # flake8-use-pathlib
   "RET", # flake8-return
   "RUF", # Ruff-specific
   "SIM", # flake8-simplify
@@ -167,7 +167,7 @@ minversion = "6.0"
 addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
 xfail_strict = true
 filterwarnings = ["error", 'ignore:\nPyarrow:DeprecationWarning']
-log_cli_level = "info"
+log_cli_level = "INFO"
 testpaths = "tests"

 [tool.codespell]
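
Enabling ruff's PTH ruleset (flake8-use-pathlib) is what drives most of the changes below: every os/os.path call with a pathlib equivalent now gets flagged. A minimal sketch of the rewrite pattern the rules enforce (hypothetical snippet, not code from this package):

    import os.path
    from pathlib import Path

    # flagged by PTH rules, e.g. PTH110 (os.path.exists) and PTH118 (os.path.join)
    exists = os.path.exists(os.path.join("data", "file.lh5"))

    # the pathlib spelling the rules push toward
    exists = (Path("data") / "file.lh5").exists()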
src/legend_pydataobj.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.12.0a2
+Version: 1.12.0a4
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
src/lgdo/_version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '1.12.0a2'
-__version_tuple__ = version_tuple = (1, 12, 0)
+__version__ = version = '1.12.0a4'
+__version_tuple__ = version_tuple = (1, 12, 0, 'a4')
src/lgdo/lh5/_serializers/write/composite.py
@@ -1,8 +1,8 @@
 from __future__ import annotations

 import logging
-import os
 from inspect import signature
+from pathlib import Path

 import h5py

@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
+        mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)

     log.debug(
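
The composite.py change keeps the existing mode logic: wo_mode="of" (overwrite file) truncates, anything else appends so other objects in the file survive. A standalone sketch of that decision, assuming only the names visible in the hunk:

    from pathlib import Path

    import h5py

    def _open_for_write(lh5_file: str, wo_mode: str) -> h5py.File:
        # truncate on explicit overwrite-file, or when the file does not
        # exist yet; otherwise append to preserve existing objects
        mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
        return h5py.File(lh5_file, mode=mode)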
src/lgdo/lh5/core.py
@@ -113,7 +113,11 @@ def read(
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-        lh5_obj = lh5_file[name]
+        try:
+            lh5_obj = lh5_file[name]
+        except KeyError as ke:
+            err = f"Object {name} not found in file {lh5_file.filename}"
+            raise KeyError(err) from ke
     else:
         if obj_buf is not None:
             obj_buf.resize(obj_buf_start)
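
With the core.py change, asking for a missing object no longer surfaces h5py's bare KeyError; the re-raised error names both the object and the file. Expected behavior, sketched with hypothetical file and object names:

    import lgdo.lh5 as lh5

    try:
        obj = lh5.read("not/there", "data.lh5")
    except KeyError as exc:
        # the message now reads like:
        # "Object not/there not found in file data.lh5"
        print(exc)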
src/lgdo/lh5/store.py
@@ -6,11 +6,11 @@ HDF5 files.
 from __future__ import annotations

 import logging
-import os
 import sys
 from collections import OrderedDict
 from collections.abc import Mapping, Sequence
 from inspect import signature
+from pathlib import Path
 from typing import Any

 import h5py
@@ -92,16 +92,16 @@ class LH5Store:
             return self.files[lh5_file]

         if self.base_path != "":
-            full_path = os.path.join(self.base_path, lh5_file)
+            full_path = Path(self.base_path) / lh5_file
         else:
-            full_path = lh5_file
+            full_path = Path(lh5_file)

-        file_exists = os.path.exists(full_path)
+        file_exists = full_path.exists()
         if mode != "r":
-            directory = os.path.dirname(full_path)
-            if directory != "" and not os.path.exists(directory):
+            directory = full_path.parent
+            if directory != "" and not full_path.parent.exists():
                 log.debug(f"making path {directory}")
-                os.makedirs(directory)
+                directory.mkdir(parents=True, exist_ok=True)

         if mode == "r" and not file_exists:
             msg = f"file {full_path} not found"
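
Path(self.base_path) / lh5_file keeps os.path.join semantics, including the subtle one: an absolute right-hand operand discards the base. A quick demonstration:

    from pathlib import Path

    base = Path("/data/legend")
    print(base / "run1/file.lh5")  # /data/legend/run1/file.lh5
    # like os.path.join, an absolute right-hand side wins:
    print(base / "/tmp/file.lh5")  # /tmp/file.lh5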
src/lgdo/lh5/utils.py
@@ -7,6 +7,7 @@ import logging
 import os
 import string
 from collections.abc import Mapping, Sequence
+from pathlib import Path
 from typing import Any

 import h5py
@@ -153,7 +154,7 @@ def expand_vars(expr: str, substitute: dict[str, str] | None = None) -> str:

    # use provided mapping
    # then expand env variables
-    return os.path.expandvars(string.Template(expr).safe_substitute(substitute))
+    return os.path.expandvars(string.Template(str(expr)).safe_substitute(substitute))


 def expand_path(
@@ -183,14 +184,15 @@
     Unique absolute path, or list of all absolute paths
     """
     if base_path is not None and base_path != "":
-        base_path = os.path.expanduser(os.path.expandvars(base_path))
-        path = os.path.join(base_path, path)
+        base_path = Path(os.path.expandvars(base_path)).expanduser()
+        path = base_path / path

     # first expand variables
     _path = expand_vars(path, substitute)

     # then expand wildcards
-    paths = sorted(glob.glob(os.path.expanduser(_path)))
+    # pathlib glob works differently so use glob for now
+    paths = sorted(glob.glob(str(Path(_path).expanduser())))  # noqa: PTH207

     if base_path is not None and base_path != "":
         paths = [os.path.relpath(p, base_path) for p in paths]
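
expand_vars applies the user mapping first via string.Template.safe_substitute (unknown keys are left untouched), then environment variables via os.path.expandvars; wrapping expr in str() lets it accept Path objects now that callers pass them. The two-stage expansion, sketched with hypothetical values:

    import os
    import string

    os.environ["PERIOD"] = "p03"
    expr = "$PERIOD/${run}/file.lh5"

    # user mapping first; $PERIOD survives because it is not in the mapping
    step1 = string.Template(expr).safe_substitute({"run": "r001"})
    print(os.path.expandvars(step1))  # p03/r001/file.lh5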
src/lgdo/types/vectorofvectors.py
@@ -130,20 +130,48 @@ class VectorOfVectors(LGDOCollection):

            # ak.to_buffer helps in de-serialization
            # NOTE: ak.to_packed() needed?
-            form, length, container = ak.to_buffers(ak.to_packed(data))
-
-            # NOTE: node#-data is not even in the dict if the awkward array is empty
-            # NOTE: if the data arg was a numpy array, to_buffers() preserves
-            # the original dtype
-            # FIXME: have to copy the buffers, otherwise self will not own the
-            # data and self.resize() will fail. Is it possible to avoid this?
-            flattened_data = np.copy(
-                container.pop(f"node{data.ndim - 1}-data", np.empty(0, dtype=dtype))
-            )
+            form, _, container = ak.to_buffers(ak.to_packed(data))
+
+            # check if bytestring
+            curr = form
+            for _ in range(data.ndim - 1):
+                curr = curr.content
+            if (
+                "__array__" in curr.parameters
+                and curr.parameters["__array__"] == "bytestring"
+            ):
+                diffs = np.diff(container[f"node{data.ndim - 1}-offsets"])
+                if (diffs != diffs[0]).all():
+                    err_msg = "Non uniform string lengths not supported"
+                    raise NotImplementedError(err_msg)
+                flattened_data = np.asarray(
+                    ak.enforce_type(
+                        ak.unflatten(
+                            container.pop(
+                                f"node{data.ndim}-data", np.empty(0, dtype=dtype)
+                            ),
+                            diffs[0],
+                        ),
+                        "bytes",
+                    )
+                )

-            # if user-provided dtype is different than dtype from Awkward, cast
-            # NOTE: makes a copy only if needed
-            flattened_data = np.asarray(flattened_data, dtype=dtype)
+                # if user-provided dtype is different than dtype from Awkward, cast
+                # NOTE: makes a copy only if needed
+                flattened_data = np.asarray(flattened_data, dtype=dtype)
+            else:
+                # NOTE: node#-data is not even in the dict if the awkward array is empty
+                # NOTE: if the data arg was a numpy array, to_buffers() preserves
+                # the original dtype
+                # FIXME: have to copy the buffers, otherwise self will not own the
+                # data and self.resize() will fail. Is it possible to avoid this?
+                flattened_data = np.copy(
+                    container.pop(f"node{data.ndim - 1}-data", np.empty(0, dtype=dtype))
+                )
+
+                # if user-provided dtype is different than dtype from Awkward, cast
+                # NOTE: makes a copy only if needed
+                flattened_data = np.asarray(flattened_data, dtype=dtype)

            # start from innermost VoV and build nested structure
            for i in range(data.ndim - 2, -1, -1):
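
The __init__ branch works because Awkward stores a bytestring array one node deeper than a numeric one: for an n-dimensional array of strings, node{n-1}-offsets holds the per-string character offsets, node{n}-data holds the raw bytes, and the form node carrying the "bytestring" parameter sits ndim - 1 levels down (strings do not count toward ndim). A small inspection sketch; the printed keys are what awkward 2.x produces and should be treated as an assumption of that version:

    import awkward as ak

    data = ak.Array([[b"p01", b"p02"], [b"p03"]])  # ndim == 2, strings are leaves
    form, length, container = ak.to_buffers(ak.to_packed(data))

    print(sorted(container))  # ['node0-offsets', 'node1-offsets', 'node2-data']

    curr = form
    for _ in range(data.ndim - 1):
        curr = curr.content
    print(curr.parameters.get("__array__"))  # 'bytestring'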
@@ -630,11 +658,25 @@ class VectorOfVectors(LGDOCollection):
            offsets[1:] = self.cumulative_length.nda
            offsets[0] = 0

-            content = (
-                ak.contents.NumpyArray(self.flattened_data.nda)
-                if self.ndim == 2
-                else self.flattened_data.view_as(library, with_units=with_units).layout
-            )
+            if self.ndim != 2:
+                content = self.flattened_data.view_as(
+                    library, with_units=with_units
+                ).layout
+            # need to handle strings separately
+            elif np.issubdtype(self.flattened_data.nda.dtype, np.bytes_):
+                byte_arrays = []
+                for s in self.flattened_data.nda:
+                    # Convert each string to array of bytes
+                    byte_array = np.frombuffer(s, dtype=np.uint8)
+                    byte_arrays.append(byte_array)
+                max_len = max(len(b) for b in byte_arrays)
+                raw_arrays = ak.contents.NumpyArray(np.concatenate(byte_arrays))
+                array_of_chars = ak.contents.RegularArray(
+                    raw_arrays, max_len, parameters={"__array__": "bytes"}
+                )
+                content = ak.enforce_type(array_of_chars, "bytes", highlevel=False)
+            else:
+                content = ak.contents.NumpyArray(self.flattened_data.nda)

            layout = ak.contents.ListOffsetArray(
                offsets=ak.index.Index(offsets),
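
The new elif branch in view_as rebuilds an Awkward "bytes" array from the fixed-width NumPy buffer. The same construction pulled out into a standalone sketch; it assumes equal-width bytestrings, which the __init__ check above enforces:

    import awkward as ak
    import numpy as np

    def fixed_width_bytes_to_ak(nda: np.ndarray) -> ak.Array:
        # nda holds equal-length bytestrings, e.g. dtype "S3";
        # np.frombuffer turns each element into its uint8 code points
        byte_arrays = [np.frombuffer(s, dtype=np.uint8) for s in nda]
        width = max(len(b) for b in byte_arrays)
        chars = ak.contents.NumpyArray(np.concatenate(byte_arrays))
        regular = ak.contents.RegularArray(
            chars, width, parameters={"__array__": "bytes"}
        )
        return ak.Array(ak.enforce_type(regular, "bytes", highlevel=False))

    print(fixed_width_bytes_to_ak(np.full(3, b"p03", dtype="S3")).tolist())
    # [b'p03', b'p03', b'p03']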
tests/compression/test_radware_sigcompress.py
@@ -22,7 +22,7 @@ def read_sigcompress_c_output(filename: str):
     enc_wf_c = np.empty(0, dtype=np.uint16)
     nsig_c = None
     shift = None
-    with open(filename) as f:
+    with Path(filename).open() as f:
         nsig_c = int(f.readline())  # first number in the file
         shift = int(f.readline())  # second number in the file
         for line in f.readlines():  # then the waveform
@@ -35,7 +35,7 @@ def read_sigcompress_c_output_multi(filename: str):
     enc_wf_c = []
     nsig_c = np.empty(0, dtype="uint32")
     shift = np.empty(0, dtype="int32")
-    with open(filename) as f:
+    with Path(filename).open() as f:
         for line in f:
             parts = line.split()
             nsig_c = np.append(nsig_c, np.uint32(parts[0]))
tests/conftest.py
@@ -1,20 +1,20 @@
 from __future__ import annotations

-import os
 import shutil
 import uuid
 from getpass import getuser
+from pathlib import Path
 from tempfile import gettempdir

 import pytest
 from legendtestdata import LegendTestData

-_tmptestdir = os.path.join(gettempdir(), f"lgdo-tests-{getuser()}-{uuid.uuid4()!s}")
+_tmptestdir = Path(gettempdir()) / f"lgdo-tests-{getuser()}-{uuid.uuid4()!s}"


 @pytest.fixture(scope="session")
 def tmptestdir():
-    os.mkdir(_tmptestdir)
+    Path(_tmptestdir).mkdir(parents=True, exist_ok=True)
     return _tmptestdir

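
The fixture swap is also a behavior fix: os.mkdir fails if the directory already exists (or its parent is missing), while Path.mkdir(parents=True, exist_ok=True) is idempotent, which is what a session-scoped fixture wants. For example:

    from pathlib import Path

    d = Path("/tmp/lgdo-demo/nested")
    d.mkdir(parents=True, exist_ok=True)  # creates missing parents
    d.mkdir(parents=True, exist_ok=True)  # calling again is a no-op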
tests/lh5/test_lh5_utils.py
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import os
+from pathlib import Path

 import pytest

@@ -36,7 +37,7 @@ def test_expand_path(lgnd_test_data):
             "lh5/prod-ref-l200/generated/tier/dsp/cal/p03/r001/l200-p03-r001-cal-20230318T012228Z-tier_dsp.lh5"
         ),
     ]
-    base_dir = os.path.dirname(files[0])
+    base_dir = Path(files[0]).parent

     assert utils.expand_path(f"{base_dir}/*20230318T012144Z*") == files[0]

@@ -50,3 +51,10 @@ def test_expand_path(lgnd_test_data):

     # Check if it finds a list of files correctly
     assert sorted(utils.expand_path(f"{base_dir}/*.lh5", list=True)) == sorted(files)
+
+    # check with base_path specified
+    base_path = base_dir.parent
+    assert (
+        utils.expand_path(f"{base_dir.name}/*20230318T012144Z*", base_path=base_path)
+        == Path(files[0]).relative_to(base_path).as_posix()
+    )
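
The new assertion leans on Path.relative_to(...).as_posix() to get a relative path with forward slashes on any OS. For example:

    from pathlib import Path

    base = Path("lh5/prod-ref-l200/generated/tier/dsp/cal/p03")
    f = base / "r001" / "l200-p03-r001-cal-20230318T012144Z-tier_dsp.lh5"
    print(f.relative_to(base).as_posix())
    # r001/l200-p03-r001-cal-20230318T012144Z-tier_dsp.lh5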
tests/lh5/test_lh5_write.py
@@ -1,7 +1,7 @@
 from __future__ import annotations

 import logging
-import os
+from pathlib import Path

 import awkward as ak
 import h5py
@@ -114,8 +114,8 @@ def test_write_object_overwrite_table_no_deletion(caplog, tmptestdir):
     caplog.set_level(logging.DEBUG)
     caplog.clear()

-    if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"):
-        os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5")
+    if Path(f"{tmptestdir}/write_object_overwrite_test.lh5").exists():
+        Path(f"{tmptestdir}/write_object_overwrite_test.lh5").unlink()

     tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))})
     tb2 = types.Table(
@@ -145,8 +145,8 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir):
     caplog.set_level(logging.DEBUG)
     caplog.clear()

-    if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"):
-        os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5")
+    if Path(f"{tmptestdir}/write_object_overwrite_test.lh5").exists():
+        Path(f"{tmptestdir}/write_object_overwrite_test.lh5").unlink()

     tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))})
     tb2 = types.Table(
@@ -170,8 +170,8 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir):
     assert "dset1" not in list(lh5file["my_group"].keys())

     # Make sure the same behavior happens when we nest the table in a group
-    if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"):
-        os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5")
+    if Path(f"{tmptestdir}/write_object_overwrite_test.lh5").exists():
+        Path(f"{tmptestdir}/write_object_overwrite_test.lh5").unlink()

     tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))})
     tb2 = types.Table(
@@ -209,8 +209,8 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir):
     caplog.clear()

     # Start with an types.WaveformTable
-    if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"):
-        os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5")
+    if Path(f"{tmptestdir}/write_object_overwrite_test.lh5").exists():
+        Path(f"{tmptestdir}/write_object_overwrite_test.lh5").unlink()

     tb1 = types.WaveformTable(
         t0=np.zeros(10),
@@ -316,8 +316,8 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir):
 # Test that when we try to overwrite an existing column in a table we fail
 def test_write_object_append_column(tmptestdir):
     # Try to append an array to a table
-    if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"):
-        os.remove(f"{tmptestdir}/write_object_append_column_test.lh5")
+    if Path(f"{tmptestdir}/write_object_append_column_test.lh5").exists():
+        Path(f"{tmptestdir}/write_object_append_column_test.lh5").unlink()

     array1 = types.Array(np.zeros(10))
     tb1 = types.Table(col_dict={"dset1`": types.Array(np.ones(10))})
@@ -332,8 +332,8 @@ def test_write_object_append_column(tmptestdir):
     )  # Now, try to append a column to an array

     # Try to append a table that has a same key as the old table
-    if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"):
-        os.remove(f"{tmptestdir}/write_object_append_column_test.lh5")
+    if Path(f"{tmptestdir}/write_object_append_column_test.lh5").exists():
+        Path(f"{tmptestdir}/write_object_append_column_test.lh5").unlink()

     tb1 = types.Table(
         col_dict={
@@ -355,8 +355,8 @@ def test_write_object_append_column(tmptestdir):
     )  # Now, try to append a column with a same field

     # try appending a column that is larger than one that exists
-    if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"):
-        os.remove(f"{tmptestdir}/write_object_append_column_test.lh5")
+    if Path(f"{tmptestdir}/write_object_append_column_test.lh5").exists():
+        Path(f"{tmptestdir}/write_object_append_column_test.lh5").unlink()

     tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))})
     tb2 = types.Table(
@@ -373,8 +373,8 @@ def test_write_object_append_column(tmptestdir):
     )  # Now, try to append a column with a different field size

     # Finally successfully append a column
-    if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"):
-        os.remove(f"{tmptestdir}/write_object_append_column_test.lh5")
+    if Path(f"{tmptestdir}/write_object_append_column_test.lh5").exists():
+        Path(f"{tmptestdir}/write_object_append_column_test.lh5").unlink()

     tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))})
     tb2 = types.Table(
@@ -410,8 +410,8 @@ def test_write_histogram(caplog, tmptestdir):
     caplog.clear()

     # Start with an types.Histogram
-    if os.path.exists(f"{tmptestdir}/write_histogram_test.lh5"):
-        os.remove(f"{tmptestdir}/write_histogram_test.lh5")
+    if Path(f"{tmptestdir}/write_histogram_test.lh5").exists():
+        Path(f"{tmptestdir}/write_histogram_test.lh5").unlink()

     h1 = types.Histogram(
         np.array([[1, 1], [1, 1]]), (np.array([0, 1, 2]), np.array([2.1, 2.2, 2.3]))
@@ -484,8 +484,8 @@ def test_write_histogram_variable(caplog, tmptestdir):
     caplog.clear()

     # Start with an types.Histogram
-    if os.path.exists(f"{tmptestdir}/write_histogram_test.lh5"):
-        os.remove(f"{tmptestdir}/write_histogram_test.lh5")
+    if Path(f"{tmptestdir}/write_histogram_test.lh5").exists():
+        Path(f"{tmptestdir}/write_histogram_test.lh5").unlink()

     h1 = types.Histogram(
         np.array([[1, 1], [1, 1]]), (np.array([0, 1.2, 2]), np.array([2.1, 2.5, 2.3]))
tests/types/test_vectorofvectors.py
@@ -508,3 +508,47 @@ def test_pickle(testvov):

     for i in range(len(desired)):
         assert np.array_equal(desired[i], ex[i])
+
+
+def test_bytestrings():
+    for string in [b"a", b"p01", b"V00000A"]:
+        # test bytestring
+        v = VectorOfVectors(
+            flattened_data=np.full(5, string, dtype=f"S{len(string)}"),
+            cumulative_length=np.array([2, 5], dtype="uint32"),
+        )
+        assert v.flattened_data.dtype == f"S{len(string)}"
+        assert v.flattened_data.nda[0] == string
+
+        # test bytestring view_as
+        v = VectorOfVectors(
+            flattened_data=np.full(5, string, dtype=f"S{len(string)}"),
+            cumulative_length=np.array([2, 5], dtype="uint32"),
+        )
+        ak_arr = v.view_as("ak", with_units=False)
+        assert isinstance(ak_arr, ak.Array)
+        assert ak_arr[0][0] == string
+
+    v = VectorOfVectors(
+        flattened_data=np.full(5, string, dtype="S7"),
+        cumulative_length=np.array([2, 5], dtype="uint32"),
+    )
+
+    # test bytestring with ak Array
+    ak_arr = v.view_as("ak", with_units=False)
+    v = VectorOfVectors(ak_arr)
+    assert v.flattened_data.dtype == "S7"
+    assert v.flattened_data.nda[0] == b"V00000A"
+
+    # test nested bytestring VoVoV
+
+    v = VectorOfVectors(
+        flattened_data=v,
+        cumulative_length=np.array([2], dtype="uint32"),
+    )
+    assert v.flattened_data.flattened_data.dtype == "S7"
+    assert v.flattened_data.flattened_data.nda[0] == b"V00000A"
+
+    ak_arr = v.view_as("ak", with_units=False)
+    assert isinstance(ak_arr, ak.Array)
+    assert ak_arr[0][0][0] == b"V00000A"
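
Taken together, the two vectorofvectors.py hunks make fixed-width bytestrings round-trip through VectorOfVectors, which is what this test pins down. A minimal usage sketch, using only the public names exercised in the tests above:

    import numpy as np
    from lgdo import VectorOfVectors

    # rows [b"p01", b"p01"] and [b"p01", b"p01", b"p01"]
    v = VectorOfVectors(
        flattened_data=np.full(5, b"p01", dtype="S3"),
        cumulative_length=np.array([2, 5], dtype="uint32"),
    )
    print(v.view_as("ak", with_units=False).tolist())
    # [[b'p01', b'p01'], [b'p01', b'p01', b'p01']]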