legend-pydataobj 1.11.6__tar.gz → 1.11.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/PKG-INFO +3 -2
  2. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/pyproject.toml +3 -3
  3. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/legend_pydataobj.egg-info/PKG-INFO +3 -2
  4. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/legend_pydataobj.egg-info/SOURCES.txt +2 -1
  5. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/legend_pydataobj.egg-info/entry_points.txt +1 -1
  6. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/__init__.py +5 -4
  7. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/_version.py +9 -4
  8. legend_pydataobj-1.11.8/src/lgdo/cli.py +183 -0
  9. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/__init__.py +2 -3
  10. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/read/composite.py +1 -3
  11. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/read/utils.py +1 -1
  12. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +1 -1
  13. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/write/composite.py +14 -8
  14. legend_pydataobj-1.11.8/src/lgdo/lh5/concat.py +219 -0
  15. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/core.py +33 -36
  16. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/iterator.py +48 -27
  17. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/store.py +22 -75
  18. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/tools.py +0 -111
  19. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/utils.py +6 -4
  20. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/array.py +84 -15
  21. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/encoded.py +25 -20
  22. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/histogram.py +1 -1
  23. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/lgdo.py +50 -0
  24. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/table.py +49 -28
  25. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/vectorofvectors.py +132 -94
  26. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/vovutils.py +14 -4
  27. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/waveformtable.py +19 -21
  28. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/compression/conftest.py +1 -2
  29. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/compression/test_radware_sigcompress.py +3 -3
  30. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/conftest.py +3 -3
  31. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/lh5/conftest.py +1 -1
  32. legend_pydataobj-1.11.6/tests/test_cli.py → legend_pydataobj-1.11.8/tests/lh5/test_concat.py +39 -43
  33. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/lh5/test_core.py +1 -1
  34. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/lh5/test_lh5_iterator.py +48 -15
  35. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/lh5/test_lh5_store.py +85 -100
  36. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/lh5/test_lh5_utils.py +9 -1
  37. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/lh5/test_lh5_write.py +83 -39
  38. legend_pydataobj-1.11.8/tests/test_cli.py +36 -0
  39. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_array.py +29 -1
  40. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_histogram.py +1 -1
  41. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_table.py +59 -16
  42. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_vectorofvectors.py +121 -27
  43. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_vovutils.py +52 -0
  44. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_waveformtable.py +13 -0
  45. legend_pydataobj-1.11.6/src/lgdo/cli.py +0 -328
  46. legend_pydataobj-1.11.6/src/lgdo/lh5_store.py +0 -284
  47. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/LICENSE +0 -0
  48. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/README.md +0 -0
  49. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/setup.cfg +0 -0
  50. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
  51. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
  52. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/legend_pydataobj.egg-info/requires.txt +0 -0
  53. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
  54. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/compression/__init__.py +0 -0
  55. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/compression/base.py +0 -0
  56. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/compression/generic.py +0 -0
  57. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/compression/radware.py +0 -0
  58. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/compression/utils.py +0 -0
  59. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/compression/varlen.py +0 -0
  60. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lgdo_utils.py +0 -0
  61. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/__init__.py +0 -0
  62. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
  63. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/read/array.py +0 -0
  64. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/read/encoded.py +0 -0
  65. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/read/ndarray.py +0 -0
  66. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/read/scalar.py +0 -0
  67. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
  68. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/write/array.py +0 -0
  69. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/write/scalar.py +0 -0
  70. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
  71. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/datatype.py +0 -0
  72. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/lh5/exceptions.py +0 -0
  73. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/logging.py +0 -0
  74. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/__init__.py +0 -0
  75. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
  76. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/fixedsizearray.py +0 -0
  77. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/scalar.py +0 -0
  78. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/types/struct.py +0 -0
  79. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/units.py +0 -0
  80. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/src/lgdo/utils.py +0 -0
  81. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
  82. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
  83. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/compression/test_compression.py +0 -0
  84. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/compression/test_str2wfcodec.py +0 -0
  85. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
  86. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/lh5/test_exceptions.py +0 -0
  87. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/lh5/test_lh5_datatype.py +0 -0
  88. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/lh5/test_lh5_tools.py +0 -0
  89. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/test_lgdo_utils.py +0 -0
  90. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_arrayofequalsizedarrays.py +0 -0
  91. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_encoded.py +0 -0
  92. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_fixedsizearray.py +0 -0
  93. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_representations.py +0 -0
  94. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_scalar.py +0 -0
  95. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_struct.py +0 -0
  96. {legend_pydataobj-1.11.6 → legend_pydataobj-1.11.8}/tests/types/test_table_eval.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: legend_pydataobj
3
- Version: 1.11.6
3
+ Version: 1.11.8
4
4
  Summary: LEGEND Python Data Objects
5
5
  Author: The LEGEND Collaboration
6
6
  Maintainer: The LEGEND Collaboration
@@ -726,6 +726,7 @@ Requires-Dist: pylegendtestdata; extra == "test"
726
726
  Requires-Dist: pytest>=6.0; extra == "test"
727
727
  Requires-Dist: pytest-cov; extra == "test"
728
728
  Requires-Dist: dbetto; extra == "test"
729
+ Dynamic: license-file
729
730
 
730
731
  # legend-pydataobj
731
732
 
@@ -81,7 +81,7 @@ test = [
81
81
 
82
82
  [project.scripts]
83
83
  lh5ls = "lgdo.cli:lh5ls"
84
- lh5concat = "lgdo.cli:lh5concat"
84
+ lh5concat = "lgdo.cli:lh5concat_cli"
85
85
 
86
86
  [tool.setuptools]
87
87
  include-package-data = true
@@ -122,7 +122,7 @@ extend-select = [
122
122
  "PIE", # flake8-pie
123
123
  "PL", # pylint
124
124
  "PT", # flake8-pytest-style
125
- #"PTH", # flake8-use-pathlib
125
+ "PTH", # flake8-use-pathlib
126
126
  "RET", # flake8-return
127
127
  "RUF", # Ruff-specific
128
128
  "SIM", # flake8-simplify
@@ -167,7 +167,7 @@ minversion = "6.0"
167
167
  addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
168
168
  xfail_strict = true
169
169
  filterwarnings = ["error", 'ignore:\nPyarrow:DeprecationWarning']
170
- log_cli_level = "info"
170
+ log_cli_level = "INFO"
171
171
  testpaths = "tests"
172
172
 
173
173
  [tool.codespell]
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: legend_pydataobj
3
- Version: 1.11.6
3
+ Version: 1.11.8
4
4
  Summary: LEGEND Python Data Objects
5
5
  Author: The LEGEND Collaboration
6
6
  Maintainer: The LEGEND Collaboration
@@ -726,6 +726,7 @@ Requires-Dist: pylegendtestdata; extra == "test"
726
726
  Requires-Dist: pytest>=6.0; extra == "test"
727
727
  Requires-Dist: pytest-cov; extra == "test"
728
728
  Requires-Dist: dbetto; extra == "test"
729
+ Dynamic: license-file
729
730
 
730
731
  # legend-pydataobj
731
732
 
@@ -12,7 +12,6 @@ src/lgdo/__init__.py
12
12
  src/lgdo/_version.py
13
13
  src/lgdo/cli.py
14
14
  src/lgdo/lgdo_utils.py
15
- src/lgdo/lh5_store.py
16
15
  src/lgdo/logging.py
17
16
  src/lgdo/units.py
18
17
  src/lgdo/utils.py
@@ -23,6 +22,7 @@ src/lgdo/compression/radware.py
23
22
  src/lgdo/compression/utils.py
24
23
  src/lgdo/compression/varlen.py
25
24
  src/lgdo/lh5/__init__.py
25
+ src/lgdo/lh5/concat.py
26
26
  src/lgdo/lh5/core.py
27
27
  src/lgdo/lh5/datatype.py
28
28
  src/lgdo/lh5/exceptions.py
@@ -68,6 +68,7 @@ tests/compression/test_uleb128_zigzag_diff.py
68
68
  tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat
69
69
  tests/compression/sigcompress/special-wf-clipped.dat
70
70
  tests/lh5/conftest.py
71
+ tests/lh5/test_concat.py
71
72
  tests/lh5/test_core.py
72
73
  tests/lh5/test_exceptions.py
73
74
  tests/lh5/test_lh5_datatype.py
@@ -1,3 +1,3 @@
1
1
  [console_scripts]
2
- lh5concat = lgdo.cli:lh5concat
2
+ lh5concat = lgdo.cli:lh5concat_cli
3
3
  lh5ls = lgdo.cli:lh5ls
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
45
45
  from __future__ import annotations
46
46
 
47
47
  from ._version import version as __version__
48
- from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
48
+ from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
49
49
  from .types import (
50
50
  LGDO,
51
51
  Array,
@@ -69,7 +69,6 @@ __all__ = [
69
69
  "FixedSizeArray",
70
70
  "Histogram",
71
71
  "LH5Iterator",
72
- "LH5Store",
73
72
  "Scalar",
74
73
  "Struct",
75
74
  "Table",
@@ -77,8 +76,10 @@ __all__ = [
77
76
  "VectorOfVectors",
78
77
  "WaveformTable",
79
78
  "__version__",
80
- "load_dfs",
81
- "load_nda",
82
79
  "ls",
80
+ "read",
81
+ "read_as",
82
+ "read_n_rows",
83
83
  "show",
84
+ "write",
84
85
  ]
@@ -1,8 +1,13 @@
1
- # file generated by setuptools_scm
1
+ # file generated by setuptools-scm
2
2
  # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
3
6
  TYPE_CHECKING = False
4
7
  if TYPE_CHECKING:
5
- from typing import Tuple, Union
8
+ from typing import Tuple
9
+ from typing import Union
10
+
6
11
  VERSION_TUPLE = Tuple[Union[int, str], ...]
7
12
  else:
8
13
  VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
12
17
  __version_tuple__: VERSION_TUPLE
13
18
  version_tuple: VERSION_TUPLE
14
19
 
15
- __version__ = version = '1.11.6'
16
- __version_tuple__ = version_tuple = (1, 11, 6)
20
+ __version__ = version = '1.11.8'
21
+ __version_tuple__ = version_tuple = (1, 11, 8)
@@ -0,0 +1,183 @@
1
+ """legend-pydataobj's command line interface utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import logging
7
+ import sys
8
+
9
+ from . import __version__, lh5
10
+ from . import logging as lgdogging # eheheh
11
+ from .lh5.concat import lh5concat
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+
16
+ def lh5ls(args=None):
17
+ """:func:`.lh5.show` command line interface."""
18
+ parser = argparse.ArgumentParser(
19
+ prog="lh5ls", description="Inspect LEGEND HDF5 (LH5) file contents"
20
+ )
21
+
22
+ # global options
23
+ parser.add_argument(
24
+ "--version",
25
+ action="store_true",
26
+ help="""Print legend-pydataobj version and exit""",
27
+ )
28
+ parser.add_argument(
29
+ "--verbose",
30
+ "-v",
31
+ action="store_true",
32
+ help="""Increase the program verbosity""",
33
+ )
34
+ parser.add_argument(
35
+ "--debug",
36
+ action="store_true",
37
+ help="""Increase the program verbosity to maximum""",
38
+ )
39
+
40
+ parser.add_argument(
41
+ "lh5_file",
42
+ help="""Input LH5 file""",
43
+ )
44
+ parser.add_argument("lh5_group", nargs="?", help="""LH5 group.""", default="/")
45
+ parser.add_argument(
46
+ "--attributes", "-a", action="store_true", help="""Print HDF5 attributes too"""
47
+ )
48
+ parser.add_argument(
49
+ "--depth",
50
+ "-d",
51
+ type=int,
52
+ default=None,
53
+ help="""Maximum tree depth of groups to print""",
54
+ )
55
+ parser.add_argument(
56
+ "--detail",
57
+ action="store_true",
58
+ help="""Print details about datasets""",
59
+ )
60
+
61
+ args = parser.parse_args(args)
62
+
63
+ if args.verbose:
64
+ lgdogging.setup(logging.DEBUG)
65
+ elif args.debug:
66
+ lgdogging.setup(logging.DEBUG, logging.root)
67
+ else:
68
+ lgdogging.setup()
69
+
70
+ if args.version:
71
+ print(__version__) # noqa: T201
72
+ sys.exit()
73
+
74
+ lh5.show(
75
+ args.lh5_file,
76
+ args.lh5_group,
77
+ attrs=args.attributes,
78
+ depth=args.depth,
79
+ detail=args.detail,
80
+ )
81
+
82
+
83
+ def lh5concat_cli(args=None):
84
+ """Command line interface for concatenating array-like LGDOs in LH5 files."""
85
+ parser = argparse.ArgumentParser(
86
+ prog="lh5concat",
87
+ description="""
88
+ Concatenate LGDO Arrays, VectorOfVectors and Tables in LH5 files.
89
+
90
+ Examples
91
+ --------
92
+
93
+ Concatenate all eligible objects in file{1,2}.lh5 into concat.lh5:
94
+
95
+ $ lh5concat -o concat.lh5 file1.lh5 file2.lh5
96
+
97
+ Include only the /data/table1 Table:
98
+
99
+ $ lh5concat -o concat.lh5 -i /data/table1/* file1.lh5 file2.lh5
100
+
101
+ Exclude the /data/table1/col1 Table column:
102
+
103
+ $ lh5concat -o concat.lh5 -e /data/table1/col1 file1.lh5 file2.lh5
104
+ """,
105
+ formatter_class=argparse.RawTextHelpFormatter,
106
+ )
107
+
108
+ # global options
109
+ parser.add_argument(
110
+ "--version",
111
+ action="store_true",
112
+ help="""Print legend-pydataobj version and exit""",
113
+ )
114
+ parser.add_argument(
115
+ "--verbose",
116
+ "-v",
117
+ action="store_true",
118
+ help="""Increase the program verbosity""",
119
+ )
120
+ parser.add_argument(
121
+ "--debug",
122
+ action="store_true",
123
+ help="""Increase the program verbosity to maximum""",
124
+ )
125
+
126
+ parser.add_argument(
127
+ "lh5_file",
128
+ nargs="+",
129
+ help="""Input LH5 files""",
130
+ )
131
+ parser.add_argument(
132
+ "--output",
133
+ "-o",
134
+ help="""Output file""",
135
+ default="lh5concat-output.lh5",
136
+ )
137
+ parser.add_argument(
138
+ "--overwrite",
139
+ "-w",
140
+ action="store_true",
141
+ help="""Overwrite output file""",
142
+ )
143
+ parser.add_argument(
144
+ "--include",
145
+ "-i",
146
+ help="""Regular expression (fnmatch style) for object names that should
147
+ be concatenated. To include full tables, you need to explicitly include
148
+ all its columns with e.g. '/path/to/table/*'. The option can be passed
149
+ multiple times to provide a list of patterns.
150
+ """,
151
+ action="append",
152
+ default=None,
153
+ )
154
+ parser.add_argument(
155
+ "--exclude",
156
+ "-e",
157
+ help="""List of object names that should be excluded. Takes priority
158
+ over --include. See --include help for more details.
159
+ """,
160
+ action="append",
161
+ default=None,
162
+ )
163
+
164
+ args = parser.parse_args(args)
165
+
166
+ if args.verbose:
167
+ lgdogging.setup(logging.INFO, log)
168
+ elif args.debug:
169
+ lgdogging.setup(logging.DEBUG, logging.root)
170
+ else:
171
+ lgdogging.setup()
172
+
173
+ if args.version:
174
+ print(__version__) # noqa: T201
175
+ sys.exit()
176
+
177
+ lh5concat(
178
+ lh5_files=args.lh5_file,
179
+ overwrite=args.overwrite,
180
+ output=args.output,
181
+ include_list=args.include,
182
+ exclude_list=args.exclude,
183
+ )
@@ -11,15 +11,14 @@ from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
11
11
  from .core import read, read_as, write
12
12
  from .iterator import LH5Iterator
13
13
  from .store import LH5Store
14
- from .tools import load_dfs, load_nda, ls, show
14
+ from .tools import ls, show
15
15
  from .utils import read_n_rows
16
16
 
17
17
  __all__ = [
18
18
  "DEFAULT_HDF5_SETTINGS",
19
19
  "LH5Iterator",
20
20
  "LH5Store",
21
- "load_dfs",
22
- "load_nda",
21
+ "concat",
23
22
  "ls",
24
23
  "read",
25
24
  "read_as",
@@ -353,15 +353,13 @@ def _h5_read_table(
353
353
  table = Table(col_dict=col_dict, attrs=attrs)
354
354
 
355
355
  # set (write) loc to end of tree
356
- table.loc = n_rows_read
356
+ table.resize(do_warn=True)
357
357
  return table, n_rows_read
358
358
 
359
359
  # We have read all fields into the object buffer. Run
360
360
  # checks: All columns should be the same size. So update
361
361
  # table's size as necessary, warn if any mismatches are found
362
362
  obj_buf.resize(do_warn=True)
363
- # set (write) loc to end of tree
364
- obj_buf.loc = obj_buf_start + n_rows_read
365
363
 
366
364
  # check attributes
367
365
  utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
@@ -34,7 +34,7 @@ def build_field_mask(field_mask: Mapping[str, bool] | Collection[str]) -> defaul
34
34
  default = not field_mask[next(iter(field_mask.keys()))]
35
35
  return defaultdict(lambda: default, field_mask)
36
36
  if isinstance(field_mask, (list, tuple, set)):
37
- return defaultdict(bool, {field: True for field in field_mask})
37
+ return defaultdict(bool, dict.fromkeys(field_mask, True))
38
38
  if isinstance(field_mask, defaultdict):
39
39
  return field_mask
40
40
  msg = "bad field_mask type"
@@ -123,7 +123,7 @@ def _h5_read_vector_of_vectors(
123
123
  )
124
124
  msg = (
125
125
  f"cumulative_length non-increasing between entries "
126
- f"{start_row} and {start_row+n_rows_read}"
126
+ f"{start_row} and {start_row + n_rows_read}"
127
127
  )
128
128
  raise LH5DecodeError(msg, fname, oname)
129
129
 
@@ -1,8 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- import os
5
4
  from inspect import signature
5
+ from pathlib import Path
6
6
 
7
7
  import h5py
8
8
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
53
53
  # change any object in the file. So we use file:append for
54
54
  # write_object:overwrite.
55
55
  if not isinstance(lh5_file, h5py.File):
56
- mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
56
+ mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
57
57
  lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
58
58
 
59
59
  log.debug(
@@ -186,19 +186,20 @@ def _h5_write_struct(
186
186
  write_start=0,
187
187
  **h5py_kwargs,
188
188
  ):
189
+ # this works for structs and derived (tables)
189
190
  assert isinstance(obj, types.Struct)
190
191
 
191
192
  # In order to append a column, we need to update the
192
- # `table{old_fields}` value in `group.attrs['datatype"]` to include
193
+ # `struct/table{old_fields}` value in `group.attrs['datatype"]` to include
193
194
  # the new fields. One way to do this is to override
194
195
  # `obj.attrs["datatype"]` to include old and new fields. Then we
195
- # can write the fields to the table as normal.
196
+ # can write the fields to the struct/table as normal.
196
197
  if wo_mode == "ac":
197
198
  old_group = utils.get_h5_group(name, group)
198
199
  lgdotype = datatype.datatype(old_group.attrs["datatype"])
199
200
  fields = datatype.get_struct_fields(old_group.attrs["datatype"])
200
- if not issubclass(lgdotype, types.Struct):
201
- msg = f"Trying to append columns to an object of type {lgdotype.__name__}"
201
+ if lgdotype is not type(obj):
202
+ msg = f"Trying to append columns to an object of different type {lgdotype.__name__}!={type(obj)}"
202
203
  raise LH5EncodeError(msg, lh5_file, group, name)
203
204
 
204
205
  # If the mode is `append_column`, make sure we aren't appending
@@ -211,8 +212,12 @@ def _h5_write_struct(
211
212
  "column(s) to a table with the same field(s)"
212
213
  )
213
214
  raise LH5EncodeError(msg, lh5_file, group, name)
215
+
214
216
  # It doesn't matter what key we access, as all fields in the old table have the same size
215
- if old_group[next(iter(old_group.keys()))].size != obj.size:
217
+ if (
218
+ isinstance(obj, types.Table)
219
+ and old_group[next(iter(old_group.keys()))].size != obj.size
220
+ ):
216
221
  msg = (
217
222
  f"Table sizes don't match. Trying to append column of size {obj.size} "
218
223
  f"to a table of size {old_group[next(iter(old_group.keys()))].size}."
@@ -222,7 +227,8 @@ def _h5_write_struct(
222
227
  # Now we can append the obj.keys() to the old fields, and then update obj.attrs.
223
228
  fields.extend(list(obj.keys()))
224
229
  obj.attrs.pop("datatype")
225
- obj.attrs["datatype"] = "table" + "{" + ",".join(fields) + "}"
230
+
231
+ obj.attrs["datatype"] = obj.datatype_name() + "{" + ",".join(fields) + "}"
226
232
 
227
233
  group = utils.get_h5_group(
228
234
  name,
@@ -0,0 +1,219 @@
1
+ from __future__ import annotations
2
+
3
+ import fnmatch
4
+ import logging
5
+
6
+ from lgdo.lh5 import LH5Iterator
7
+
8
+ from .. import Array, Scalar, Struct, Table, VectorOfVectors, lh5
9
+
10
+ log = logging.getLogger(__name__)
11
+
12
+
13
+ def _get_obj_list(
14
+ lh5_files: list, include_list: list | None = None, exclude_list: list | None = None
15
+ ) -> list[str]:
16
+ """Extract a list of lh5 objects to concatenate.
17
+
18
+ Parameters
19
+ ----------
20
+ lh5_files
21
+ list of input files to concatenate.
22
+ include_list
23
+ patterns for tables to include.
24
+ exclude_list
25
+ patterns for tables to exclude.
26
+
27
+ """
28
+ file0 = lh5_files[0]
29
+ obj_list_full = set(lh5.ls(file0, recursive=True))
30
+
31
+ # let's remove objects with nested LGDOs inside
32
+ to_remove = set()
33
+ for name in obj_list_full:
34
+ if len(fnmatch.filter(obj_list_full, f"{name}/*")) > 1:
35
+ to_remove.add(name)
36
+ obj_list_full -= to_remove
37
+
38
+ obj_list = set()
39
+ # now first remove excluded stuff
40
+ if exclude_list is not None:
41
+ for exc in exclude_list:
42
+ obj_list_full -= set(fnmatch.filter(obj_list_full, exc.strip("/")))
43
+
44
+ # then make list of included, based on latest list
45
+ if include_list is not None:
46
+ for inc in include_list:
47
+ obj_list |= set(fnmatch.filter(obj_list_full, inc.strip("/")))
48
+ else:
49
+ obj_list = obj_list_full
50
+
51
+ # sort
52
+ return sorted(obj_list)
53
+
54
+
55
+ def _get_lgdos(file, obj_list):
56
+ """Get name of LGDO objects."""
57
+
58
+ store = lh5.LH5Store()
59
+ h5f0 = store.gimme_file(file)
60
+
61
+ lgdos = []
62
+ lgdo_structs = {}
63
+
64
+ # loop over object list in the first file
65
+ for name in obj_list:
66
+ # now loop over groups starting from root
67
+ current = ""
68
+ for item in name.split("/"):
69
+ current = f"{current}/{item}".strip("/")
70
+
71
+ if current in lgdos:
72
+ break
73
+
74
+ # not even an LGDO (i.e. a plain HDF5 group)!
75
+ if "datatype" not in h5f0[current].attrs:
76
+ continue
77
+
78
+ # read as little as possible
79
+ obj = store.read(current, h5f0, n_rows=1)
80
+ if isinstance(obj, (Table, Array, VectorOfVectors)):
81
+ lgdos.append(current)
82
+
83
+ elif isinstance(obj, Struct):
84
+ # structs might be used in a "group-like" fashion (i.e. they might only
85
+ # contain array-like objects).
86
+ # note: handle after handling tables, as tables also satisfy this check.
87
+ lgdo_structs[current] = obj.attrs["datatype"]
88
+ continue
89
+
90
+ elif isinstance(obj, Scalar):
91
+ msg = f"cannot concat scalar field {current}"
92
+ log.warning(msg)
93
+
94
+ break
95
+
96
+ msg = f"first-level, array-like objects: {lgdos}"
97
+ log.info(msg)
98
+
99
+ msg = f"nested structs: {lgdo_structs}"
100
+ log.info(msg)
101
+
102
+ h5f0.close()
103
+
104
+ if lgdos == []:
105
+ msg = "did not find any field to concatenate, exit"
106
+ raise RuntimeError(msg)
107
+
108
+ return lgdos, lgdo_structs
109
+
110
+
111
+ def _inplace_table_filter(name, table, obj_list):
112
+ """filter objects nested in this LGDO"""
113
+ skm = fnmatch.filter(obj_list, f"{name}/*")
114
+ kept = {it.removeprefix(name).strip("/").split("/")[0] for it in skm}
115
+
116
+ # now remove fields
117
+ for k in list(table.keys()):
118
+ if k not in kept:
119
+ table.remove_column(k)
120
+
121
+ msg = f"fields left in table '{name}': {table.keys()}"
122
+ log.debug(msg)
123
+
124
+ # recurse!
125
+ for k2, v2 in table.items():
126
+ if not isinstance(v2, Table):
127
+ continue
128
+
129
+ _inplace_table_filter(f"{name}/{k2}", v2, obj_list)
130
+
131
+
132
+ def _remove_nested_fields(lgdos: dict, obj_list: list):
133
+ """Remove (nested) table fields based on obj_list."""
134
+
135
+ for key, val in lgdos.items():
136
+ if not isinstance(val, Table):
137
+ continue
138
+
139
+ _inplace_table_filter(key, val, obj_list)
140
+
141
+
142
+ def lh5concat(
143
+ lh5_files: list,
144
+ output: str,
145
+ overwrite: bool = False,
146
+ *,
147
+ include_list: list | None = None,
148
+ exclude_list: list | None = None,
149
+ ) -> None:
150
+ """Concatenate LGDO Arrays, VectorOfVectors and Tables in LH5 files.
151
+
152
+ Parameters
153
+ ----------
154
+ lh5_files
155
+ list of input files to concatenate.
156
+ output
157
+ path to the output file
158
+ include_list
159
+ patterns for tables to include.
160
+ exclude_list
161
+ patterns for tables to exclude.
162
+ """
163
+
164
+ if len(lh5_files) < 2:
165
+ msg = "you must provide at least two input files"
166
+ raise RuntimeError(msg)
167
+
168
+ # determine list of objects by recursively ls'ing first file
169
+ obj_list = _get_obj_list(
170
+ lh5_files, include_list=include_list, exclude_list=exclude_list
171
+ )
172
+
173
+ msg = f"objects matching include patterns {include_list} in {lh5_files[0]}: {obj_list}"
174
+ log.info(msg)
175
+
176
+ lgdos, lgdo_structs = _get_lgdos(lh5_files[0], obj_list)
177
+ first_done = False
178
+ store = lh5.LH5Store()
179
+
180
+ # loop over lgdo objects
181
+ for lgdo in lgdos:
182
+ # iterate over the files
183
+ for lh5_obj in LH5Iterator(lh5_files, lgdo):
184
+ data = {lgdo: lh5_obj}
185
+
186
+ # remove the nested fields
187
+ _remove_nested_fields(data, obj_list)
188
+
189
+ if first_done is False:
190
+ msg = f"creating output file {output}"
191
+ log.info(msg)
192
+
193
+ store.write(
194
+ data[lgdo],
195
+ lgdo,
196
+ output,
197
+ wo_mode="overwrite_file"
198
+ if (overwrite and not first_done)
199
+ else "write_safe",
200
+ )
201
+ first_done = True
202
+
203
+ else:
204
+ msg = f"appending to {output}"
205
+ log.info(msg)
206
+
207
+ if isinstance(data[lgdo], Table):
208
+ _inplace_table_filter(lgdo, data[lgdo], obj_list)
209
+
210
+ store.write(data[lgdo], lgdo, output, wo_mode="append")
211
+
212
+ if lgdo_structs != {}:
213
+ output_file = store.gimme_file(output, mode="a")
214
+ for struct, struct_dtype in lgdo_structs.items():
215
+ msg = f"reset datatype of struct {struct} to {struct_dtype}"
216
+ log.debug(msg)
217
+
218
+ output_file[struct].attrs["datatype"] = struct_dtype
219
+ output_file.close()