atlas-ftag-tools 0.2.12__tar.gz → 0.2.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {atlas_ftag_tools-0.2.12/atlas_ftag_tools.egg-info → atlas_ftag_tools-0.2.13}/PKG-INFO +2 -2
  2. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13/atlas_ftag_tools.egg-info}/PKG-INFO +2 -2
  3. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/__init__.py +1 -1
  4. atlas_ftag_tools-0.2.13/ftag/vds.py +363 -0
  5. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/pyproject.toml +1 -1
  6. atlas_ftag_tools-0.2.12/ftag/vds.py +0 -142
  7. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/LICENSE +0 -0
  8. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/MANIFEST.in +0 -0
  9. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/README.md +0 -0
  10. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/atlas_ftag_tools.egg-info/SOURCES.txt +0 -0
  11. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/atlas_ftag_tools.egg-info/dependency_links.txt +0 -0
  12. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/atlas_ftag_tools.egg-info/entry_points.txt +0 -0
  13. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/atlas_ftag_tools.egg-info/requires.txt +0 -0
  14. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/atlas_ftag_tools.egg-info/top_level.txt +0 -0
  15. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/cli_utils.py +0 -0
  16. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/cuts.py +0 -0
  17. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/flavours.py +0 -0
  18. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/flavours.yaml +0 -0
  19. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/fraction_optimization.py +0 -0
  20. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/git_check.py +0 -0
  21. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/hdf5/__init__.py +0 -0
  22. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/hdf5/h5add_col.py +0 -0
  23. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/hdf5/h5move.py +0 -0
  24. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/hdf5/h5reader.py +0 -0
  25. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/hdf5/h5split.py +0 -0
  26. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/hdf5/h5utils.py +0 -0
  27. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/hdf5/h5writer.py +0 -0
  28. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/labeller.py +0 -0
  29. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/labels.py +0 -0
  30. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/mock.py +0 -0
  31. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/region.py +0 -0
  32. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/sample.py +0 -0
  33. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/track_selector.py +0 -0
  34. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/transform.py +0 -0
  35. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/utils/__init__.py +0 -0
  36. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/utils/logging.py +0 -0
  37. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/utils/metrics.py +0 -0
  38. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/ftag/working_points.py +0 -0
  39. {atlas_ftag_tools-0.2.12 → atlas_ftag_tools-0.2.13}/setup.cfg +0 -0
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atlas-ftag-tools
3
- Version: 0.2.12
3
+ Version: 0.2.13
4
4
  Summary: ATLAS Flavour Tagging Tools
5
5
  Author: Sam Van Stroud, Philipp Gadow
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://github.com/umami-hep/atlas-ftag-tools/
8
- Requires-Python: <3.12,>=3.8
8
+ Requires-Python: <3.12,>=3.10
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
  Requires-Dist: h5py>=3.0
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atlas-ftag-tools
3
- Version: 0.2.12
3
+ Version: 0.2.13
4
4
  Summary: ATLAS Flavour Tagging Tools
5
5
  Author: Sam Van Stroud, Philipp Gadow
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://github.com/umami-hep/atlas-ftag-tools/
8
- Requires-Python: <3.12,>=3.8
8
+ Requires-Python: <3.12,>=3.10
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
  Requires-Dist: h5py>=3.0
@@ -2,7 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "v0.2.12"
5
+ __version__ = "v0.2.13"
6
6
 
7
7
  from . import hdf5, utils
8
8
  from .cuts import Cuts
@@ -0,0 +1,363 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import glob
5
+ import os
6
+ import re
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ import h5py
11
+ import numpy as np
12
+
13
+
14
def parse_args(args=None):
    """Build and run the CLI argument parser for the vds script.

    ``--regex_path`` becomes mandatory whenever ``--use_regex`` appears in
    the (explicit or ``sys.argv``) argument list.
    """
    ap = argparse.ArgumentParser(
        description="Create a lightweight HDF5 wrapper (virtual datasets + "
        "summed cutBookkeeper counts) around a set of .h5 files"
    )
    ap.add_argument(
        "pattern",
        help="quotes-enclosed glob pattern of files to merge, "
        "or a regex if --use_regex is given",
        type=Path,
    )
    ap.add_argument("output", help="path to output virtual file", type=Path)
    ap.add_argument(
        "--use_regex",
        help="treat PATTERN as a regular expression instead of a glob",
        action="store_true",
    )
    # Only required when the regex mode flag is actually present
    regex_requested = "--use_regex" in (args or sys.argv)
    ap.add_argument(
        "--regex_path",
        help="directory whose entries the regex is applied to "
        "(defaults to the current working directory)",
        type=str,
        default=None,
        required=regex_requested,
    )
    return ap.parse_args(args)
40
+
41
+
42
def get_virtual_layout(fnames: list[str], group: str) -> h5py.VirtualLayout:
    """Concatenate group from multiple files into a single VirtualDataset.

    Parameters
    ----------
    fnames : list[str]
        List with the file names
    group : str
        Name of the group that is concatenated

    Returns
    -------
    h5py.VirtualLayout
        Virtual layout of the new virtual dataset
    """
    sources = []
    total = 0
    dtype = None
    trailing_shape: tuple = ()

    # Loop over the input files
    for fname in fnames:
        with h5py.File(fname, "r") as f:
            dset = f[group]
            if dtype is None:
                # Capture dtype and per-entry shape from the first file here,
                # instead of reopening it a second time afterwards.
                # Assumes all files share dtype/trailing shape — TODO confirm.
                dtype = dset.dtype
                trailing_shape = dset.shape[1:]
            # Get the file and append its length
            vsrc = h5py.VirtualSource(dset)
            total += vsrc.shape[0]
            sources.append(vsrc)

    # Define the layout of the output vds: concatenation along axis 0
    layout = h5py.VirtualLayout(shape=(total, *trailing_shape), dtype=dtype)

    # Fill the vds, placing each source at its running offset
    idx = 0
    for vsrc in sources:
        length = vsrc.shape[0]
        layout[idx : idx + length] = vsrc
        idx += length

    return layout
85
+
86
+
87
+ def glob_re(pattern: str | None, regex_path: str | None) -> list[str] | None:
88
+ """Return list of filenames that match REGEX pattern inside regex_path.
89
+
90
+ Parameters
91
+ ----------
92
+ pattern : str
93
+ Pattern for the input files
94
+ regex_path : str
95
+ Regex path for the input files
96
+
97
+ Returns
98
+ -------
99
+ list[str]
100
+ List of the file basenames that matched the regex pattern
101
+ """
102
+ if pattern is None or regex_path is None:
103
+ return None
104
+
105
+ return list(filter(re.compile(pattern).match, os.listdir(regex_path)))
106
+
107
+
108
+ def regex_files_from_dir(
109
+ reg_matched_fnames: list[str] | None,
110
+ regex_path: str | None,
111
+ ) -> list[str] | None:
112
+ """Turn a list of basenames into full paths; dive into sub-dirs if needed.
113
+
114
+ Parameters
115
+ ----------
116
+ reg_matched_fnames : list[str]
117
+ List of the regex matched file names
118
+ regex_path : str
119
+ Regex path for the input files
120
+
121
+ Returns
122
+ -------
123
+ list[str]
124
+ List of file paths (as strings) that matched the regex and any subsequent
125
+ globbing inside matched directories.
126
+ """
127
+ if reg_matched_fnames is None or regex_path is None:
128
+ return None
129
+
130
+ parent_dir = regex_path or str(Path.cwd())
131
+ full_paths = [Path(parent_dir) / fname for fname in reg_matched_fnames]
132
+ paths_to_glob = [str(fp / "*.h5") if fp.is_dir() else str(fp) for fp in full_paths]
133
+ nested_fnames = [glob.glob(p) for p in paths_to_glob]
134
+ return sum(nested_fnames, [])
135
+
136
+
137
def sum_counts_once(counts: np.ndarray) -> np.ndarray:
    """Reduce the arrays in the counts dataset for one file to a scalar via summation.

    Parameters
    ----------
    counts : np.ndarray
        Array from the h5py dataset (counts) from the cutBookkeeper groups

    Returns
    -------
    np.ndarray
        Array with the summed variables for the file
    """
    # Collapse each field of the structured array into one 0-d record
    result = np.zeros((), dtype=counts.dtype)
    for name in counts.dtype.names:
        result[name] = np.sum(counts[name])
    return result
155
+
156
+
157
def check_subgroups(fnames: list[str], group_name: str = "cutBookkeeper") -> list[str]:
    """Check which subgroups are available for the bookkeeper.

    Find the intersection of sub-group names that have a 'counts' dataset
    in every input file. (Using the intersection makes the script robust
    even if a few files are missing a variation.)

    Parameters
    ----------
    fnames : list[str]
        List of the input files
    group_name : str, optional
        Group name in the h5 files of the bookkeeper, by default "cutBookkeeper"

    Returns
    -------
    list[str]
        Sorted list of the sub-group names common to all input files

    Raises
    ------
    KeyError
        When a file does not have a bookkeeper
    ValueError
        When no common bookkeeper sub-groups were found
    """
    common: set[str] | None = None
    for fname in fnames:
        with h5py.File(fname, "r") as f:
            if group_name not in f:
                raise KeyError(f"{fname} has no '{group_name}' group")
            # Only sub-groups that actually carry a 'counts' dataset qualify
            these = {
                name
                for name, item in f[group_name].items()
                if isinstance(item, h5py.Group) and "counts" in item
            }
            common = these if common is None else common & these
    if not common:
        raise ValueError("No common cutBookkeeper sub-groups with 'counts' found")
    return sorted(common)
197
+
198
+
199
def aggregate_cutbookkeeper(
    fnames: list[str],
    group_name: str = "cutBookkeeper",
) -> dict[str, np.ndarray] | None:
    """Aggregate the cutBookkeeper in the input files.

    For every input file:
        For every sub-group (nominal, sysUp, sysDown, ...):
            1. Sum the 4-entry record array inside each file into 1 record
            2. Add those records from all files together into grand total
    Returns a dict {subgroup_name: scalar-record-array}

    Parameters
    ----------
    fnames : list[str]
        List of the input files
    group_name : str, optional
        Name of the bookkeeper group in the h5 files, by default "cutBookkeeper"

    Returns
    -------
    dict[str, np.ndarray] | None
        Dict with the accumulated cutBookkeeper groups. If the cut bookkeeper
        is not in the files, return None.
    """
    # Bail out if any file lacks the bookkeeper. Use explicit context
    # managers so every h5py.File handle is closed (the previous
    # generator-expression check leaked open handles).
    for fname in fnames:
        with h5py.File(fname, "r") as f:
            if group_name not in f:
                return None

    subgroups = check_subgroups(fnames, group_name=group_name)

    # initialise an accumulator per subgroup (dtype taken from 1st file)
    accum: dict[str, np.ndarray] = {}
    with h5py.File(fnames[0], "r") as f0:
        for sg in subgroups:
            dtype = f0[f"{group_name}/{sg}/counts"].dtype
            accum[sg] = np.zeros((), dtype=dtype)

    # add each file's contribution field-wise
    for fname in fnames:
        with h5py.File(fname, "r") as f:
            for sg in subgroups:
                per_file = sum_counts_once(f[f"{group_name}/{sg}/counts"][()])
                for fld in accum[sg].dtype.names:
                    accum[sg][fld] += per_file[fld]

    return accum
243
+
244
+
245
def create_virtual_file(
    pattern: Path | str,
    out_fname: Path | str | None = None,
    use_regex: bool = False,
    regex_path: str | None = None,
    overwrite: bool = False,
    bookkeeper_name: str = "cutBookkeeper",
) -> Path:
    """Create the virtual dataset file for the given inputs.

    Parameters
    ----------
    pattern : Path | str
        Pattern of the input files used. Wildcard is supported
    out_fname : Path | str | None, optional
        Output path to which the virtual dataset file is written. By default None
    use_regex : bool, optional
        If you want to use regex instead of glob, by default False
    regex_path : str | None, optional
        Regex logic used to define the input files, by default None
    overwrite : bool, optional
        Decide, if an existing output file is overwritten, by default False
    bookkeeper_name : str, optional
        Name of the cut bookkeeper in the h5 files, by default "cutBookkeeper"

    Returns
    -------
    Path
        Path object of the path to which the output file is written

    Raises
    ------
    FileNotFoundError
        If no input files were found for the given pattern
    ValueError
        If no output file is given and the input comes from multiple directories
    """
    # Get list of filenames
    pattern_str = str(pattern)

    # Use regex to find input files else use glob
    if use_regex is True:
        matched = glob_re(pattern_str, regex_path)
        fnames = regex_files_from_dir(matched, regex_path)
    else:
        fnames = glob.glob(pattern_str)

    # Throw error if no input files were found
    if not fnames:
        raise FileNotFoundError(f"No files matched pattern {pattern!r}")

    # Infer output path if not given
    if out_fname is None:
        if len({Path(f).parent for f in fnames}) != 1:
            raise ValueError("Give --output when files reside in multiple dirs")
        out_fname = Path(fnames[0]).parent / "vds" / "vds.h5"
    else:
        out_fname = Path(out_fname)

    # If overwrite is not active and a file exists, stop here
    if not overwrite and out_fname.is_file():
        return out_fname

    # Identify common groups across all files
    common_groups: set[str] = set()
    for fname in fnames:
        with h5py.File(fname, "r") as f:
            groups = set(f.keys())
            common_groups = groups if not common_groups else common_groups & groups

    # Ditch the bookkeeper. We will process it separately.
    # BUGFIX: discard the configurable bookkeeper_name instead of the
    # hard-coded "cutBookkeeper"; otherwise a custom bookkeeper group would
    # also be built as a virtual dataset and clash with the counts below.
    common_groups.discard(bookkeeper_name)

    # Check that the directory of the output file exists
    out_fname.parent.mkdir(parents=True, exist_ok=True)

    # Build the output file
    with h5py.File(out_fname, "w") as fout:
        # Build "standard" groups
        for gname in sorted(common_groups):
            layout = get_virtual_layout(fnames, gname)
            fout.create_virtual_dataset(gname, layout)

            # Copy first-file attributes to VDS root object
            with h5py.File(fnames[0], "r") as f0:
                for k, v in f0[gname].attrs.items():
                    fout[gname].attrs[k] = v

        # Build the cutBookkeeper
        counts_total = aggregate_cutbookkeeper(fnames=fnames, group_name=bookkeeper_name)
        if counts_total is not None:
            for sg, record in counts_total.items():
                grp = fout.require_group(f"{bookkeeper_name}/{sg}")
                grp.create_dataset("counts", data=record, shape=(), dtype=record.dtype)

    return out_fname
341
+
342
+
343
def main(args=None) -> None:
    """CLI entry point: build the virtual file and print a short summary."""
    parsed = parse_args(args)
    mode = "Applying regex to" if parsed.use_regex else "Globbing"
    print(f"{mode} {parsed.pattern} ...")

    # Always overwrite when invoked from the command line
    out_path = create_virtual_file(
        pattern=parsed.pattern,
        out_fname=parsed.output,
        use_regex=parsed.use_regex,
        regex_path=parsed.regex_path,
        overwrite=True,
    )

    # Report the size of one virtual dataset as a sanity check
    with h5py.File(out_path, "r") as f:
        key = next(iter(f.keys()))
        print(f"Virtual dataset '{key}' has {len(f[key]):,} entries")

    print(f"Saved virtual file to {out_path.resolve()}")
360
+
361
+
362
# Allow the module to be executed directly as a script
if __name__ == "__main__":
    main()
@@ -5,7 +5,7 @@ authors = [{name="Sam Van Stroud"}, {name="Philipp Gadow"}]
5
5
  dynamic = ["version"]
6
6
  license = {text = "MIT"}
7
7
  readme = "README.md"
8
- requires-python = ">=3.8,<3.12"
8
+ requires-python = ">=3.10,<3.12"
9
9
 
10
10
  dependencies = [
11
11
  "h5py>=3.0",
@@ -1,142 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import argparse
4
- import glob
5
- import os
6
- import re
7
- import sys
8
- from pathlib import Path
9
-
10
- import h5py
11
-
12
-
13
- def parse_args(args):
14
- parser = argparse.ArgumentParser(
15
- description="Create a lightweight wrapper around a set of h5 files"
16
- )
17
- parser.add_argument("pattern", type=Path, help="quotes-enclosed glob pattern of files to merge")
18
- parser.add_argument("output", type=Path, help="path to output virtual file")
19
- parser.add_argument("--use_regex", help="if provided pattern is a regex", action="store_true")
20
- parser.add_argument("--regex_path", type=str, required="--regex" in sys.argv, default=None)
21
- return parser.parse_args(args)
22
-
23
-
24
- def get_virtual_layout(fnames: list[str], group: str):
25
- # get sources
26
- sources = []
27
- total = 0
28
- for fname in fnames:
29
- with h5py.File(fname) as f:
30
- vsource = h5py.VirtualSource(f[group])
31
- total += vsource.shape[0]
32
- sources.append(vsource)
33
-
34
- # define layout of the vds
35
- with h5py.File(fnames[0]) as f:
36
- dtype = f[group].dtype
37
- shape = f[group].shape
38
- shape = (total,) + shape[1:]
39
- layout = h5py.VirtualLayout(shape=shape, dtype=dtype)
40
-
41
- # fill the vds
42
- idx = 0
43
- for source in sources:
44
- length = source.shape[0]
45
- layout[idx : idx + length] = source
46
- idx += length
47
-
48
- return layout
49
-
50
-
51
- def glob_re(pattern, regex_path):
52
- return list(filter(re.compile(pattern).match, os.listdir(regex_path)))
53
-
54
-
55
- def regex_files_from_dir(reg_matched_fnames, regex_path):
56
- parent_dir = regex_path or str(Path.cwd())
57
- full_paths = [parent_dir + "/" + fname for fname in reg_matched_fnames]
58
- paths_to_glob = [fname + "/*.h5" if Path(fname).is_dir() else fname for fname in full_paths]
59
- nested_fnames = [glob.glob(fname) for fname in paths_to_glob]
60
- return sum(nested_fnames, [])
61
-
62
-
63
- def create_virtual_file(
64
- pattern: Path | str,
65
- out_fname: Path | None = None,
66
- use_regex: bool = False,
67
- regex_path: str | None = None,
68
- overwrite: bool = False,
69
- ):
70
- # get list of filenames
71
- pattern_str = str(pattern)
72
- if use_regex:
73
- reg_matched_fnames = glob_re(pattern_str, regex_path)
74
- print("reg matched fnames: ", reg_matched_fnames)
75
- fnames = regex_files_from_dir(reg_matched_fnames, regex_path)
76
- else:
77
- fnames = glob.glob(pattern_str)
78
- if not fnames:
79
- raise FileNotFoundError(f"No files matched pattern {pattern}")
80
- print("Files to merge to vds: ", fnames)
81
-
82
- # infer output path if not given
83
- if out_fname is None:
84
- assert len({Path(fname).parent for fname in fnames}) == 1
85
- out_fname = Path(fnames[0]).parent / "vds" / "vds.h5"
86
- else:
87
- out_fname = Path(out_fname)
88
-
89
- # check if file already exists
90
- if not overwrite and out_fname.is_file():
91
- return out_fname
92
-
93
- # identify common groups across all files
94
- common_groups: set[str] = set()
95
- for fname in fnames:
96
- with h5py.File(fname) as f:
97
- groups = set(f.keys())
98
- common_groups = groups if not common_groups else common_groups.intersection(groups)
99
-
100
- if not common_groups:
101
- raise ValueError("No common groups found across files")
102
-
103
- # create virtual file
104
- out_fname.parent.mkdir(exist_ok=True)
105
- with h5py.File(out_fname, "w") as f:
106
- for group in common_groups:
107
- layout = get_virtual_layout(fnames, group)
108
- f.create_virtual_dataset(group, layout)
109
- attrs_dict: dict = {}
110
- for fname in fnames:
111
- with h5py.File(fname) as g:
112
- for name, value in g[group].attrs.items():
113
- if name not in attrs_dict:
114
- attrs_dict[name] = []
115
- attrs_dict[name].append(value)
116
- for name, value in attrs_dict.items():
117
- if len(value) > 0:
118
- f[group].attrs[name] = value[0]
119
-
120
- return out_fname
121
-
122
-
123
- def main(args=None) -> None:
124
- args = parse_args(args)
125
- matching_mode = "Applying regex to" if args.use_regex else "Globbing"
126
- print(f"{matching_mode} {args.pattern}...")
127
- create_virtual_file(
128
- args.pattern,
129
- args.output,
130
- use_regex=args.use_regex,
131
- regex_path=args.regex_path,
132
- overwrite=True,
133
- )
134
- with h5py.File(args.output) as f:
135
- key = next(iter(f.keys()))
136
- num = len(f[key])
137
- print(f"Virtual dataset '{key}' has {num:,} entries")
138
- print(f"Saved virtual file to {args.output.resolve()}")
139
-
140
-
141
- if __name__ == "__main__":
142
- main()