atlas-ftag-tools 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atlas_ftag_tools-0.2.12.dist-info/METADATA +53 -0
- atlas_ftag_tools-0.2.12.dist-info/RECORD +32 -0
- {atlas_ftag_tools-0.2.10.dist-info → atlas_ftag_tools-0.2.12.dist-info}/WHEEL +1 -1
- {atlas_ftag_tools-0.2.10.dist-info → atlas_ftag_tools-0.2.12.dist-info}/entry_points.txt +1 -0
- atlas_ftag_tools-0.2.12.dist-info/licenses/LICENSE +201 -0
- ftag/__init__.py +11 -11
- ftag/flavours.yaml +18 -13
- ftag/hdf5/__init__.py +5 -3
- ftag/hdf5/h5add_col.py +391 -0
- ftag/hdf5/h5reader.py +17 -4
- ftag/hdf5/h5utils.py +10 -1
- ftag/hdf5/h5writer.py +86 -29
- ftag/labeller.py +1 -1
- ftag/mock.py +2 -2
- ftag/utils/__init__.py +2 -2
- ftag/vds.py +39 -4
- atlas_ftag_tools-0.2.10.dist-info/METADATA +0 -151
- atlas_ftag_tools-0.2.10.dist-info/RECORD +0 -30
- {atlas_ftag_tools-0.2.10.dist-info → atlas_ftag_tools-0.2.12.dist-info}/top_level.txt +0 -0
ftag/mock.py
CHANGED
@@ -106,11 +106,11 @@ def get_mock_scores(labels: np.ndarray, is_xbb: bool = False) -> np.ndarray:
|
|
106
106
|
for i in range(n_classes):
|
107
107
|
tmp_means = []
|
108
108
|
tmp_means = [
|
109
|
-
0 if j != i else mean_scale_list[
|
109
|
+
0 if j != i else mean_scale_list[rng.integers(0, len(mean_scale_list))]
|
110
110
|
for j in range(n_classes)
|
111
111
|
]
|
112
112
|
means.append(tmp_means)
|
113
|
-
scales.append(mean_scale_list[
|
113
|
+
scales.append(mean_scale_list[rng.integers(0, len(mean_scale_list))])
|
114
114
|
|
115
115
|
# Map the labels to the means
|
116
116
|
label_mapping = dict(zip(label_dict.values(), means))
|
ftag/utils/__init__.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from
|
4
|
-
from
|
3
|
+
from .logging import logger, set_log_level
|
4
|
+
from .metrics import (
|
5
5
|
calculate_efficiency,
|
6
6
|
calculate_efficiency_error,
|
7
7
|
calculate_rejection,
|
ftag/vds.py
CHANGED
@@ -2,6 +2,9 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import argparse
|
4
4
|
import glob
|
5
|
+
import os
|
6
|
+
import re
|
7
|
+
import sys
|
5
8
|
from pathlib import Path
|
6
9
|
|
7
10
|
import h5py
|
@@ -13,6 +16,8 @@ def parse_args(args):
|
|
13
16
|
)
|
14
17
|
parser.add_argument("pattern", type=Path, help="quotes-enclosed glob pattern of files to merge")
|
15
18
|
parser.add_argument("output", type=Path, help="path to output virtual file")
|
19
|
+
parser.add_argument("--use_regex", help="if provided pattern is a regex", action="store_true")
|
20
|
+
parser.add_argument("--regex_path", type=str, required="--regex" in sys.argv, default=None)
|
16
21
|
return parser.parse_args(args)
|
17
22
|
|
18
23
|
|
@@ -43,13 +48,36 @@ def get_virtual_layout(fnames: list[str], group: str):
|
|
43
48
|
return layout
|
44
49
|
|
45
50
|
|
51
|
+
def glob_re(pattern, regex_path):
    """List the entries of a directory whose names match a regular expression.

    Parameters
    ----------
    pattern : str | Path
        Regular expression tested against each entry name with ``re.match``,
        i.e. anchored at the start of the name but not at its end.
    regex_path : str | None
        Directory whose entries are listed. ``None`` lists the current
        working directory (the ``os.listdir`` default).

    Returns
    -------
    list[str]
        Bare entry names (not full paths) that match, in sorted order.
    """
    matches = re.compile(str(pattern)).match
    # os.listdir yields entries in arbitrary, filesystem-dependent order;
    # sorting keeps the list of files to merge reproducible between runs.
    return sorted(name for name in os.listdir(regex_path) if matches(name))
|
53
|
+
|
54
|
+
|
55
|
+
def regex_files_from_dir(reg_matched_fnames, regex_path):
    """Expand regex-matched directory entries into a flat list of file paths.

    Parameters
    ----------
    reg_matched_fnames : list[str]
        Entry names (files or directories) relative to ``regex_path``.
    regex_path : str | None
        Directory the names are relative to. Falls back to the current
        working directory when ``None`` or empty.

    Returns
    -------
    list[str]
        For each name, in input order: the path itself if it is an existing
        file, or every ``*.h5`` file directly inside it if it is a directory.
        Names that do not exist contribute nothing.
    """
    parent_dir = regex_path or str(Path.cwd())
    fnames = []
    for name in reg_matched_fnames:
        full_path = os.path.join(parent_dir, name)
        # The names are literal paths, not glob patterns: escape them so that
        # characters such as "[" or "*" in a real file name are matched
        # verbatim instead of being interpreted by glob.
        literal = glob.escape(full_path)
        to_glob = os.path.join(literal, "*.h5") if Path(full_path).is_dir() else literal
        fnames.extend(glob.glob(to_glob))
    return fnames
|
61
|
+
|
62
|
+
|
46
63
|
def create_virtual_file(
|
47
|
-
pattern: Path | str,
|
64
|
+
pattern: Path | str,
|
65
|
+
out_fname: Path | None = None,
|
66
|
+
use_regex: bool = False,
|
67
|
+
regex_path: str | None = None,
|
68
|
+
overwrite: bool = False,
|
48
69
|
):
|
49
70
|
# get list of filenames
|
50
|
-
|
71
|
+
pattern_str = str(pattern)
|
72
|
+
if use_regex:
|
73
|
+
reg_matched_fnames = glob_re(pattern_str, regex_path)
|
74
|
+
print("reg matched fnames: ", reg_matched_fnames)
|
75
|
+
fnames = regex_files_from_dir(reg_matched_fnames, regex_path)
|
76
|
+
else:
|
77
|
+
fnames = glob.glob(pattern_str)
|
51
78
|
if not fnames:
|
52
79
|
raise FileNotFoundError(f"No files matched pattern {pattern}")
|
80
|
+
print("Files to merge to vds: ", fnames)
|
53
81
|
|
54
82
|
# infer output path if not given
|
55
83
|
if out_fname is None:
|
@@ -94,8 +122,15 @@ def create_virtual_file(
|
|
94
122
|
|
95
123
|
def main(args=None) -> None:
|
96
124
|
args = parse_args(args)
|
97
|
-
|
98
|
-
|
125
|
+
matching_mode = "Applying regex to" if args.use_regex else "Globbing"
|
126
|
+
print(f"{matching_mode} {args.pattern}...")
|
127
|
+
create_virtual_file(
|
128
|
+
args.pattern,
|
129
|
+
args.output,
|
130
|
+
use_regex=args.use_regex,
|
131
|
+
regex_path=args.regex_path,
|
132
|
+
overwrite=True,
|
133
|
+
)
|
99
134
|
with h5py.File(args.output) as f:
|
100
135
|
key = next(iter(f.keys()))
|
101
136
|
num = len(f[key])
|
@@ -1,151 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.4
|
2
|
-
Name: atlas-ftag-tools
|
3
|
-
Version: 0.2.10
|
4
|
-
Summary: ATLAS Flavour Tagging Tools
|
5
|
-
Author: Sam Van Stroud, Philipp Gadow
|
6
|
-
License: MIT
|
7
|
-
Project-URL: Homepage, https://github.com/umami-hep/atlas-ftag-tools/
|
8
|
-
Requires-Python: <3.12,>=3.8
|
9
|
-
Description-Content-Type: text/markdown
|
10
|
-
Requires-Dist: h5py>=3.0
|
11
|
-
Requires-Dist: numpy>=2.2.3
|
12
|
-
Requires-Dist: PyYAML>=5.1
|
13
|
-
Requires-Dist: scipy>=1.15.2
|
14
|
-
Provides-Extra: dev
|
15
|
-
Requires-Dist: ruff==0.6.2; extra == "dev"
|
16
|
-
Requires-Dist: mypy==1.11.2; extra == "dev"
|
17
|
-
Requires-Dist: pre-commit==3.1.1; extra == "dev"
|
18
|
-
Requires-Dist: pytest==7.2.2; extra == "dev"
|
19
|
-
Requires-Dist: pytest-cov==4.0.0; extra == "dev"
|
20
|
-
Requires-Dist: pytest_notebook==0.10.0; extra == "dev"
|
21
|
-
Requires-Dist: ipykernel==6.21.3; extra == "dev"
|
22
|
-
|
23
|
-
[](https://github.com/psf/black)
|
24
|
-
[](https://badge.fury.io/py/atlas-ftag-tools)
|
25
|
-
[](https://codecov.io/gh/umami-hep/atlas-ftag-tools)
|
26
|
-
|
27
|
-
# ATLAS FTAG Python Tools
|
28
|
-
|
29
|
-
This is a collection of Python tools for working with files produced with the FTAG [ntuple dumper](https://gitlab.cern.ch/atlas-flavor-tagging-tools/training-dataset-dumper/).
|
30
|
-
The code is intended to be used as a [library](https://iscinumpy.dev/post/app-vs-library/) for other projects.
|
31
|
-
Please see the [example notebook](ftag/example.ipynb) for usage.
|
32
|
-
|
33
|
-
# Quickstart
|
34
|
-
|
35
|
-
## Installation
|
36
|
-
|
37
|
-
If you want to use this package without modification, you can install from [pypi](https://pypi.org/project/atlas-ftag-tools/) using `pip`.
|
38
|
-
|
39
|
-
```bash
|
40
|
-
pip install atlas-ftag-tools
|
41
|
-
```
|
42
|
-
|
43
|
-
To additionally install the development dependencies (for formatting and linting) use
|
44
|
-
```bash
|
45
|
-
pip install atlas-ftag-tools[dev]
|
46
|
-
```
|
47
|
-
|
48
|
-
## Development
|
49
|
-
|
50
|
-
If you plan on making changes to the code, instead clone the repository and install the package from source in editable mode with
|
51
|
-
|
52
|
-
```bash
|
53
|
-
python -m pip install -e .
|
54
|
-
```
|
55
|
-
|
56
|
-
Include development dependencies with
|
57
|
-
|
58
|
-
```bash
|
59
|
-
python -m pip install -e ".[dev]"
|
60
|
-
```
|
61
|
-
|
62
|
-
You can set up and run pre-commit hooks with
|
63
|
-
|
64
|
-
```bash
|
65
|
-
pre-commit install
|
66
|
-
pre-commit run --all-files
|
67
|
-
```
|
68
|
-
|
69
|
-
To run the tests you can use the `pytest` or `coverage` command, for example
|
70
|
-
|
71
|
-
```bash
|
72
|
-
coverage run --source ftag -m pytest --show-capture=stdout
|
73
|
-
```
|
74
|
-
|
75
|
-
Running `coverage report` will display the test coverage.
|
76
|
-
|
77
|
-
|
78
|
-
# Usage
|
79
|
-
|
80
|
-
Please see the [example notebook](ftag/example.ipynb) for full usage.
|
81
|
-
Additional functionality is also documented below.
|
82
|
-
|
83
|
-
## Calculate WPs
|
84
|
-
|
85
|
-
This package contains a script to calculate tagger working points (WPs).
|
86
|
-
The script is `working_points.py` and can be run after installing this package with
|
87
|
-
|
88
|
-
```
|
89
|
-
wps \
|
90
|
-
--ttbar "path/to/ttbar/*.h5" \
|
91
|
-
--tagger GN2v01 \
|
92
|
-
--fc 0.1
|
93
|
-
```
|
94
|
-
|
95
|
-
Both the `--tagger` and `--fc` options accept a list if you want to get the WPs for multiple taggers.
|
96
|
-
If you are doing c-tagging or xbb-tagging, dedicated fx arguments are available (you can find them all with `-h`).
|
97
|
-
|
98
|
-
If you want to use the `ttbar` WPs to get the efficiencies and rejections for the `zprime` sample, you can add `--zprime "path/to/zprime/*.h5"` to the command.
|
99
|
-
Note that a default selection of $p_T > 250 ~GeV$ is applied to jets in the `zprime` sample.
|
100
|
-
|
101
|
-
If instead of defining the working points for a series of signal efficiencies, you wish to calculate a WP corresponding to a specific background rejection, the `--rejection` option can be given along with the desired background.
|
102
|
-
|
103
|
-
By default the working points are printed to the terminal, but you can save the results to a YAML file with the `--outfile` option.
|
104
|
-
|
105
|
-
See `wps --help` for more options and information.
|
106
|
-
|
107
|
-
## Calculate efficiency at discriminant cut
|
108
|
-
|
109
|
-
The same script can be used to calculate the efficiency and rejection values at a given discriminant cut value.
|
110
|
-
The script `working_points.py` can be run after installing this package as follows
|
111
|
-
|
112
|
-
```
|
113
|
-
wps \
|
114
|
-
--ttbar "path/to/ttbar/*.h5" \
|
115
|
-
--tagger GN2v01 \
|
116
|
-
--fx 0.1 \
|
117
|
-
--disc_cuts 1.0 1.5
|
118
|
-
```
|
119
|
-
The `--tagger`, `--fx`, and `--outfile` follow the same procedure as in the 'Calculate WPs' script as described above.
|
120
|
-
|
121
|
-
## H5 Utils
|
122
|
-
|
123
|
-
### Create virtual file
|
124
|
-
|
125
|
-
This package contains a script to easily merge a set of H5 files.
|
126
|
-
A virtual file is a fast and lightweight way to wrap a set of files.
|
127
|
-
See the [h5py documentation](https://docs.h5py.org/en/stable/vds.html) for more information on virtual datasets.
|
128
|
-
|
129
|
-
The script is `vds.py` and can be run after installing this package with
|
130
|
-
|
131
|
-
```
|
132
|
-
vds <pattern> <output path>
|
133
|
-
```
|
134
|
-
|
135
|
-
The `<pattern>` argument should be a quote-enclosed [glob pattern](https://en.wikipedia.org/wiki/Glob_(programming)), for example `"dsid/path/*.h5"`.
|
136
|
-
|
137
|
-
See `vds --help` for more options and information.
|
138
|
-
|
139
|
-
|
140
|
-
### [h5move](ftag/hdf5/h5move.py)
|
141
|
-
|
142
|
-
A script to move/rename datasets inside an h5file.
|
143
|
-
Useful for correcting discrepancies between group names.
|
144
|
-
See [h5move.py](ftag/hdf5/h5move.py) for more info.
|
145
|
-
|
146
|
-
|
147
|
-
### [h5split](ftag/hdf5/h5split.py)
|
148
|
-
|
149
|
-
A script to split a large h5 file into several smaller files.
|
150
|
-
Useful if output files are too large for EOS/grid storage.
|
151
|
-
See [h5split.py](ftag/hdf5/h5split.py) for more info.
|
@@ -1,30 +0,0 @@
|
|
1
|
-
ftag/__init__.py,sha256=v9emuK48Hhd-_TCiirfCNMsZSzk52frz1zEOgk9PViQ,787
|
2
|
-
ftag/cli_utils.py,sha256=w3TtQmUHSyAKChS3ewvOtcSDAUJAZGIIomaNi8f446U,298
|
3
|
-
ftag/cuts.py,sha256=9_ooLZHaO3SnIQBNxwbaPZn-qptGdKnB27FdKQGTiTY,2933
|
4
|
-
ftag/flavours.py,sha256=ShH4M2UjQZpZ_NlCctTm2q1tJbzYxjmGteioQ2GcqEU,114
|
5
|
-
ftag/flavours.yaml,sha256=5Lo9KWe-2KzmGMbc7o_X9gzwUyTl0Q5uVHYExduZ6T4,9502
|
6
|
-
ftag/fraction_optimization.py,sha256=IlMEJe5fD0soX40f-LO4dYAYld2gMqgZRuBLctoPn9A,5566
|
7
|
-
ftag/git_check.py,sha256=Y-XqM80CVXZ5ZKrDdZcYOJt3X64uU6W3OP6Z0D7AZU0,1663
|
8
|
-
ftag/labeller.py,sha256=IXUgU9UBir39PxVWRKs5r5fqI66Tv0x7nJD3-RYpbrg,2780
|
9
|
-
ftag/labels.py,sha256=2nmcmrZD8mWQPxJsGiOgcLDhSVgWfS_cEzqsBV-Qy8o,4198
|
10
|
-
ftag/mock.py,sha256=P2D7nNKAz2jRBbmfpHTDj9sBVU9r7HGd0rpWZOJYZ90,5980
|
11
|
-
ftag/region.py,sha256=ANv0dGI2W6NJqD9fp7EfqAUReH4FOjc1gwl_Qn8llcM,360
|
12
|
-
ftag/sample.py,sha256=3N0FrRcu9l1sX8ohuGOHuMYGD0See6gMO4--7NzR2tE,2538
|
13
|
-
ftag/track_selector.py,sha256=fJNk_kIBQriBqV4CPT_3ReJbOUnavDDzO-u3EQlRuyk,2654
|
14
|
-
ftag/transform.py,sha256=uEGGJSnqoKOzLYQv650XdK_kDNw4Aw-5dc60z9Dp_y0,3963
|
15
|
-
ftag/vds.py,sha256=nRViQZQIORB95nC7NZsW3KsSoGkLzEdOsuCViH5h8-U,3296
|
16
|
-
ftag/working_points.py,sha256=RJws2jPMEDQDspCbXUZBifS1CCBmlMJ5ax0eMyDzCRA,15949
|
17
|
-
ftag/hdf5/__init__.py,sha256=LFDNxVOCp58SvLHwQhdT68Q-KBMS_i6jBrbXoRpHzbM,354
|
18
|
-
ftag/hdf5/h5move.py,sha256=oYpRu0IDCIJIQ2ML52HBAdoyDxmKkHTeM9JdbPEgKfI,947
|
19
|
-
ftag/hdf5/h5reader.py,sha256=i31pDAqmOSaxdeRhc4iSBlld8xJ0pmp4rNd7CugNzw0,13706
|
20
|
-
ftag/hdf5/h5split.py,sha256=4Wy6Xc3J58MdD9aBaSZHf5ZcVFnJSkWsm42R5Pgo-R4,2448
|
21
|
-
ftag/hdf5/h5utils.py,sha256=-4zKTMtNCrDZr_9Ww7uzfsB7M7muBKpmm_1IkKJnHOI,3222
|
22
|
-
ftag/hdf5/h5writer.py,sha256=9FkClV__UbBqmFsq_h2jwiZnbWVm8QFRL_4mDZZBbTs,5316
|
23
|
-
ftag/utils/__init__.py,sha256=C0PgaA6Nk5WVpFqKhBhrHgj2mwsKJbSxoO6Cl67RsaI,544
|
24
|
-
ftag/utils/logging.py,sha256=54NaQiC9Bh4vSznSqzoPfR-7tj1PXfmoH7yKgv_ZHZk,3192
|
25
|
-
ftag/utils/metrics.py,sha256=zQI4nPeRDSyzqKpdOPmu0GU560xSWoW1wgL13rrja-I,12664
|
26
|
-
atlas_ftag_tools-0.2.10.dist-info/METADATA,sha256=VUhrtQML6_bUKlmZNFlUXxTTt5YBzNYupTrdlaF5IAw,5190
|
27
|
-
atlas_ftag_tools-0.2.10.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
28
|
-
atlas_ftag_tools-0.2.10.dist-info/entry_points.txt,sha256=b46bVP_O8Mg6aSdPmyjGgVkaXSdyXZMeKAsofh2IDeA,133
|
29
|
-
atlas_ftag_tools-0.2.10.dist-info/top_level.txt,sha256=qiYQuKcAvMim-31FwkT3MTQu7WQm0s58tPAia5KKWqs,5
|
30
|
-
atlas_ftag_tools-0.2.10.dist-info/RECORD,,
|
File without changes
|