atlas-ftag-tools 0.2.9__py3-none-any.whl → 0.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atlas_ftag_tools-0.2.11.dist-info/METADATA +53 -0
- atlas_ftag_tools-0.2.11.dist-info/RECORD +32 -0
- {atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.11.dist-info}/WHEEL +1 -1
- {atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.11.dist-info}/entry_points.txt +2 -1
- atlas_ftag_tools-0.2.11.dist-info/licenses/LICENSE +201 -0
- ftag/__init__.py +13 -12
- ftag/flavours.yaml +33 -12
- ftag/fraction_optimization.py +184 -0
- ftag/hdf5/__init__.py +5 -3
- ftag/hdf5/h5add_col.py +391 -0
- ftag/hdf5/h5writer.py +12 -1
- ftag/labels.py +10 -2
- ftag/utils/__init__.py +24 -0
- ftag/utils/logging.py +123 -0
- ftag/utils/metrics.py +431 -0
- ftag/vds.py +39 -4
- ftag/{wps/working_points.py → working_points.py} +1 -1
- atlas_ftag_tools-0.2.9.dist-info/METADATA +0 -150
- atlas_ftag_tools-0.2.9.dist-info/RECORD +0 -28
- ftag/wps/__init__.py +0 -0
- ftag/wps/discriminant.py +0 -84
- {atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.11.dist-info}/top_level.txt +0 -0
ftag/utils/metrics.py
ADDED
@@ -0,0 +1,431 @@
+"""Tools for metrics module."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+from scipy.ndimage import gaussian_filter1d
+
+from ftag.utils import logger
+
+if TYPE_CHECKING:  # pragma: no cover
+    from ftag.labels import Label, LabelContainer
+
+
+def save_divide(
+    numerator: np.ndarray | float,
+    denominator: np.ndarray | float,
+    default: float = 1.0,
+):
+    """Save divide for denominator equal to 0.
+
+    Division using numpy divide function returning default value in cases where
+    denominator is 0.
+
+    Parameters
+    ----------
+    numerator: np.ndarray | float,
+        Numerator in the ratio calculation.
+    denominator: np.ndarray | float,
+        Denominator in the ratio calculation.
+    default: float
+        Default value which is returned if denominator is 0.
+
+    Returns
+    -------
+    ratio: np.ndarray | float
+        Result of the division
+    """
+    logger.debug("Calculating save division.")
+    logger.debug("numerator: %s", numerator)
+    logger.debug("denominator: %s", denominator)
+    logger.debug("default: %s", default)
+
+    if isinstance(numerator, (int, float, np.number)) and isinstance(
+        denominator, (int, float, np.number)
+    ):
+        output_shape = 1
+    else:
+        try:
+            output_shape = denominator.shape
+        except AttributeError:
+            output_shape = numerator.shape
+
+    ratio = np.divide(
+        numerator,
+        denominator,
+        out=np.ones(
+            output_shape,
+            dtype=float,
+        )
+        * default,
+        where=(denominator != 0),
+    )
+    if output_shape == 1:
+        return float(ratio)
+    return ratio
+
+
+def weighted_percentile(
+    arr: np.ndarray,
+    percentile: np.ndarray,
+    weights: np.ndarray = None,
+):
+    """Calculate weighted percentile.
+
+    Implementation according to https://stackoverflow.com/a/29677616/11509698
+    (https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method)
+
+    Parameters
+    ----------
+    arr : np.ndarray
+        Data array
+    percentile : np.ndarray
+        Percentile array
+    weights : np.ndarray
+        Weights array, by default None
+
+    Returns
+    -------
+    np.ndarray
+        Weighted percentile array
+    """
+    logger.debug("Calculating weighted percentile.")
+    logger.debug("arr: %s", arr)
+    logger.debug("percentile: %s", percentile)
+    logger.debug("weights: %s", weights)
+
+    # Set weights to one if no weights are given
+    if weights is None:
+        weights = np.ones_like(arr)
+
+    # Set dtype to float64 if the weights are too large
+    dtype = np.float64 if np.sum(weights) > 1000000 else np.float32
+
+    # Get an array sorting and sort the array and the weights
+    ix = np.argsort(arr)
+    arr = arr[ix]
+    weights = weights[ix]
+
+    # Return the cumulative sum
+    cdf = np.cumsum(weights, dtype=dtype) - 0.5 * weights
+    cdf -= cdf[0]
+    cdf /= cdf[-1]
+
+    # Return the linear interpolation
+    return np.interp(percentile, cdf, arr)
+
+
+def calculate_efficiency(
+    sig_disc: np.ndarray,
+    bkg_disc: np.ndarray,
+    target_eff: float | list | np.ndarray,
+    return_cuts: bool = False,
+    sig_weights: np.ndarray = None,
+    bkg_weights: np.ndarray = None,
+):
+    """Calculate efficiency.
+
+    Parameters
+    ----------
+    sig_disc : np.ndarray
+        Signal discriminant
+    bkg_disc : np.ndarray
+        Background discriminant
+    target_eff : float or list or np.ndarray
+        Working point which is used for discriminant calculation
+    return_cuts : bool
+        Specifies if the cut values corresponding to the provided WPs are returned.
+        If target_eff is a float, only one cut value will be returned. If target_eff
+        is an array, target_eff is an array as well.
+    sig_weights : np.ndarray
+        Weights for signal events
+    bkg_weights : np.ndarray
+        Weights for background events
+
+    Returns
+    -------
+    eff : float or np.ndarray
+        Efficiency.
+        Return float if target_eff is a float, else np.ndarray
+    cutvalue : float or np.ndarray
+        Cutvalue if return_cuts is True.
+        Return float if target_eff is a float, else np.ndarray
+    """
+    logger.debug("Calculating efficiency.")
+    logger.debug("sig_disc: %s", sig_disc)
+    logger.debug("bkg_disc: %s", bkg_disc)
+    logger.debug("target_eff: %s", target_eff)
+    logger.debug("return_cuts: %s", return_cuts)
+    logger.debug("sig_weights: %s", sig_weights)
+    logger.debug("bkg_weights: %s", bkg_weights)
+
+    # float | np.ndarray for both target_eff and the returned values
+    return_float = False
+    if isinstance(target_eff, float):
+        return_float = True
+
+    # Flatten the target efficiencies
+    target_eff = np.asarray([target_eff]).flatten()
+
+    # Get the cutvalue for the given target efficiency
+    cutvalue = weighted_percentile(arr=sig_disc, percentile=1.0 - target_eff, weights=sig_weights)
+
+    # Sort the cutvalues to get the correct order
+    sorted_args = np.argsort(1 - target_eff)
+
+    # Get the histogram for the backgrounds
+    hist, _ = np.histogram(bkg_disc, (-np.inf, *cutvalue[sorted_args], np.inf), weights=bkg_weights)
+
+    # Calculate the efficiencies for the calculated cut values
+    eff = hist[::-1].cumsum()[-2::-1] / hist.sum()
+    eff = eff[sorted_args]
+
+    # Ensure that a float is returned if float was given
+    if return_float:
+        eff = eff[0]
+        cutvalue = cutvalue[0]
+
+    # Also return the cuts if wanted
+    if return_cuts:
+        return eff, cutvalue
+
+    return eff
+
+
+def calculate_rejection(
+    sig_disc: np.ndarray,
+    bkg_disc: np.ndarray,
+    target_eff,
+    return_cuts: bool = False,
+    sig_weights: np.ndarray = None,
+    bkg_weights: np.ndarray = None,
+    smooth: bool = False,
+):
+    """Calculate rejection.
+
+    Parameters
+    ----------
+    sig_disc : np.ndarray
+        Signal discriminant
+    bkg_disc : np.ndarray
+        Background discriminant
+    target_eff : float or list
+        Working point which is used for discriminant calculation
+    return_cuts : bool
+        Specifies if the cut values corresponding to the provided WPs are returned.
+        If target_eff is a float, only one cut value will be returned. If target_eff
+        is an array, target_eff is an array as well.
+    sig_weights : np.ndarray
+        Weights for signal events, by default None
+    bkg_weights : np.ndarray
+        Weights for background events, by default None
+
+    Returns
+    -------
+    rej : float or np.ndarray
+        Rejection.
+        If target_eff is a float, a float is returned if it's a list a np.ndarray
+    cut_value : float or np.ndarray
+        Cutvalue if return_cuts is True.
+        If target_eff is a float, a float is returned if it's a list a np.ndarray
+    """
+    logger.debug("Calculating rejection.")
+    logger.debug("sig_disc: %s", sig_disc)
+    logger.debug("bkg_disc: %s", bkg_disc)
+    logger.debug("target_eff: %s", target_eff)
+    logger.debug("return_cuts: %s", return_cuts)
+    logger.debug("sig_weights: %s", sig_weights)
+    logger.debug("bkg_weights: %s", bkg_weights)
+    logger.debug("smooth: %s", smooth)
+
+    # Calculate the efficiency
+    eff = calculate_efficiency(
+        sig_disc=sig_disc,
+        bkg_disc=bkg_disc,
+        target_eff=target_eff,
+        return_cuts=return_cuts,
+        sig_weights=sig_weights,
+        bkg_weights=bkg_weights,
+    )
+
+    # Invert the efficiency to get a rejection
+    rej = save_divide(1, eff[0] if return_cuts else eff, np.inf)
+
+    # Smooth out the rejection if wanted
+    if smooth:
+        rej = gaussian_filter1d(rej, sigma=1, radius=2, mode="nearest")
+
+    # Return also the cut values if wanted
+    if return_cuts:
+        return rej, eff[1]
+
+    return rej
+
+
+def calculate_efficiency_error(
+    arr: np.ndarray,
+    n_counts: int,
+    suppress_zero_divison_error: bool = False,
+    norm: bool = False,
+) -> np.ndarray:
+    """Calculate statistical efficiency uncertainty.
+
+    Parameters
+    ----------
+    arr : numpy.array
+        Efficiency values
+    n_counts : int
+        Number of used statistics to calculate efficiency
+    suppress_zero_divison_error : bool
+        Not raising Error for zero division
+    norm : bool, optional
+        If True, normed (relative) error is being calculated, by default False
+
+    Returns
+    -------
+    numpy.array
+        Efficiency uncertainties
+
+    Raises
+    ------
+    ValueError
+        If n_counts <= 0
+
+    Notes
+    -----
+    This method uses binomial errors as described in section 2.2 of
+    https://inspirehep.net/files/57287ac8e45a976ab423f3dd456af694
+    """
+    logger.debug("Calculating efficiency error.")
+    logger.debug("arr: %s", arr)
+    logger.debug("n_counts: %i", n_counts)
+    logger.debug("suppress_zero_divison_error: %s", suppress_zero_divison_error)
+    logger.debug("norm: %s", norm)
+    if np.any(n_counts <= 0) and not suppress_zero_divison_error:
+        raise ValueError(f"You passed as argument `N` {n_counts} but it has to be larger 0.")
+    if norm:
+        return np.sqrt(arr * (1 - arr) / n_counts) / arr
+    return np.sqrt(arr * (1 - arr) / n_counts)
+
+
+def calculate_rejection_error(
+    arr: np.ndarray,
+    n_counts: int,
+    norm: bool = False,
+) -> np.ndarray:
+    """Calculate the rejection uncertainties.
+
+    Parameters
+    ----------
+    arr : numpy.array
+        Rejection values
+    n_counts : int
+        Number of used statistics to calculate rejection
+    norm : bool, optional
+        If True, normed (relative) error is being calculated, by default False
+
+    Returns
+    -------
+    numpy.array
+        Rejection uncertainties
+
+    Raises
+    ------
+    ValueError
+        If n_counts <= 0
+    ValueError
+        If any rejection value is 0
+
+    Notes
+    -----
+    Special case of `eff_err()`
+    """
+    logger.debug("Calculating rejection error.")
+    logger.debug("arr: %s", arr)
+    logger.debug("n_counts: %i", n_counts)
+    logger.debug("norm: %s", norm)
+    if np.any(n_counts <= 0):
+        raise ValueError(f"You passed as argument `n_counts` {n_counts} but it has to be larger 0.")
+    if np.any(arr == 0):
+        raise ValueError("One rejection value is 0, cannot calculate error.")
+    if norm:
+        return np.power(arr, 2) * calculate_efficiency_error(1 / arr, n_counts) / arr
+    return np.power(arr, 2) * calculate_efficiency_error(1 / arr, n_counts)
+
+
+def get_discriminant(
+    jets: np.ndarray,
+    tagger: str,
+    signal: Label,
+    flavours: LabelContainer,
+    fraction_values: dict[str, float],
+    epsilon: float = 1e-10,
+) -> np.ndarray:
+    """Calculate the tagging discriminant for a given tagger.
+
+    Calculated as the logarithm of the ratio of a specified signal probability
+    to a weighted sum of background probabilities.
+
+    Parameters
+    ----------
+    jets : np.ndarray
+        Structured array of jets containing tagger outputs
+    tagger : str
+        Name of the tagger
+    signal : Label
+        Signal flavour (bjets/cjets or hbb/hcc)
+    fraction_values : dict
+        Dict with the fraction values for the background classes for the given tagger
+    epsilon : float, optional
+        Small number to avoid division by zero, by default 1e-10
+
+    Returns
+    -------
+    np.ndarray
+        Array of discriminant values.
+
+    Raises
+    ------
+    ValueError
+        If the signal flavour is not recognised.
+    """
+    # Init the denominator
+    denominator = 0.0
+
+    # Loop over background flavours
+    for flav in flavours:
+        # Skip signal flavour for denominator
+        if flav == signal:
+            continue
+
+        # Get the probability name of the tagger/flavour combo + fraction value
+        prob_name = f"{tagger}_{flav.px}"
+        fraction_value = fraction_values[flav.frac_str]
+
+        # If fraction_value for the given flavour is zero, skip it
+        if fraction_value == 0:
+            continue
+
+        # Check that the probability value for the flavour is available
+        if fraction_value > 0 and prob_name not in jets.dtype.names:
+            raise ValueError(
+                f"Nonzero fraction value for {flav.name}, but '{prob_name}' "
+                "not found in input array."
+            )
+
+        # Update denominator
+        denominator += jets[prob_name] * fraction_value if prob_name in jets.dtype.names else 0
+
+    # Calculate numerator
+    signal_field = f"{tagger}_{signal.px}"
+
+    # Check that the probability of the signal is available
+    if signal_field not in jets.dtype.names:
+        raise ValueError(
+            f"No signal probability value(s) found for tagger {tagger}. "
+            f"Missing variable: {signal_field}"
+        )
+
+    return np.log((jets[signal_field] + epsilon) / (denominator + epsilon))
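For orientation, here is a minimal sketch of how the new metrics helpers might be used together. The toy Gaussian discriminants, the seed, and the working points are illustrative assumptions, not part of the package diff.

```python
import numpy as np

from ftag.utils.metrics import (
    calculate_efficiency,
    calculate_rejection,
    calculate_rejection_error,
)

rng = np.random.default_rng(42)

# Toy discriminants: signal peaks above background (illustrative only)
sig_disc = rng.normal(2.0, 1.0, 10_000)
bkg_disc = rng.normal(0.0, 1.0, 10_000)

# Background efficiency and discriminant cut at a 70% signal-efficiency WP
bkg_eff, cut = calculate_efficiency(sig_disc, bkg_disc, 0.7, return_cuts=True)
print(f"70% WP: cut = {cut:.3f}, background efficiency = {bkg_eff:.4f}")

# Background rejection (1 / efficiency) across several working points,
# with binomial uncertainties based on the background sample size
target_effs = np.linspace(0.5, 0.9, 5)
rej = calculate_rejection(sig_disc, bkg_disc, target_effs)
rej_err = calculate_rejection_error(rej, n_counts=len(bkg_disc))
print("rejections:", rej, "+/-", rej_err)
```

Note that `calculate_rejection` inverts the efficiency through `save_divide` with a default of `np.inf`, so a working point where no background survives yields an infinite rejection rather than a division error.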
ftag/vds.py
CHANGED
@@ -2,6 +2,9 @@ from __future__ import annotations
 
 import argparse
 import glob
+import os
+import re
+import sys
 from pathlib import Path
 
 import h5py
@@ -13,6 +16,8 @@ def parse_args(args):
     )
     parser.add_argument("pattern", type=Path, help="quotes-enclosed glob pattern of files to merge")
     parser.add_argument("output", type=Path, help="path to output virtual file")
+    parser.add_argument("--use_regex", help="if provided pattern is a regex", action="store_true")
+    parser.add_argument("--regex_path", type=str, required="--regex" in sys.argv, default=None)
     return parser.parse_args(args)
 
 
@@ -43,13 +48,36 @@ def get_virtual_layout(fnames: list[str], group: str):
     return layout
 
 
+def glob_re(pattern, regex_path):
+    return list(filter(re.compile(pattern).match, os.listdir(regex_path)))
+
+
+def regex_files_from_dir(reg_matched_fnames, regex_path):
+    parent_dir = regex_path or str(Path.cwd())
+    full_paths = [parent_dir + "/" + fname for fname in reg_matched_fnames]
+    paths_to_glob = [fname + "/*.h5" if Path(fname).is_dir() else fname for fname in full_paths]
+    nested_fnames = [glob.glob(fname) for fname in paths_to_glob]
+    return sum(nested_fnames, [])
+
+
 def create_virtual_file(
-    pattern: Path | str,
+    pattern: Path | str,
+    out_fname: Path | None = None,
+    use_regex: bool = False,
+    regex_path: str | None = None,
+    overwrite: bool = False,
 ):
     # get list of filenames
-
+    pattern_str = str(pattern)
+    if use_regex:
+        reg_matched_fnames = glob_re(pattern_str, regex_path)
+        print("reg matched fnames: ", reg_matched_fnames)
+        fnames = regex_files_from_dir(reg_matched_fnames, regex_path)
+    else:
+        fnames = glob.glob(pattern_str)
     if not fnames:
         raise FileNotFoundError(f"No files matched pattern {pattern}")
+    print("Files to merge to vds: ", fnames)
 
     # infer output path if not given
     if out_fname is None:
@@ -94,8 +122,15 @@ def create_virtual_file(
 
 def main(args=None) -> None:
     args = parse_args(args)
-
-
+    matching_mode = "Applying regex to" if args.use_regex else "Globbing"
+    print(f"{matching_mode} {args.pattern}...")
+    create_virtual_file(
+        args.pattern,
+        args.output,
+        use_regex=args.use_regex,
+        regex_path=args.regex_path,
+        overwrite=True,
+    )
     with h5py.File(args.output) as f:
         key = next(iter(f.keys()))
        num = len(f[key])
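The new regex-based matching can be exercised from Python as sketched below; the directory and file-name pattern are hypothetical. When `regex_path` is omitted, the helpers fall back to the current working directory. The CLI equivalent would be `vds "output_[0-9]+\.h5" merged.h5 --use_regex --regex_path /data/samples`.

```python
from pathlib import Path

from ftag.vds import create_virtual_file

# Merge files whose names match a regex (not a glob) inside a fixed
# directory; "/data/samples" and the pattern are hypothetical.
create_virtual_file(
    r"output_[0-9]+\.h5",
    out_fname=Path("merged.h5"),
    use_regex=True,
    regex_path="/data/samples",
    overwrite=True,
)
```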
ftag/{wps/working_points.py → working_points.py}
CHANGED
@@ -14,7 +14,7 @@ from ftag import Flavours
 from ftag.cli_utils import HelpFormatter
 from ftag.cuts import Cuts
 from ftag.hdf5 import H5Reader
-from ftag.wps.discriminant import get_discriminant
+from ftag.utils import get_discriminant
 
 if TYPE_CHECKING:  # pragma: no cover
     from collections.abc import Sequence
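With the move out of `ftag.wps`, downstream code now imports `get_discriminant` from `ftag.utils`, as this hunk shows. Below is a minimal sketch of a call, assuming illustrative GN2v01 probability columns and fraction values; a plain list of labels stands in for a `LabelContainer`, since only iteration over the flavours is needed.

```python
import numpy as np

from ftag import Flavours
from ftag.utils import get_discriminant

# Two mock jets with GN2v01 probability columns (values are illustrative)
jets = np.array(
    [(0.80, 0.15, 0.05), (0.10, 0.20, 0.70)],
    dtype=[("GN2v01_pb", "f4"), ("GN2v01_pc", "f4"), ("GN2v01_pu", "f4")],
)

# b-tagging discriminant: log(pb / (fc * pc + fu * pu))
disc = get_discriminant(
    jets,
    tagger="GN2v01",
    signal=Flavours["bjets"],
    flavours=[Flavours["bjets"], Flavours["cjets"], Flavours["ujets"]],
    fraction_values={"fc": 0.2, "fu": 0.8},
)
print(disc)
```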
atlas_ftag_tools-0.2.9.dist-info/METADATA
DELETED
@@ -1,150 +0,0 @@
-Metadata-Version: 2.2
-Name: atlas-ftag-tools
-Version: 0.2.9
-Summary: ATLAS Flavour Tagging Tools
-Author: Sam Van Stroud, Philipp Gadow
-License: MIT
-Project-URL: Homepage, https://github.com/umami-hep/atlas-ftag-tools/
-Requires-Python: <3.12,>=3.8
-Description-Content-Type: text/markdown
-Requires-Dist: h5py>=3.0
-Requires-Dist: numpy
-Requires-Dist: PyYAML>=5.1
-Provides-Extra: dev
-Requires-Dist: ruff==0.6.2; extra == "dev"
-Requires-Dist: mypy==1.11.2; extra == "dev"
-Requires-Dist: pre-commit==3.1.1; extra == "dev"
-Requires-Dist: pytest==7.2.2; extra == "dev"
-Requires-Dist: pytest-cov==4.0.0; extra == "dev"
-Requires-Dist: pytest_notebook==0.10.0; extra == "dev"
-Requires-Dist: ipykernel==6.21.3; extra == "dev"
-
-[](https://github.com/psf/black)
-[](https://badge.fury.io/py/atlas-ftag-tools)
-[](https://codecov.io/gh/umami-hep/atlas-ftag-tools)
-
-# ATLAS FTAG Python Tools
-
-This is a collection of Python tools for working with files produced with the FTAG [ntuple dumper](https://gitlab.cern.ch/atlas-flavor-tagging-tools/training-dataset-dumper/).
-The code is intended to be used a [library](https://iscinumpy.dev/post/app-vs-library/) for other projects.
-Please see the [example notebook](ftag/example.ipynb) for usage.
-
-# Quickstart
-
-## Installation
-
-If you want to use this package without modification, you can install from [pypi](https://pypi.org/project/atlas-ftag-tools/) using `pip`.
-
-```bash
-pip install atlas-ftag-tools
-```
-
-To additionally install the development dependencies (for formatting and linting) use
-```bash
-pip install atlas-ftag-tools[dev]
-```
-
-## Development
-
-If you plan on making changes to teh code, instead clone the repository and install the package from source in editable mode with
-
-```bash
-python -m pip install -e .
-```
-
-Include development dependencies with
-
-```bash
-python -m pip install -e ".[dev]"
-```
-
-You can set up and run pre-commit hooks with
-
-```bash
-pre-commit install
-pre-commmit run --all-files
-```
-
-To run the tests you can use the `pytest` or `coverage` command, for example
-
-```bash
-coverage run --source ftag -m pytest --show-capture=stdout
-```
-
-Running `coverage report` will display the test coverage.
-
-
-# Usage
-
-Please see the [example notebook](ftag/example.ipynb) for full usage.
-Additional functionality is also documented below.
-
-## Calculate WPs
-
-This package contains a script to calculate tagger working points (WPs).
-The script is `working_points.py` and can be run after installing this package with
-
-```
-wps \
---ttbar "path/to/ttbar/*.h5" \
---tagger GN2v01 \
---fc 0.1
-```
-
-Both the `--tagger` and `--fc` options accept a list if you want to get the WPs for multiple taggers.
-If you are doing c-tagging or xbb-tagging, dedicated fx arguments are available (you can find them all with `-h`).
-
-If you want to use the `ttbar` WPs get the efficiencies and rejections for the `zprime` sample, you can add `--zprime "path/to/zprime/*.h5"` to the command.
-Note that a default selection of $p_T > 250 ~GeV$ to jets in the `zprime` sample.
-
-If instead of defining the working points for a series of signal efficiencies, you wish to calculate a WP corresponding to a specific background rejection, the `--rejection` option can be given along with the desired background.
-
-By default the working points are printed to the terminal, but you can save the results to a YAML file with the `--outfile` option.
-
-See `wps --help` for more options and information.
-
-## Calculate efficiency at discriminant cut
-
-The same script can be used to calculate the efficiency and rejection values at a given discriminant cut value.
-The script `working_points.py` can be run after intalling this package as follows
-
-```
-wps \
---ttbar "path/to/ttbar/*.h5" \
---tagger GN2v01 \
---fx 0.1
---disc_cuts 1.0 1.5
-```
-The `--tagger`, `--fx`, and `--outfile` follow the same procedure as in the 'Calculate WPs' script as described above.
-
-## H5 Utils
-
-### Create virtual file
-
-This package contains a script to easily merge a set of H5 files.
-A virtual file is a fast and lightweight way to wrap a set of files.
-See the [h5py documentation](https://docs.h5py.org/en/stable/vds.html) for more information on virtual datasets.
-
-The script is `vds.py` and can be run after installing this package with
-
-```
-vds <pattern> <output path>
-```
-
-The `<pattern>` argument should be a quotes enclosed [glob pattern](https://en.wikipedia.org/wiki/Glob_(programming)), for example `"dsid/path/*.h5"`
-
-See `vds --help` for more options and information.
-
-
-### [h5move](ftag/hdf5/h5move.py)
-
-A script to move/rename datasets inside an h5file.
-Useful for correcting discrepancies between group names.
-See [h5move.py](ftag/hdf5/h5move.py) for more info.
-
-
-### [h5split](ftag/hdf5/h5split.py)
-
-A script to split a large h5 file into several smaller files.
-Useful if output files are too large for EOS/grid storage.
-See [h5split.py](ftag/hdf5/h5split.py) for more info.
atlas_ftag_tools-0.2.9.dist-info/RECORD
DELETED
@@ -1,28 +0,0 @@
-ftag/__init__.py,sha256=YRug5UslRbNoQACbEhdenDS6wXmsmeLjlz4JaKP6eHs,737
-ftag/cli_utils.py,sha256=w3TtQmUHSyAKChS3ewvOtcSDAUJAZGIIomaNi8f446U,298
-ftag/cuts.py,sha256=9_ooLZHaO3SnIQBNxwbaPZn-qptGdKnB27FdKQGTiTY,2933
-ftag/flavours.py,sha256=ShH4M2UjQZpZ_NlCctTm2q1tJbzYxjmGteioQ2GcqEU,114
-ftag/flavours.yaml,sha256=87xBvLkMDkicuRMaXtxcao8gjEAgvlTbgjAzpvx4YFM,9021
-ftag/git_check.py,sha256=Y-XqM80CVXZ5ZKrDdZcYOJt3X64uU6W3OP6Z0D7AZU0,1663
-ftag/labeller.py,sha256=IXUgU9UBir39PxVWRKs5r5fqI66Tv0x7nJD3-RYpbrg,2780
-ftag/labels.py,sha256=C7IylPTnc32dFXq8C2Ks2wuljYK3WaY2EsPLGrhtXy8,3932
-ftag/mock.py,sha256=P2D7nNKAz2jRBbmfpHTDj9sBVU9r7HGd0rpWZOJYZ90,5980
-ftag/region.py,sha256=ANv0dGI2W6NJqD9fp7EfqAUReH4FOjc1gwl_Qn8llcM,360
-ftag/sample.py,sha256=3N0FrRcu9l1sX8ohuGOHuMYGD0See6gMO4--7NzR2tE,2538
-ftag/track_selector.py,sha256=fJNk_kIBQriBqV4CPT_3ReJbOUnavDDzO-u3EQlRuyk,2654
-ftag/transform.py,sha256=uEGGJSnqoKOzLYQv650XdK_kDNw4Aw-5dc60z9Dp_y0,3963
-ftag/vds.py,sha256=nRViQZQIORB95nC7NZsW3KsSoGkLzEdOsuCViH5h8-U,3296
-ftag/hdf5/__init__.py,sha256=LFDNxVOCp58SvLHwQhdT68Q-KBMS_i6jBrbXoRpHzbM,354
-ftag/hdf5/h5move.py,sha256=oYpRu0IDCIJIQ2ML52HBAdoyDxmKkHTeM9JdbPEgKfI,947
-ftag/hdf5/h5reader.py,sha256=i31pDAqmOSaxdeRhc4iSBlld8xJ0pmp4rNd7CugNzw0,13706
-ftag/hdf5/h5split.py,sha256=4Wy6Xc3J58MdD9aBaSZHf5ZcVFnJSkWsm42R5Pgo-R4,2448
-ftag/hdf5/h5utils.py,sha256=-4zKTMtNCrDZr_9Ww7uzfsB7M7muBKpmm_1IkKJnHOI,3222
-ftag/hdf5/h5writer.py,sha256=9FkClV__UbBqmFsq_h2jwiZnbWVm8QFRL_4mDZZBbTs,5316
-ftag/wps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ftag/wps/discriminant.py,sha256=GKa0zZlLREdm0mCYSbcWXITYe3VEn3PXOBQiPg5WvgM,2521
-ftag/wps/working_points.py,sha256=jXyikB-bf73EaYFkngjE977-Ytvb9nDTqIdHxWW6WQQ,15960
-atlas_ftag_tools-0.2.9.dist-info/METADATA,sha256=lXC-e0iHMDtvJH8h3i7PcCEKh4_CFz5vlqdGXKSEoV4,5153
-atlas_ftag_tools-0.2.9.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
-atlas_ftag_tools-0.2.9.dist-info/entry_points.txt,sha256=LfVLsZHQolqbPnwPgtmc5IQTh527BKkN2v-IpXWTNHw,137
-atlas_ftag_tools-0.2.9.dist-info/top_level.txt,sha256=qiYQuKcAvMim-31FwkT3MTQu7WQm0s58tPAia5KKWqs,5
-atlas_ftag_tools-0.2.9.dist-info/RECORD,,
ftag/wps/__init__.py
DELETED
File without changes