atlas-ftag-tools 0.2.7__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/PKG-INFO +2 -2
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/PKG-INFO +2 -2
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/__init__.py +1 -1
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/flavours.yaml +32 -5
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/mock.py +58 -17
- atlas_ftag_tools-0.2.9/ftag/wps/discriminant.py +84 -0
- atlas_ftag_tools-0.2.9/ftag/wps/working_points.py +547 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/pyproject.toml +2 -1
- atlas_ftag_tools-0.2.7/ftag/wps/discriminant.py +0 -131
- atlas_ftag_tools-0.2.7/ftag/wps/working_points.py +0 -316
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/MANIFEST.in +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/README.md +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/SOURCES.txt +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/dependency_links.txt +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/entry_points.txt +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/requires.txt +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/top_level.txt +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/cli_utils.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/cuts.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/flavours.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/git_check.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/hdf5/__init__.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/hdf5/h5move.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/hdf5/h5reader.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/hdf5/h5split.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/hdf5/h5utils.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/hdf5/h5writer.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/labeller.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/labels.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/region.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/sample.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/track_selector.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/transform.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/vds.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/ftag/wps/__init__.py +0 -0
- {atlas_ftag_tools-0.2.7 → atlas_ftag_tools-0.2.9}/setup.cfg +0 -0
@@ -60,12 +60,24 @@
|
|
60
60
|
colour: tab:orange
|
61
61
|
category: single-btag-ghost
|
62
62
|
_px: pc
|
63
|
-
- name:
|
64
|
-
label:
|
65
|
-
cuts: ["HadronGhostTruthLabelID == 0"]
|
63
|
+
- name: ghostsjets
|
64
|
+
label: $s$-jets
|
65
|
+
cuts: ["HadronGhostTruthLabelID == 0", "PartonTruthLabelID == 3"]
|
66
|
+
colour: tab:red
|
67
|
+
category: single-btag-ghost
|
68
|
+
_px: ps
|
69
|
+
- name: ghostudjets
|
70
|
+
label: Light-quark-jets
|
71
|
+
cuts: ["HadronGhostTruthLabelID == 0", "PartonTruthLabelID <= 2"]
|
66
72
|
colour: tab:green
|
67
73
|
category: single-btag-ghost
|
68
|
-
_px:
|
74
|
+
_px: pud
|
75
|
+
- name: ghostgjets
|
76
|
+
label: Gluon-jets
|
77
|
+
cuts: ["HadronGhostTruthLabelID == 0", "PartonTruthLabelID == 21"]
|
78
|
+
colour: tab:gray
|
79
|
+
category: single-btag-ghost
|
80
|
+
_px: pg
|
69
81
|
- name: ghosttaujets
|
70
82
|
label: $\tau$-jets
|
71
83
|
cuts: ["HadronGhostTruthLabelID == 15"]
|
@@ -119,6 +131,21 @@
|
|
119
131
|
cuts: ["R10TruthLabel_R22v1 == 10", "GhostBHadronsFinalCount == 0", "GhostCHadronsFinalCount == 0"]
|
120
132
|
colour: "green"
|
121
133
|
category: xbb
|
134
|
+
- name: htauel
|
135
|
+
label: $H \rightarrow \tau e$
|
136
|
+
cuts: ["R10TruthLabel_R22v1 == 14"]
|
137
|
+
colour: "#b40612"
|
138
|
+
category: xbb
|
139
|
+
- name: htaumu
|
140
|
+
label: $H \rightarrow \tau\mu$
|
141
|
+
cuts: ["R10TruthLabel_R22v1 == 15"]
|
142
|
+
colour: "#b40657"
|
143
|
+
category: xbb
|
144
|
+
- name: htauhad
|
145
|
+
label: $H \rightarrow \tau\tau$
|
146
|
+
cuts: ["R10TruthLabel_R22v1 == 16"]
|
147
|
+
colour: "#b406a0"
|
148
|
+
category: xbb
|
122
149
|
|
123
150
|
# extended Xbb tagging
|
124
151
|
- name: tqqb
|
@@ -272,7 +299,7 @@
|
|
272
299
|
category: isolation
|
273
300
|
- name: npxall
|
274
301
|
label: non-prompt lepton
|
275
|
-
cuts: ["iffClass notin (2,3,4,11)"]
|
302
|
+
cuts: ["iffClass notin (0,1,2,3,4,11)"]
|
276
303
|
colour: "#264653"
|
277
304
|
category: isolation
|
278
305
|
- name: npxtau
|
@@ -54,33 +54,74 @@ TRACK_VARS = [
|
|
54
54
|
]
|
55
55
|
|
56
56
|
|
57
|
-
def softmax(x, axis=None):
|
57
|
+
def softmax(x: np.ndarray, axis: int | None = None) -> np.ndarray:
|
58
|
+
"""Softmax function for numpy arrays.
|
59
|
+
|
60
|
+
Parameters
|
61
|
+
----------
|
62
|
+
x : np.ndarray
|
63
|
+
Input array for the softmax
|
64
|
+
axis : int | None, optional
|
65
|
+
Axis along which the softmax is calculated, by default None
|
66
|
+
|
67
|
+
Returns
|
68
|
+
-------
|
69
|
+
np.ndarray
|
70
|
+
Output array with the softmax output
|
71
|
+
"""
|
58
72
|
e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
|
59
73
|
return e_x / e_x.sum(axis=axis, keepdims=True)
|
60
74
|
|
61
75
|
|
62
|
-
def get_mock_scores(labels: np.ndarray, is_xbb: bool = False):
|
63
|
-
means = [
|
64
|
-
[2, 0, 0, 0],
|
65
|
-
[0, 1, 0, 0],
|
66
|
-
[0, 0, 3.5, 0],
|
67
|
-
[0, 0, 0, 1],
|
68
|
-
]
|
76
|
+
def get_mock_scores(labels: np.ndarray, is_xbb: bool = False) -> np.ndarray:
|
69
77
|
if not is_xbb:
|
70
78
|
label_dict = {"u": 0, "c": 4, "b": 5, "tau": 15}
|
71
|
-
label_mapping = dict(zip(label_dict.values(), means))
|
72
|
-
else:
|
73
|
-
label_dict = {"hbb": 11, "hcc": 12, "top": 1, "qcd": 10}
|
74
|
-
label_mapping = dict(zip(label_dict.values(), means))
|
75
79
|
|
80
|
+
else:
|
81
|
+
label_dict = {
|
82
|
+
"hbb": 11,
|
83
|
+
"hcc": 12,
|
84
|
+
"top": 1,
|
85
|
+
"qcd": 10,
|
86
|
+
"htauel": 14,
|
87
|
+
"htaumu": 15,
|
88
|
+
"htauhad": 16,
|
89
|
+
}
|
90
|
+
|
91
|
+
# Set random seed
|
76
92
|
rng = np.random.default_rng(42)
|
77
|
-
|
78
|
-
|
79
|
-
|
93
|
+
|
94
|
+
# Set a list of possible means/scales
|
95
|
+
mean_scale_list = [1, 2, 2.5, 3.5]
|
96
|
+
|
97
|
+
# Get the number of classes
|
98
|
+
n_classes = len(label_dict)
|
99
|
+
|
100
|
+
# Init a scores array
|
101
|
+
scores = np.zeros((len(labels), n_classes))
|
102
|
+
|
103
|
+
# Generate means/scales
|
104
|
+
means = []
|
105
|
+
scales = []
|
106
|
+
for i in range(n_classes):
|
107
|
+
tmp_means = []
|
108
|
+
tmp_means = [
|
109
|
+
0 if j != i else mean_scale_list[np.random.randint(0, len(mean_scale_list))]
|
110
|
+
for j in range(n_classes)
|
111
|
+
]
|
112
|
+
means.append(tmp_means)
|
113
|
+
scales.append(mean_scale_list[np.random.randint(0, len(mean_scale_list))])
|
114
|
+
|
115
|
+
# Map the labels to the means
|
116
|
+
label_mapping = dict(zip(label_dict.values(), means))
|
117
|
+
|
118
|
+
# Generate random mock scores
|
80
119
|
for i, (label, count) in enumerate(zip(*np.unique(labels, return_counts=True))):
|
81
120
|
scores[labels == label] = rng.normal(
|
82
|
-
loc=label_mapping[label], scale=scales[i], size=(count,
|
121
|
+
loc=label_mapping[label], scale=scales[i], size=(count, n_classes)
|
83
122
|
)
|
123
|
+
|
124
|
+
# Pipe scores through softmax
|
84
125
|
scores = softmax(scores, axis=1)
|
85
126
|
name = "MockXbbTagger" if is_xbb else "MockTagger"
|
86
127
|
cols = [f"{name}_p{x}" for x in label_dict]
|
@@ -103,7 +144,7 @@ def mock_jets(num_jets=1000) -> np.ndarray:
|
|
103
144
|
jets["HadronConeExclTruthLabelID"] = rng.choice([0, 4, 5, 15], size=num_jets)
|
104
145
|
jets["GhostBHadronsFinalCount"] = rng.choice([0, 1, 2], size=num_jets)
|
105
146
|
jets["GhostCHadronsFinalCount"] = rng.choice([0, 1, 2], size=num_jets)
|
106
|
-
jets["R10TruthLabel_R22v1"] = rng.choice([1, 10, 11, 12], size=num_jets)
|
147
|
+
jets["R10TruthLabel_R22v1"] = rng.choice([1, 10, 11, 12, 14, 15, 16], size=num_jets)
|
107
148
|
scores = get_mock_scores(jets["HadronConeExclTruthLabelID"])
|
108
149
|
xbb_scores = get_mock_scores(jets["R10TruthLabel_R22v1"], is_xbb=True)
|
109
150
|
return join_structured_arrays([jets, scores, xbb_scores])
|
@@ -0,0 +1,84 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
import numpy as np
|
6
|
+
|
7
|
+
if TYPE_CHECKING: # pragma: no cover
|
8
|
+
from ftag.labels import Label, LabelContainer
|
9
|
+
|
10
|
+
|
11
|
+
def get_discriminant(
|
12
|
+
jets: np.ndarray,
|
13
|
+
tagger: str,
|
14
|
+
signal: Label,
|
15
|
+
flavours: LabelContainer,
|
16
|
+
fraction_values: dict[str, float],
|
17
|
+
epsilon: float = 1e-10,
|
18
|
+
) -> np.ndarray:
|
19
|
+
"""Calculate the tagging discriminant for a given tagger.
|
20
|
+
|
21
|
+
Calculated as the logarithm of the ratio of a specified signal probability
|
22
|
+
to a weighted sum of background probabilities.
|
23
|
+
|
24
|
+
Parameters
|
25
|
+
----------
|
26
|
+
jets : np.ndarray
|
27
|
+
Structured array of jets containing tagger outputs
|
28
|
+
tagger : str
|
29
|
+
Name of the tagger
|
30
|
+
signal : Label
|
31
|
+
Signal flavour (bjets/cjets or hbb/hcc)
|
32
|
+
fraction_values : dict
|
33
|
+
Dict with the fraction values for the background classes for the given tagger
|
34
|
+
epsilon : float, optional
|
35
|
+
Small number to avoid division by zero, by default 1e-10
|
36
|
+
|
37
|
+
Returns
|
38
|
+
-------
|
39
|
+
np.ndarray
|
40
|
+
Array of discriminant values.
|
41
|
+
|
42
|
+
Raises
|
43
|
+
------
|
44
|
+
ValueError
|
45
|
+
If the signal flavour is not recognised.
|
46
|
+
"""
|
47
|
+
# Init the denominator
|
48
|
+
denominator = 0.0
|
49
|
+
|
50
|
+
# Loop over background flavours
|
51
|
+
for flav in flavours:
|
52
|
+
# Skip signal flavour for denominator
|
53
|
+
if flav == signal:
|
54
|
+
continue
|
55
|
+
|
56
|
+
# Get the probability name of the tagger/flavour combo + fraction value
|
57
|
+
prob_name = f"{tagger}_{flav.px}"
|
58
|
+
fraction_value = fraction_values[flav.frac_str]
|
59
|
+
|
60
|
+
# If fraction_value for the given flavour is zero, skip it
|
61
|
+
if fraction_value == 0:
|
62
|
+
continue
|
63
|
+
|
64
|
+
# Check that the probability value for the flavour is available
|
65
|
+
if fraction_value > 0 and prob_name not in jets.dtype.names:
|
66
|
+
raise ValueError(
|
67
|
+
f"Nonzero fraction value for {flav.name}, but '{prob_name}' "
|
68
|
+
"not found in input array."
|
69
|
+
)
|
70
|
+
|
71
|
+
# Update denominator
|
72
|
+
denominator += jets[prob_name] * fraction_value if prob_name in jets.dtype.names else 0
|
73
|
+
|
74
|
+
# Calculate numerator
|
75
|
+
signal_field = f"{tagger}_{signal.px}"
|
76
|
+
|
77
|
+
# Check that the probability of the signal is available
|
78
|
+
if signal_field not in jets.dtype.names:
|
79
|
+
raise ValueError(
|
80
|
+
f"No signal probability value(s) found for tagger {tagger}. "
|
81
|
+
f"Missing variable: {signal_field}"
|
82
|
+
)
|
83
|
+
|
84
|
+
return np.log((jets[signal_field] + epsilon) / (denominator + epsilon))
|