PyPI - atlas-ftag-tools - Versions diffs - 0.2.8__tar.gz → 0.2.9__tar.gz - Mend

atlas-ftag-tools 0.2.8tar.gz → 0.2.9tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: atlas-ftag-tools
-Version: 0.2.8
+Version: 0.2.9
 Summary: ATLAS Flavour Tagging Tools
 Author: Sam Van Stroud, Philipp Gadow
 License: MIT

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: atlas-ftag-tools
-Version: 0.2.8
+Version: 0.2.9
 Summary: ATLAS Flavour Tagging Tools
 Author: Sam Van Stroud, Philipp Gadow
 License: MIT

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/__init__.py RENAMED Viewed

@@ -2,7 +2,7 @@
 from __future__ import annotations
-__version__ = "v0.2.8"
+__version__ = "v0.2.9"
 from ftag import hdf5
 from ftag.cuts import Cuts

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/flavours.yaml RENAMED Viewed

@@ -60,12 +60,24 @@
   colour: tab:orange
   category: single-btag-ghost
   _px: pc
-- name: ghostujets
-  label: Light-jets
-  cuts: ["HadronGhostTruthLabelID == 0"]
+- name: ghostsjets
+  label: $s$-jets
+  cuts: ["HadronGhostTruthLabelID == 0", "PartonTruthLabelID == 3"]
+  colour: tab:red
+  category: single-btag-ghost
+  _px: ps
+- name: ghostudjets
+  label: Light-quark-jets
+  cuts: ["HadronGhostTruthLabelID == 0", "PartonTruthLabelID <= 2"]
   colour: tab:green
   category: single-btag-ghost
-  _px: pu
+  _px: pud
+- name: ghostgjets
+  label: Gluon-jets
+  cuts: ["HadronGhostTruthLabelID == 0", "PartonTruthLabelID == 21"]
+  colour: tab:gray
+  category: single-btag-ghost
+  _px: pg
 - name: ghosttaujets
   label: $\tau$-jets
   cuts: ["HadronGhostTruthLabelID == 15"]
@@ -119,6 +131,21 @@
   cuts: ["R10TruthLabel_R22v1 == 10", "GhostBHadronsFinalCount == 0", "GhostCHadronsFinalCount == 0"]
   colour: "green"
   category: xbb
+- name: htauel
+  label: $H \rightarrow \tau e$
+  cuts: ["R10TruthLabel_R22v1 == 14"]
+  colour: "#b40612"
+  category: xbb
+- name: htaumu
+  label: $H \rightarrow \tau\mu$
+  cuts: ["R10TruthLabel_R22v1 == 15"]
+  colour: "#b40657"
+  category: xbb
+- name: htauhad
+  label: $H \rightarrow \tau\tau$
+  cuts: ["R10TruthLabel_R22v1 == 16"]
+  colour: "#b406a0"
+  category: xbb
 # extended Xbb tagging
 - name: tqqb

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/mock.py RENAMED Viewed

@@ -54,33 +54,74 @@ TRACK_VARS = [
 ]
-def softmax(x, axis=None):
+def softmax(x: np.ndarray, axis: int | None = None) -> np.ndarray:
+    """Softmax function for numpy arrays.
+    Parameters
+    ----------
+    x : np.ndarray
+        Input array for the softmax
+    axis : int | None, optional
+        Axis along which the softmax is calculated, by default None
+    Returns
+    -------
+    np.ndarray
+        Output array with the softmax output
+    """
     e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
     return e_x / e_x.sum(axis=axis, keepdims=True)
-def get_mock_scores(labels: np.ndarray, is_xbb: bool = False):
-    means = [
-        [2, 0, 0, 0],
-        [0, 1, 0, 0],
-        [0, 0, 3.5, 0],
-        [0, 0, 0, 1],
-    ]
+def get_mock_scores(labels: np.ndarray, is_xbb: bool = False) -> np.ndarray:
     if not is_xbb:
         label_dict = {"u": 0, "c": 4, "b": 5, "tau": 15}
-        label_mapping = dict(zip(label_dict.values(), means))
-    else:
-        label_dict = {"hbb": 11, "hcc": 12, "top": 1, "qcd": 10}
-        label_mapping = dict(zip(label_dict.values(), means))
+    else:
+        label_dict = {
+            "hbb": 11,
+            "hcc": 12,
+            "top": 1,
+            "qcd": 10,
+            "htauel": 14,
+            "htaumu": 15,
+            "htauhad": 16,
+        }
+    # Set random seed
     rng = np.random.default_rng(42)
-    nclass = len(label_dict)
-    scores = np.zeros((len(labels), nclass))
-    scales = [1, 2.5, 5, 1]
+    # Set a list of possible means/scales
+    mean_scale_list = [1, 2, 2.5, 3.5]
+    # Get the number of classes
+    n_classes = len(label_dict)
+    # Init a scores array
+    scores = np.zeros((len(labels), n_classes))
+    # Generate means/scales
+    means = []
+    scales = []
+    for i in range(n_classes):
+        tmp_means = []
+        tmp_means = [
+            0 if j != i else mean_scale_list[np.random.randint(0, len(mean_scale_list))]
+            for j in range(n_classes)
+        ]
+        means.append(tmp_means)
+        scales.append(mean_scale_list[np.random.randint(0, len(mean_scale_list))])
+    # Map the labels to the means
+    label_mapping = dict(zip(label_dict.values(), means))
+    # Generate random mock scores
     for i, (label, count) in enumerate(zip(*np.unique(labels, return_counts=True))):
         scores[labels == label] = rng.normal(
-            loc=label_mapping[label], scale=scales[i], size=(count, nclass)
+            loc=label_mapping[label], scale=scales[i], size=(count, n_classes)
         )
+    # Pipe scores through softmax
     scores = softmax(scores, axis=1)
     name = "MockXbbTagger" if is_xbb else "MockTagger"
     cols = [f"{name}_p{x}" for x in label_dict]
@@ -103,7 +144,7 @@ def mock_jets(num_jets=1000) -> np.ndarray:
     jets["HadronConeExclTruthLabelID"] = rng.choice([0, 4, 5, 15], size=num_jets)
     jets["GhostBHadronsFinalCount"] = rng.choice([0, 1, 2], size=num_jets)
     jets["GhostCHadronsFinalCount"] = rng.choice([0, 1, 2], size=num_jets)
-    jets["R10TruthLabel_R22v1"] = rng.choice([1, 10, 11, 12], size=num_jets)
+    jets["R10TruthLabel_R22v1"] = rng.choice([1, 10, 11, 12, 14, 15, 16], size=num_jets)
     scores = get_mock_scores(jets["HadronConeExclTruthLabelID"])
     xbb_scores = get_mock_scores(jets["R10TruthLabel_R22v1"], is_xbb=True)
     return join_structured_arrays([jets, scores, xbb_scores])

atlas_ftag_tools-0.2.9/ftag/wps/discriminant.py ADDED Viewed

@@ -0,0 +1,84 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import numpy as np
+if TYPE_CHECKING:  # pragma: no cover
+    from ftag.labels import Label, LabelContainer
+def get_discriminant(
+    jets: np.ndarray,
+    tagger: str,
+    signal: Label,
+    flavours: LabelContainer,
+    fraction_values: dict[str, float],
+    epsilon: float = 1e-10,
+) -> np.ndarray:
+    """Calculate the tagging discriminant for a given tagger.
+    Calculated as the logarithm of the ratio of a specified signal probability
+    to a weighted sum of background probabilities.
+    Parameters
+    ----------
+    jets : np.ndarray
+        Structured array of jets containing tagger outputs
+    tagger : str
+        Name of the tagger
+    signal : Label
+        Signal flavour (bjets/cjets or hbb/hcc)
+    fraction_values : dict
+        Dict with the fraction values for the background classes for the given tagger
+    epsilon : float, optional
+        Small number to avoid division by zero, by default 1e-10
+    Returns
+    -------
+    np.ndarray
+        Array of discriminant values.
+    Raises
+    ------
+    ValueError
+        If the signal flavour is not recognised.
+    """
+    # Init the denominator
+    denominator = 0.0
+    # Loop over background flavours
+    for flav in flavours:
+        # Skip signal flavour for denominator
+        if flav == signal:
+            continue
+        # Get the probability name of the tagger/flavour combo + fraction value
+        prob_name = f"{tagger}_{flav.px}"
+        fraction_value = fraction_values[flav.frac_str]
+        # If fraction_value for the given flavour is zero, skip it
+        if fraction_value == 0:
+            continue
+        # Check that the probability value for the flavour is available
+        if fraction_value > 0 and prob_name not in jets.dtype.names:
+            raise ValueError(
+                f"Nonzero fraction value for {flav.name}, but '{prob_name}' "
+                "not found in input array."
+            )
+        # Update denominator
+        denominator += jets[prob_name] * fraction_value if prob_name in jets.dtype.names else 0
+    # Calculate numerator
+    signal_field = f"{tagger}_{signal.px}"
+    # Check that the probability of the signal is available
+    if signal_field not in jets.dtype.names:
+        raise ValueError(
+            f"No signal probability value(s) found for tagger {tagger}. "
+            f"Missing variable: {signal_field}"
+        )
+    return np.log((jets[signal_field] + epsilon) / (denominator + epsilon))

atlas_ftag_tools-0.2.9/ftag/wps/working_points.py ADDED Viewed

@@ -0,0 +1,547 @@
+"""Calculate tagger working points."""
+from __future__ import annotations
+import argparse
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING
+import numpy as np
+import yaml
+from ftag import Flavours
+from ftag.cli_utils import HelpFormatter
+from ftag.cuts import Cuts
+from ftag.hdf5 import H5Reader
+from ftag.wps.discriminant import get_discriminant
+if TYPE_CHECKING:  # pragma: no cover
+    from collections.abc import Sequence
+    from ftag.labels import Label, LabelContainer
+def parse_args(args: Sequence[str]) -> argparse.Namespace:
+    """Parse the input arguments into a Namespace.
+    Parameters
+    ----------
+    args : Sequence[str] | None
+        Sequence of string inputs to the script
+    Returns
+    -------
+    argparse.Namespace
+        Namespace with the parsed arguments
+    Raises
+    ------
+    ValueError
+        When both --effs and --disc_cuts are provided
+    ValueError
+        When neither --effs nor --disc_cuts are provided
+    ValueError
+        When the number of fraction values is not consistent
+    ValueError
+        When the sum of fraction values for a tagger is not equal to one
+    """
+    # Define the pre-parser which checks the --category
+    pre_parser = argparse.ArgumentParser(add_help=False)
+    pre_parser.add_argument(
+        "-c",
+        "--category",
+        default="single-btag",
+        type=str,
+        help="Label category to use for the working point calculation",
+    )
+    pre_parser.add_argument(
+        "-s",
+        "--signal",
+        default="bjets",
+        type=str,
+        help="Signal flavour which is to be used",
+    )
+    # Parse only --category/--signal and ignore for now all other args
+    pre_args, remaining_argv = pre_parser.parse_known_args(args=args)
+    # Create the "real" parser
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=HelpFormatter,
+    )
+    # Add --category/--signal so the help is correctly shown
+    parser.add_argument(
+        "-c",
+        "--category",
+        default="single-btag",
+        type=str,
+        help="Label category to use for the working point calculation",
+    )
+    parser.add_argument(
+        "-s",
+        "--signal",
+        default="bjets",
+        type=str,
+        help="Signal flavour which is to be used",
+    )
+    # Check which label category was chosen and load the corresponding flavours
+    flavours = Flavours.by_category(pre_args.category)
+    # Build the fraction value arguments for all classes (besides signal)
+    for flav in flavours:
+        # Skip signal
+        if flav.name == pre_args.signal:
+            continue
+        # Build fraction values for all background classes
+        parser.add_argument(
+            f"--{flav.frac_str}",
+            nargs="+",
+            required=True,
+            type=float,
+            help=f"{flav.frac_str} value(s) for each tagger",
+        )
+    # # Adding the other arguments
+    parser.add_argument(
+        "--ttbar",
+        required=True,
+        type=Path,
+        help="Path to ttbar sample (supports globbing)",
+    )
+    parser.add_argument(
+        "--zprime",
+        required=False,
+        type=Path,
+        help="Path to zprime (supports globbing). WPs from ttbar will be reused for zprime",
+    )
+    parser.add_argument(
+        "-t",
+        "--tagger",
+        nargs="+",
+        required=True,
+        type=str,
+        help="tagger name(s)",
+    )
+    parser.add_argument(
+        "-e",
+        "--effs",
+        nargs="+",
+        type=float,
+        help="Efficiency working point(s). If -r is specified, values should be 1/efficiency",
+    )
+    parser.add_argument(
+        "-r",
+        "--rejection",
+        default=None,
+        help="Use rejection of specified background class to determine working points",
+    )
+    parser.add_argument(
+        "-d",
+        "--disc_cuts",
+        nargs="+",
+        type=float,
+        help="D_x value(s) to calculate efficiency at",
+    )
+    parser.add_argument(
+        "-n",
+        "--num_jets",
+        default=1_000_000,
+        type=int,
+        help="Use this many jets (post selection)",
+    )
+    parser.add_argument(
+        "--ttbar_cuts",
+        nargs="+",
+        default=["pt > 20e3"],
+        type=list,
+        help="Selection to apply to ttbar (|eta| < 2.5 is always applied)",
+    )
+    parser.add_argument(
+        "--zprime_cuts",
+        nargs="+",
+        default=["pt > 250e3"],
+        type=list,
+        help="Selection to apply to zprime (|eta| < 2.5 is always applied)",
+    )
+    parser.add_argument(
+        "-o",
+        "--outfile",
+        type=Path,
+        help="Save results to yaml instead of printing",
+    )
+    # Final parse of all arguments
+    parsed_args = parser.parse_args(remaining_argv)
+    # Define the signal as an instance of Flavours
+    parsed_args.signal = Flavours[parsed_args.signal]
+    # Check that only --effs or --disc_cuts is given
+    if parsed_args.effs and parsed_args.disc_cuts:
+        raise ValueError("Cannot specify both --effs and --disc_cuts")
+    if not parsed_args.effs and not parsed_args.disc_cuts:
+        raise ValueError("Must specify either --effs or --disc_cuts")
+    # Check that all fraction values have the same length
+    for flav in flavours:
+        if flav.name != parsed_args.signal.name and len(getattr(parsed_args, flav.frac_str)) != len(
+            parsed_args.tagger
+        ):
+            raise ValueError(f"Number of {flav.frac_str} values must match number of taggers")
+    # Check that all fraction value combinations add up to one
+    for tagger_idx in range(len(parsed_args.tagger)):
+        fraction_value_sum = 0
+        for flav in flavours:
+            if flav.name != parsed_args.signal.name:
+                fraction_value_sum += getattr(parsed_args, flav.frac_str)[tagger_idx]
+        # Round the value to take machine precision into account
+        fraction_value_sum = np.round(fraction_value_sum, 8)
+        # Check it's equal to one
+        if fraction_value_sum != 1:
+            raise ValueError(
+                "Sum of the fraction values must be one! You gave "
+                f"{fraction_value_sum} for tagger {parsed_args.tagger[tagger_idx]}"
+            )
+    return parsed_args
+def get_fxs_from_args(args: argparse.Namespace, flavours: LabelContainer) -> list:
+    """Get the fraction values for each tagger from the argparsed inputs.
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Input arguments parsed by the argparser
+    flavours : LabelContainer
+        LabelContainer instance of the labels that are used
+    Returns
+    -------
+    list
+        List of dicts with the fraction values. Each dict is for one tagger.
+    """
+    # Init the fraction_dict dict
+    fraction_dict = {}
+    # Add the fraction values to the dict
+    for flav in flavours:
+        if flav.name != args.signal.name:
+            fraction_dict[flav.frac_str] = vars(args)[flav.frac_str]
+    return [{k: v[i] for k, v in fraction_dict.items()} for i in range(len(args.tagger))]
+def get_eff_rej(
+    jets: np.ndarray,
+    disc: np.ndarray,
+    wp: float,
+    flavours: LabelContainer,
+) -> dict:
+    """Calculate the efficiency/rejection for each flavour.
+    Parameters
+    ----------
+    jets : np.ndarray
+        Loaded jets
+    disc : np.ndarray
+        Discriminant values of the jets
+    wp : float
+        Working point that is used
+    flavours : LabelContainer
+        LabelContainer instance of the flavours used
+    Returns
+    -------
+    dict
+        Dict with the efficiency/rejection values for each flavour
+    """
+    # Init an out dict
+    out: dict[str, dict] = {"eff": {}, "rej": {}}
+    # Loop over the flavours
+    for flav in flavours:
+        # Calculate discriminant values and efficiencies/rejections
+        flav_disc = disc[flav.cuts(jets).idx]
+        eff = sum(flav_disc > wp) / len(flav_disc)
+        out["eff"][flav.name] = float(f"{eff:.3g}")
+        out["rej"][flav.name] = float(f"{1 / eff:.3g}")
+    return out
+def get_rej_eff_at_disc(
+    jets: np.ndarray,
+    tagger: str,
+    signal: Label,
+    disc_cuts: list,
+    flavours: LabelContainer,
+    fraction_values: dict,
+) -> dict:
+    """Calculate the efficiency/rejection at certain discriminant values.
+    Parameters
+    ----------
+    jets : np.ndarray
+        Loaded jets used
+    tagger : str
+        Name of the tagger
+    signal : Label
+        Label instance of the signal flavour
+    disc_cuts : list
+        List of discriminant cut values for which the efficiency/rejection is calculated
+    flavours : LabelContainer
+        LabelContainer instance of the flavours that are used
+    Returns
+    -------
+    dict
+        Dict with the discriminant cut values and their respective efficiencies/rejections
+    """
+    # Calculate discriminants
+    disc = get_discriminant(
+        jets=jets,
+        tagger=tagger,
+        signal=signal,
+        flavours=flavours,
+        fraction_values=fraction_values,
+    )
+    # Init out dict
+    ref_eff_dict: dict[str, dict] = {}
+    # Loop over the disc cut values
+    for dcut in disc_cuts:
+        ref_eff_dict[str(dcut)] = {"eff": {}, "rej": {}}
+        # Loop over the flavours
+        for flav in flavours:
+            e_discs = disc[flav.cuts(jets).idx]
+            eff = sum(e_discs > dcut) / len(e_discs)
+            ref_eff_dict[str(dcut)]["eff"][str(flav)] = float(f"{eff:.3g}")
+            ref_eff_dict[str(dcut)]["rej"][str(flav)] = 1 / float(f"{eff:.3g}")
+    return ref_eff_dict
+def setup_common_parts(
+    args: argparse.Namespace,
+) -> tuple[np.ndarray, np.ndarray | None, LabelContainer]:
+    """Load the jets from the files and setup the taggers.
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Input arguments from the argparser
+    Returns
+    -------
+    tuple[np.ndarray, np.ndarray | None, LabelContainer]
+        Outputs the ttbar jets, the zprime jets (if wanted, else None), and the flavours used.
+    """
+    # Get the used flavours
+    flavours = Flavours.by_category(args.category)
+    # Get the cuts for the samples
+    default_cuts = Cuts.from_list(["eta > -2.5", "eta < 2.5"])
+    ttbar_cuts = Cuts.from_list(args.ttbar_cuts) + default_cuts
+    zprime_cuts = Cuts.from_list(args.zprime_cuts) + default_cuts
+    # Prepare the loading of the jets
+    all_vars = list(set(sum((flav.cuts.variables for flav in flavours), [])))
+    reader = H5Reader(args.ttbar)
+    jet_vars = reader.dtypes()["jets"].names
+    # Create for all taggers the fraction values
+    for tagger in args.tagger:
+        all_vars += [
+            f"{tagger}_{flav.px}" for flav in flavours if (f"{tagger}_{flav.px}" in jet_vars)
+        ]
+    # Load ttbar jets
+    ttbar_jets = reader.load({"jets": all_vars}, args.num_jets, cuts=ttbar_cuts)["jets"]
+    zprime_jets = None
+    # Load zprime jets if needed
+    if args.zprime:
+        zprime_reader = H5Reader(args.zprime)
+        zprime_jets = zprime_reader.load({"jets": all_vars}, args.num_jets, cuts=zprime_cuts)[
+            "jets"
+        ]
+    else:
+        zprime_jets = None
+    return ttbar_jets, zprime_jets, flavours
+def get_working_points(args: argparse.Namespace) -> dict | None:
+    """Calculate the working points.
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Input arguments from the argparser
+    Returns
+    -------
+    dict | None
+        Dict with the working points. If args.outfile is given, the function returns None and
+        stores the resulting dict in a yaml file in args.outfile.
+    """
+    # Load the jets and flavours and get the fraction values
+    ttbar_jets, zprime_jets, flavours = setup_common_parts(args=args)
+    fraction_values = get_fxs_from_args(args=args, flavours=flavours)
+    # Init an out dict
+    out = {}
+    # Loop over taggers
+    for i, tagger in enumerate(args.tagger):
+        # Calculate discriminant
+        out[tagger] = {"signal": str(args.signal), **fraction_values[i]}
+        disc = get_discriminant(
+            jets=ttbar_jets,
+            tagger=tagger,
+            signal=args.signal,
+            flavours=flavours,
+            fraction_values=fraction_values[i],
+        )
+        # Loop over efficiency working points
+        for eff in args.effs:
+            d = out[tagger][f"{eff:.0f}"] = {}
+            # Set the working point
+            wp_flavour = args.signal
+            if args.rejection:
+                eff = 100 / eff  # noqa: PLW2901
+                wp_flavour = args.rejection
+            # Calculate the discriminant value of the working point
+            wp_disc = disc[flavours[wp_flavour].cuts(ttbar_jets).idx]
+            wp = d["cut_value"] = round(float(np.percentile(wp_disc, 100 - eff)), 3)
+            # Calculate efficiency and rejection for each flavour
+            d["ttbar"] = get_eff_rej(
+                jets=ttbar_jets,
+                disc=disc,
+                wp=wp,
+                flavours=flavours,
+            )
+            # calculate for zprime
+            if args.zprime:
+                zprime_disc = get_discriminant(
+                    jets=zprime_jets,
+                    tagger=tagger,
+                    signal=args.signal,
+                    flavours=flavours,
+                    fraction_values=fraction_values[i],
+                )
+                d["zprime"] = get_eff_rej(
+                    jets=zprime_jets,
+                    disc=zprime_disc,
+                    wp=wp,
+                    flavours=flavours,
+                )
+    if args.outfile:
+        with open(args.outfile, "w") as f:
+            yaml.dump(out, f, sort_keys=False)
+            return None
+    else:
+        return out
+def get_efficiencies(args: argparse.Namespace) -> dict | None:
+    """Calculate the efficiencies for the given jets.
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Input arguments from the argparser
+    Returns
+    -------
+    dict | None
+        Dict with the efficiencies. If args.outfile is given, the function returns None and
+        stores the resulting dict in a yaml file in args.outfile.
+    """
+    # Load the jets and flavours and get the fraction values
+    ttbar_jets, zprime_jets, flavours = setup_common_parts(args=args)
+    fraction_values = get_fxs_from_args(args=args, flavours=flavours)
+    # Init an out dict
+    out = {}
+    # Loop over the taggers
+    for i, tagger in enumerate(args.tagger):
+        out[tagger] = {"signal": str(args.signal), **fraction_values[i]}
+        out[tagger]["ttbar"] = get_rej_eff_at_disc(
+            jets=ttbar_jets,
+            tagger=tagger,
+            signal=args.signal,
+            disc_cuts=args.disc_cuts,
+            flavours=flavours,
+            fraction_values=fraction_values[i],
+        )
+        if args.zprime:
+            out[tagger]["zprime"] = get_rej_eff_at_disc(
+                jets=zprime_jets,
+                tagger=tagger,
+                signal=args.signal,
+                disc_cuts=args.disc_cuts,
+                flavours=flavours,
+                fraction_values=fraction_values[i],
+            )
+    if args.outfile:
+        with open(args.outfile, "w") as f:
+            yaml.dump(out, f, sort_keys=False)
+            return None
+    else:
+        return out
+def main(args: Sequence[str]) -> dict | None:
+    """Main function to run working point calculation.
+    Parameters
+    ----------
+    args : Sequence[str] | None, optional
+        Input arguments, by default None
+    Returns
+    -------
+    dict | None
+        The output dict with the calculated values. When --outfile
+        was given, the return value is None
+    """
+    parsed_args = parse_args(args=args)
+    if parsed_args.effs:
+        out = get_working_points(args=parsed_args)
+    elif parsed_args.disc_cuts:
+        out = get_efficiencies(args=parsed_args)
+    if out:
+        print(yaml.dump(out, sort_keys=False))
+        return out
+    return None
+if __name__ == "__main__":  # pragma: no cover
+    main(args=sys.argv[1:])

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/pyproject.toml RENAMED Viewed

@@ -57,7 +57,8 @@ lint.ignore = [
     "ANN001", "ANN002", "ANN003", "ANN101", "ANN201", "ANN202", "ANN204",
     "T201", "PLW1514", "PTH123", "RUF017", "PLR6301", "ISC001", "S307",
     "PT027", "NPY002", "PT009", "PLW1641", "PLR0904", "N817", "S603", "PD011",
-    "S113", "TCH", "PT011", "PLR1702", "S108", "PTH207", "S607", "E203", "SIM115"
+    "S113", "TCH", "PT011", "PLR1702", "S108", "PTH207", "S607", "E203", "SIM115", "PLR0913",
+    "PLR0917"
 ]
 [tool.ruff.lint.flake8-pytest-style]

atlas_ftag_tools-0.2.8/ftag/wps/discriminant.py DELETED Viewed

@@ -1,131 +0,0 @@
-from __future__ import annotations
-from typing import Callable
-import numpy as np
-from ftag import Flavours
-from ftag.labels import Label, remove_suffix
-def discriminant(
-    jets: np.ndarray,
-    tagger: str,
-    signal: Label,
-    fxs: dict[str, float],
-    epsilon: float = 1e-10,
-) -> np.ndarray:
-    """
-    Get the tagging discriminant.
-    Calculated as the logarithm of the ratio of a specified signal probability
-    to a weighted sum ofbackground probabilities.
-    Parameters
-    ----------
-    jets : np.ndarray
-        Structed jet array containing tagger scores.
-    tagger : str
-        Name of the tagger, used to construct field names.
-    signal : str
-        Type of signal.
-    fxs : dict[str, float]
-        Dict of background probability names and their fractions.
-        If a fraction is None, it is calculated as (1 - sum of provided fractions).
-    epsilon : float, optional
-        A small value added to probabilities to prevent division by zero, by default 1e-10.
-    Returns
-    -------
-    np.ndarray
-        The tagger discriminant values for the jets.
-    Raises
-    ------
-    ValueError
-        If a fraction is specified for a denominator that is not present in the input array.
-    """
-    denominator = 0.0
-    for d, fx in fxs.items():
-        name = f"{tagger}_{d}"
-        if fx > 0 and name not in jets.dtype.names:
-            raise ValueError(f"Nonzero fx for {d}, but '{name}' not found in input array.")
-        denominator += jets[name] * fx if name in jets.dtype.names else 0
-    signal_field = f"{tagger}_{signal.px}"
-    if signal_field not in jets.dtype.names:
-        signal_field = f"{tagger}_p{remove_suffix(signal.name, 'jets')}"
-    return np.log((jets[signal_field] + epsilon) / (denominator + epsilon))
-def tautag_dicriminant(jets, tagger, fb, fc, epsilon=1e-10):
-    fxs = {"pb": fb, "pc": fc, "pu": 1 - fb - fc}
-    return discriminant(jets, tagger, Flavours.taujets, fxs, epsilon=epsilon)
-def btag_discriminant(jets, tagger, fc, ftau=0, epsilon=1e-10):
-    fxs = {"pc": fc, "ptau": ftau, "pu": 1 - fc - ftau}
-    return discriminant(jets, tagger, Flavours.bjets, fxs, epsilon=epsilon)
-def ghostbtag_discriminant(jets, tagger, fc, ftau=0, epsilon=1e-10):
-    fxs = {"pghostc": fc, "pghosttau": ftau, "pghostu": 1 - fc - ftau}
-    return discriminant(jets, tagger, Flavours.ghostbjets, fxs, epsilon=epsilon)
-def ctag_discriminant(jets, tagger, fb, ftau=0, epsilon=1e-10):
-    fxs = {"pb": fb, "ptau": ftau, "pu": 1 - fb - ftau}
-    return discriminant(jets, tagger, Flavours.cjets, fxs, epsilon=epsilon)
-def hbb_discriminant(jets, tagger, ftop=0.25, fhcc=0.02, epsilon=1e-10):
-    fxs = {"phcc": fhcc, "ptop": ftop, "pqcd": 1 - ftop - fhcc}
-    return discriminant(jets, tagger, Flavours.hbb, fxs, epsilon=epsilon)
-def hcc_discriminant(jets, tagger, ftop=0.25, fhbb=0.3, epsilon=1e-10):
-    fxs = {"phbb": fhbb, "ptop": ftop, "pqcd": 1 - ftop - fhbb}
-    return discriminant(jets, tagger, Flavours.hcc, fxs, epsilon=epsilon)
-def get_discriminant(
-    jets: np.ndarray, tagger: str, signal: Label | str, epsilon: float = 1e-10, **fxs
-):
-    """Calculate the b-tag or c-tag discriminant for a given tagger.
-    Parameters
-    ----------
-    jets : np.ndarray
-        Structured array of jets containing tagger outputs
-    tagger : str
-        Name of the tagger
-    signal : Label
-        Signal flavour (bjets/cjets or hbb/hcc)
-    epsilon : float, optional
-        Small number to avoid division by zero, by default 1e-10
-    **fxs : dict
-        Fractions for the different background flavours.
-    Returns
-    -------
-    np.ndarray
-        Array of discriminant values.
-    Raises
-    ------
-    ValueError
-        If the signal flavour is not recognised.
-    """
-    tagger_funcs: dict[str, Callable] = {
-        "bjets": btag_discriminant,
-        "cjets": ctag_discriminant,
-        "taujets": tautag_dicriminant,
-        "hbb": hbb_discriminant,
-        "hcc": hcc_discriminant,
-        "ghostbjets": ghostbtag_discriminant,
-    }
-    if str(signal) not in tagger_funcs:
-        raise ValueError(f"Signal flavour must be one of {list(tagger_funcs.keys())}, not {signal}")
-    func: Callable = tagger_funcs[str(Flavours[signal])]
-    return func(jets, tagger, **fxs, epsilon=epsilon)

atlas_ftag_tools-0.2.8/ftag/wps/working_points.py DELETED Viewed

@@ -1,316 +0,0 @@
-"""Calculate tagger working points."""
-from __future__ import annotations
-import argparse
-from pathlib import Path
-import numpy as np
-import yaml
-from ftag import Flavours
-from ftag.cli_utils import HelpFormatter
-from ftag.cuts import Cuts
-from ftag.hdf5 import H5Reader
-from ftag.wps.discriminant import get_discriminant
-def parse_args(args):
-    parser = argparse.ArgumentParser(
-        description=__doc__,
-        formatter_class=HelpFormatter,
-    )
-    parser.add_argument(
-        "--ttbar",
-        required=True,
-        type=Path,
-        help="path to ttbar sample (supports globbing)",
-    )
-    parser.add_argument(
-        "--zprime",
-        required=False,
-        type=Path,
-        help="path to zprime (supports globbing). WPs from ttbar will be reused for zprime",
-    )
-    parser.add_argument(
-        "-e",
-        "--effs",
-        nargs="+",
-        type=float,
-        help="efficiency working point(s). If -r is specified, values should be 1/efficiency",
-    )
-    parser.add_argument(
-        "-t",
-        "--tagger",
-        nargs="+",
-        required=True,
-        type=str,
-        help="tagger name(s)",
-    )
-    parser.add_argument(
-        "-s",
-        "--signal",
-        default="bjets",
-        choices=["bjets", "cjets", "hbb", "hcc"],
-        type=str,
-        help='signal flavour ("bjets" or "cjets" for b-tagging, "hbb" or "hcc" for Xbb)',
-    )
-    parser.add_argument(
-        "-r",
-        "--rejection",
-        default=None,
-        choices=["ujets", "cjets", "bjets", "hbb", "hcc", "top", "qcd"],
-        help="use rejection of specified background class to determine working points",
-    )
-    parser.add_argument(
-        "-d",
-        "--disc_cuts",
-        nargs="+",
-        type=float,
-        help="D_x value(s) to calculate efficiency at",
-    )
-    parser.add_argument(
-        "-n",
-        "--num_jets",
-        default=1_000_000,
-        type=int,
-        help="use this many jets (post selection)",
-    )
-    parser.add_argument(
-        "--ttbar_cuts",
-        nargs="+",
-        default=["pt > 20e3"],
-        type=list,
-        help="selection to apply to ttbar (|eta| < 2.5 is always applied)",
-    )
-    parser.add_argument(
-        "--zprime_cuts",
-        nargs="+",
-        default=["pt > 250e3"],
-        type=list,
-        help="selection to apply to zprime (|eta| < 2.5 is always applied)",
-    )
-    parser.add_argument(
-        "-o",
-        "--outfile",
-        type=Path,
-        help="save results to yaml instead of printing",
-    )
-    parser.add_argument(
-        "--xbb",
-        action="store_true",
-        help="Enable Xbb tagging which expects two fx values ftop and fhcc/fhbb for each tagger",
-    )
-    parser.add_argument(
-        "--fb",
-        nargs="+",
-        type=float,
-        help="fb value(s) for each tagger",
-    )
-    parser.add_argument(
-        "--fc",
-        nargs="+",
-        type=float,
-        help="fc value(s) for each tagger",
-    )
-    parser.add_argument(
-        "--ftau",
-        nargs="+",
-        type=float,
-        help="ftau value(s) for each tagger",
-    )
-    parser.add_argument(
-        "--ftop",
-        nargs="+",
-        type=float,
-        help="ftop value(s) for each tagger",
-    )
-    parser.add_argument(
-        "--fhbb",
-        nargs="+",
-        type=float,
-        help="fhbb value(s) for each tagger",
-    )
-    parser.add_argument(
-        "--fhcc",
-        nargs="+",
-        type=float,
-        help="fhcc value(s) for each tagger",
-    )
-    args = parser.parse_args(args)
-    args.signal = Flavours[args.signal]
-    if args.effs and args.disc_cuts:
-        raise ValueError("Cannot specify both --effs and --disc_cuts")
-    if not args.effs and not args.disc_cuts:
-        raise ValueError("Must specify either --effs or --disc_cuts")
-    if args.xbb:
-        if args.signal not in {Flavours.hbb, Flavours.hcc}:
-            raise ValueError("Xbb tagging only supports hbb or hcc signal flavours")
-        if args.fb or args.fc or args.ftau:
-            raise ValueError("For Xbb tagging, fb, fc and ftau should not be specified")
-        if not args.ftop:
-            raise ValueError("For Xbb tagging, ftop should be specified")
-        if args.signal == "hbb" and not args.fhcc:
-            raise ValueError("For hbb tagging, fhcc should be specified")
-        if args.signal == "hcc" and not args.fhbb:
-            raise ValueError("For hcc tagging, fhbb should be specified")
-    else:
-        if args.ftop or args.fhbb or args.fhcc:
-            raise ValueError("For single-b tagging, ftop, fhbb and fhcc should not be specified")
-        if args.signal == "bjets" and not args.fc:
-            raise ValueError("For bjets tagging, fc should be specified")
-        if args.signal == "cjets" and not args.fb:
-            raise ValueError("For cjets tagging, fb should be specified")
-        if args.ftau is None:
-            args.ftau = [0.0] * len(args.tagger)
-    for fx in ["fb", "fc", "ftau", "ftop", "fhbb", "fhcc"]:
-        if getattr(args, fx) and len(getattr(args, fx)) != len(args.tagger):
-            raise ValueError(f"Number of {fx} values must match number of taggers")
-    return args
-def get_fxs_from_args(args):
-    if args.signal == Flavours.bjets:
-        fxs = {"fc": args.fc, "ftau": args.ftau}
-    elif args.signal == Flavours.cjets:
-        fxs = {"fb": args.fb, "ftau": args.ftau}
-    elif args.signal == Flavours.hbb:
-        fxs = {"ftop": args.ftop, "fhcc": args.fhcc}
-    elif args.signal == Flavours.hcc:
-        fxs = {"ftop": args.ftop, "fhbb": args.fhbb}
-    assert fxs is not None
-    return [{k: v[i] for k, v in fxs.items()} for i in range(len(args.tagger))]
-def get_eff_rej(jets, disc, wp, flavs):
-    out = {"eff": {}, "rej": {}}
-    for bkg in list(flavs):
-        bkg_disc = disc[bkg.cuts(jets).idx]
-        eff = sum(bkg_disc > wp) / len(bkg_disc)
-        out["eff"][str(bkg)] = float(f"{eff:.3g}")
-        out["rej"][str(bkg)] = float(f"{1 / eff:.3g}")
-    return out
-def get_rej_eff_at_disc(jets, tagger, signal, disc_cuts, **fxs):
-    disc = get_discriminant(jets, tagger, signal, **fxs)
-    d = {}
-    flavs = Flavours.by_category("single-btag")
-    for dcut in disc_cuts:
-        d[str(dcut)] = {"eff": {}, "rej": {}}
-        for f in flavs:
-            e_discs = disc[f.cuts(jets).idx]
-            eff = sum(e_discs > dcut) / len(e_discs)
-            d[str(dcut)]["eff"][str(f)] = float(f"{eff:.3g}")
-            d[str(dcut)]["rej"][str(f)] = 1 / float(f"{eff:.3g}")
-    return d
-def setup_common_parts(args):
-    flavs = Flavours.by_category("single-btag") if not args.xbb else Flavours.by_category("xbb")
-    default_cuts = Cuts.from_list(["eta > -2.5", "eta < 2.5"])
-    ttbar_cuts = Cuts.from_list(args.ttbar_cuts) + default_cuts
-    zprime_cuts = Cuts.from_list(args.zprime_cuts) + default_cuts
-    # prepare to load jets
-    all_vars = list(set(sum((flav.cuts.variables for flav in flavs), [])))
-    reader = H5Reader(args.ttbar)
-    jet_vars = reader.dtypes()["jets"].names
-    for tagger in args.tagger:
-        all_vars += [f"{tagger}_{f.px}" for f in flavs if (f"{tagger}_{f.px}" in jet_vars)]
-    # load jets
-    jets = reader.load({"jets": all_vars}, args.num_jets, cuts=ttbar_cuts)["jets"]
-    zp_jets = None
-    if args.zprime:
-        zp_reader = H5Reader(args.zprime)
-        zp_jets = zp_reader.load({"jets": all_vars}, args.num_jets, cuts=zprime_cuts)["jets"]
-    return jets, zp_jets, flavs
-def get_working_points(args=None):
-    jets, zp_jets, flavs = setup_common_parts(args)
-    fxs = get_fxs_from_args(args)
-    # loop over taggers
-    out = {}
-    for i, tagger in enumerate(args.tagger):
-        # calculate discriminant
-        out[tagger] = {"signal": str(args.signal), **fxs[i]}
-        disc = get_discriminant(jets, tagger, args.signal, **fxs[i])
-        # loop over efficiency working points
-        for eff in args.effs:
-            d = out[tagger][f"{eff:.0f}"] = {}
-            wp_flavour = args.signal
-            if args.rejection:
-                eff = 100 / eff  # noqa: PLW2901
-                wp_flavour = args.rejection
-            wp_disc = disc[flavs[wp_flavour].cuts(jets).idx]
-            wp = d["cut_value"] = round(float(np.percentile(wp_disc, 100 - eff)), 3)
-            # calculate eff and rej for each flavour
-            d["ttbar"] = get_eff_rej(jets, disc, wp, flavs)
-            # calculate for zprime
-            if args.zprime:
-                zp_disc = get_discriminant(zp_jets, tagger, Flavours[args.signal], **fxs[i])
-                d["zprime"] = get_eff_rej(zp_jets, zp_disc, wp, flavs)
-    if args.outfile:
-        with open(args.outfile, "w") as f:
-            yaml.dump(out, f, sort_keys=False)
-            return None
-    else:
-        return out
-def get_efficiencies(args=None):
-    jets, zp_jets, _ = setup_common_parts(args)
-    fxs = get_fxs_from_args(args)
-    out = {}
-    for i, tagger in enumerate(args.tagger):
-        out[tagger] = {"signal": str(args.signal), **fxs[i]}
-        out[tagger]["ttbar"] = get_rej_eff_at_disc(
-            jets, tagger, args.signal, args.disc_cuts, **fxs[i]
-        )
-        if args.zprime:
-            out[tagger]["zprime"] = get_rej_eff_at_disc(
-                zp_jets, tagger, args.signal, args.disc_cuts, **fxs[i]
-            )
-    if args.outfile:
-        with open(args.outfile, "w") as f:
-            yaml.dump(out, f, sort_keys=False)
-            return None
-    else:
-        return out
-def main(args=None):
-    args = parse_args(args)
-    if args.effs:
-        out = get_working_points(args)
-    elif args.disc_cuts:
-        out = get_efficiencies(args)
-    if out:
-        print(yaml.dump(out, sort_keys=False))
-        return out
-    return None
-if __name__ == "__main__":
-    main()

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/MANIFEST.in RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/README.md RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/entry_points.txt RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/requires.txt RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/atlas_ftag_tools.egg-info/top_level.txt RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/cli_utils.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/cuts.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/flavours.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/git_check.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/hdf5/__init__.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/hdf5/h5move.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/hdf5/h5reader.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/hdf5/h5split.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/hdf5/h5utils.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/hdf5/h5writer.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/labeller.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/labels.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/region.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/sample.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/track_selector.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/transform.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/vds.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/ftag/wps/__init__.py RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.8 → atlas_ftag_tools-0.2.9}/setup.cfg RENAMED Viewed

File without changes

atlas-ftag-tools 0.2.8__tar.gz → 0.2.9__tar.gz

atlas-ftag-tools 0.2.8tar.gz → 0.2.9tar.gz