PyPI - atlas-ftag-tools - Versions diffs - 0.2.9__py3-none-any.whl → 0.2.10__py3-none-any.whl - Mend

atlas-ftag-tools 0.2.9__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

{atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.10.dist-info}/METADATA +4 -3
{atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.10.dist-info}/RECORD +13 -11
{atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.10.dist-info}/WHEEL +1 -1
{atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.10.dist-info}/entry_points.txt +1 -1
ftag/__init__.py +6 -5
ftag/flavours.yaml +16 -0
ftag/fraction_optimization.py +184 -0
ftag/labels.py +10 -2
ftag/utils/__init__.py +24 -0
ftag/utils/logging.py +123 -0
ftag/utils/metrics.py +431 -0
ftag/{wps/working_points.py → working_points.py} +1 -1
ftag/wps/__init__.py +0 -0
ftag/wps/discriminant.py +0 -84
{atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.10.dist-info}/top_level.txt +0 -0

{atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.10.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: atlas-ftag-tools
-Version: 0.2.9
+Version: 0.2.10
 Summary: ATLAS Flavour Tagging Tools
 Author: Sam Van Stroud, Philipp Gadow
 License: MIT
@@ -8,8 +8,9 @@ Project-URL: Homepage, https://github.com/umami-hep/atlas-ftag-tools/
 Requires-Python: <3.12,>=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: h5py>=3.0
-Requires-Dist: numpy
+Requires-Dist: numpy>=2.2.3
 Requires-Dist: PyYAML>=5.1
+Requires-Dist: scipy>=1.15.2
 Provides-Extra: dev
 Requires-Dist: ruff==0.6.2; extra == "dev"
 Requires-Dist: mypy==1.11.2; extra == "dev"

{atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.10.dist-info}/RECORD RENAMED Viewed

@@ -1,28 +1,30 @@
-ftag/__init__.py,sha256=YRug5UslRbNoQACbEhdenDS6wXmsmeLjlz4JaKP6eHs,737
+ftag/__init__.py,sha256=v9emuK48Hhd-_TCiirfCNMsZSzk52frz1zEOgk9PViQ,787
 ftag/cli_utils.py,sha256=w3TtQmUHSyAKChS3ewvOtcSDAUJAZGIIomaNi8f446U,298
 ftag/cuts.py,sha256=9_ooLZHaO3SnIQBNxwbaPZn-qptGdKnB27FdKQGTiTY,2933
 ftag/flavours.py,sha256=ShH4M2UjQZpZ_NlCctTm2q1tJbzYxjmGteioQ2GcqEU,114
-ftag/flavours.yaml,sha256=87xBvLkMDkicuRMaXtxcao8gjEAgvlTbgjAzpvx4YFM,9021
+ftag/flavours.yaml,sha256=5Lo9KWe-2KzmGMbc7o_X9gzwUyTl0Q5uVHYExduZ6T4,9502
+ftag/fraction_optimization.py,sha256=IlMEJe5fD0soX40f-LO4dYAYld2gMqgZRuBLctoPn9A,5566
 ftag/git_check.py,sha256=Y-XqM80CVXZ5ZKrDdZcYOJt3X64uU6W3OP6Z0D7AZU0,1663
 ftag/labeller.py,sha256=IXUgU9UBir39PxVWRKs5r5fqI66Tv0x7nJD3-RYpbrg,2780
-ftag/labels.py,sha256=C7IylPTnc32dFXq8C2Ks2wuljYK3WaY2EsPLGrhtXy8,3932
+ftag/labels.py,sha256=2nmcmrZD8mWQPxJsGiOgcLDhSVgWfS_cEzqsBV-Qy8o,4198
 ftag/mock.py,sha256=P2D7nNKAz2jRBbmfpHTDj9sBVU9r7HGd0rpWZOJYZ90,5980
 ftag/region.py,sha256=ANv0dGI2W6NJqD9fp7EfqAUReH4FOjc1gwl_Qn8llcM,360
 ftag/sample.py,sha256=3N0FrRcu9l1sX8ohuGOHuMYGD0See6gMO4--7NzR2tE,2538
 ftag/track_selector.py,sha256=fJNk_kIBQriBqV4CPT_3ReJbOUnavDDzO-u3EQlRuyk,2654
 ftag/transform.py,sha256=uEGGJSnqoKOzLYQv650XdK_kDNw4Aw-5dc60z9Dp_y0,3963
 ftag/vds.py,sha256=nRViQZQIORB95nC7NZsW3KsSoGkLzEdOsuCViH5h8-U,3296
+ftag/working_points.py,sha256=RJws2jPMEDQDspCbXUZBifS1CCBmlMJ5ax0eMyDzCRA,15949
 ftag/hdf5/__init__.py,sha256=LFDNxVOCp58SvLHwQhdT68Q-KBMS_i6jBrbXoRpHzbM,354
 ftag/hdf5/h5move.py,sha256=oYpRu0IDCIJIQ2ML52HBAdoyDxmKkHTeM9JdbPEgKfI,947
 ftag/hdf5/h5reader.py,sha256=i31pDAqmOSaxdeRhc4iSBlld8xJ0pmp4rNd7CugNzw0,13706
 ftag/hdf5/h5split.py,sha256=4Wy6Xc3J58MdD9aBaSZHf5ZcVFnJSkWsm42R5Pgo-R4,2448
 ftag/hdf5/h5utils.py,sha256=-4zKTMtNCrDZr_9Ww7uzfsB7M7muBKpmm_1IkKJnHOI,3222
 ftag/hdf5/h5writer.py,sha256=9FkClV__UbBqmFsq_h2jwiZnbWVm8QFRL_4mDZZBbTs,5316
-ftag/wps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ftag/wps/discriminant.py,sha256=GKa0zZlLREdm0mCYSbcWXITYe3VEn3PXOBQiPg5WvgM,2521
-ftag/wps/working_points.py,sha256=jXyikB-bf73EaYFkngjE977-Ytvb9nDTqIdHxWW6WQQ,15960
-atlas_ftag_tools-0.2.9.dist-info/METADATA,sha256=lXC-e0iHMDtvJH8h3i7PcCEKh4_CFz5vlqdGXKSEoV4,5153
-atlas_ftag_tools-0.2.9.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
-atlas_ftag_tools-0.2.9.dist-info/entry_points.txt,sha256=LfVLsZHQolqbPnwPgtmc5IQTh527BKkN2v-IpXWTNHw,137
-atlas_ftag_tools-0.2.9.dist-info/top_level.txt,sha256=qiYQuKcAvMim-31FwkT3MTQu7WQm0s58tPAia5KKWqs,5
-atlas_ftag_tools-0.2.9.dist-info/RECORD,,
+ftag/utils/__init__.py,sha256=C0PgaA6Nk5WVpFqKhBhrHgj2mwsKJbSxoO6Cl67RsaI,544
+ftag/utils/logging.py,sha256=54NaQiC9Bh4vSznSqzoPfR-7tj1PXfmoH7yKgv_ZHZk,3192
+ftag/utils/metrics.py,sha256=zQI4nPeRDSyzqKpdOPmu0GU560xSWoW1wgL13rrja-I,12664
+atlas_ftag_tools-0.2.10.dist-info/METADATA,sha256=VUhrtQML6_bUKlmZNFlUXxTTt5YBzNYupTrdlaF5IAw,5190
+atlas_ftag_tools-0.2.10.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+atlas_ftag_tools-0.2.10.dist-info/entry_points.txt,sha256=b46bVP_O8Mg6aSdPmyjGgVkaXSdyXZMeKAsofh2IDeA,133
+atlas_ftag_tools-0.2.10.dist-info/top_level.txt,sha256=qiYQuKcAvMim-31FwkT3MTQu7WQm0s58tPAia5KKWqs,5
+atlas_ftag_tools-0.2.10.dist-info/RECORD,,

{atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.10.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (76.1.0)
+Generator: setuptools (78.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.10.dist-info}/entry_points.txt RENAMED Viewed

@@ -2,4 +2,4 @@
 h5move = ftag.hdf5.h5move:main
 h5split = ftag.hdf5.h5split:main
 vds = ftag.vds:main
-wps = ftag.wps.working_points:main
+wps = ftag.working_points:main

ftag/__init__.py CHANGED Viewed

@@ -2,18 +2,18 @@
 from __future__ import annotations
-__version__ = "v0.2.9"
+__version__ = "v0.2.10"
-from ftag import hdf5
+from ftag import hdf5, utils
 from ftag.cuts import Cuts
 from ftag.flavours import Flavours
+from ftag.fraction_optimization import calculate_best_fraction_values
 from ftag.labeller import Labeller
 from ftag.labels import Label, LabelContainer
 from ftag.mock import get_mock_file
 from ftag.sample import Sample
 from ftag.transform import Transform
-from ftag.wps.discriminant import get_discriminant
-from ftag.wps.working_points import get_working_points
+from ftag.working_points import get_working_points
 __all__ = [
     "Cuts",
@@ -24,8 +24,9 @@ __all__ = [
     "Sample",
     "Transform",
     "__version__",
-    "get_discriminant",
+    "calculate_best_fraction_values",
     "get_mock_file",
     "get_working_points",
     "hdf5",
+    "utils",
 ]

ftag/flavours.yaml CHANGED Viewed

@@ -332,3 +332,19 @@
   cuts: ["iffClass == 0"]
   colour: tab:gray
   category: isolation
+# Trigger-Xbb tagging
+- name: dRMatchedHbb
+  label: $H \rightarrow b\bar{b}$
+  cuts: ["HadronConeExclExtendedTruthLabelID == 55", "n_truth_higgs > 0", "n_truth_top == 0"]
+  colour: tab:blue
+  category: trigger-xbb
+- name: dRMatchedTop
+  label: Inclusive Top
+  cuts: ["n_truth_higgs == 0", "n_truth_top > 0"]
+  colour: "#A300A3"
+  category: trigger-xbb
+- name: dRMatchedQCD
+  label: QCD
+  cuts: ["n_truth_higgs == 0", "n_truth_top == 0"]
+  colour: "#38761D"
+  category: trigger-xbb

ftag/fraction_optimization.py ADDED Viewed

@@ -0,0 +1,184 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import numpy as np
+from scipy.optimize import minimize
+from ftag import Flavours
+from ftag.utils import calculate_rejection, get_discriminant, logger
+if TYPE_CHECKING:  # pragma: no cover
+    from ftag.labels import Label, LabelContainer
+def convert_dict(
+    fraction_values: dict | np.ndarray,
+    backgrounds: LabelContainer,
+) -> np.ndarray | dict:
+    if isinstance(fraction_values, dict):
+        return np.array([fraction_values[iter_bkg.frac_str] for iter_bkg in backgrounds])
+    if isinstance(fraction_values, np.ndarray):
+        fraction_values = [
+            float(frac_value / np.sum(fraction_values)) for frac_value in fraction_values
+        ]
+        return dict(zip([iter_bkg.frac_str for iter_bkg in backgrounds], fraction_values))
+    raise ValueError(
+        f"Only input of type `dict` or `np.ndarray` are accepted! You gave {type(fraction_values)}"
+    )
+def get_bkg_norm_dict(
+    jets: np.ndarray,
+    tagger: str,
+    signal: Label,
+    flavours: LabelContainer,
+    working_point: float,
+) -> dict:
+    # Init a dict for the bkg rejection norm values
+    bkg_rej_norm = {}
+    # Get the background classes
+    backgrounds = flavours.backgrounds(signal)
+    # Define a bool array if the jet is signal
+    is_signal = signal.cuts(jets).idx
+    # Loop over backgrounds
+    for bkg in backgrounds:
+        # Get the fraction value dict to maximize rejection for given class
+        frac_dict_bkg = {
+            iter_bkg.frac_str: 1 - (0.01 * len(backgrounds)) if iter_bkg == bkg else 0.01
+            for iter_bkg in backgrounds
+        }
+        # Calculate the disc value using the new fraction dict
+        disc = get_discriminant(
+            jets=jets,
+            tagger=tagger,
+            signal=signal,
+            flavours=flavours,
+            fraction_values=frac_dict_bkg,
+        )
+        # Calculate the discriminant
+        bkg_rej_norm[bkg.name] = calculate_rejection(
+            sig_disc=disc[is_signal],
+            bkg_disc=disc[bkg.cuts(jets).idx],
+            target_eff=working_point,
+        )
+    return bkg_rej_norm
+def calculate_rejection_sum(
+    fraction_dict: dict | np.ndarray,
+    jets: np.ndarray,
+    tagger: str,
+    signal: Label,
+    flavours: LabelContainer,
+    working_point: float,
+    bkg_norm_dict: dict,
+    rejection_weights: dict,
+) -> float:
+    # Get the background classes
+    backgrounds = flavours.backgrounds(signal)
+    # Define a bool array if the jet is signal
+    is_signal = signal.cuts(jets).idx
+    # Check that the fraction dict is a dict
+    if isinstance(fraction_dict, np.ndarray):
+        fraction_dict = convert_dict(
+            fraction_values=fraction_dict,
+            backgrounds=backgrounds,
+        )
+    # Calculate discriminant
+    disc = get_discriminant(
+        jets=jets,
+        tagger=tagger,
+        signal=signal,
+        flavours=flavours,
+        fraction_values=fraction_dict,
+    )
+    # Init a dict to which the bkg rejs are added
+    sum_bkg_rej = 0
+    # Loop over the backgrounds and calculate the rejections
+    for iter_bkg in backgrounds:
+        sum_bkg_rej += (
+            calculate_rejection(
+                sig_disc=disc[is_signal],
+                bkg_disc=disc[iter_bkg.cuts(jets).idx],
+                target_eff=working_point,
+            )
+            / bkg_norm_dict[iter_bkg.name]
+        ) * rejection_weights[iter_bkg.name]
+    # Return the negative sum to enable minimizer
+    return -1 * sum_bkg_rej
+def calculate_best_fraction_values(
+    jets: np.ndarray,
+    tagger: str,
+    signal: Label,
+    flavours: LabelContainer,
+    working_point: float,
+    rejection_weights: dict | None = None,
+    optimizer_method: str = "Powell",
+) -> dict:
+    logger.debug("Calculating best fraction values.")
+    logger.debug(f"tagger: {tagger}")
+    logger.debug(f"signal: {signal}")
+    logger.debug(f"flavours: {flavours}")
+    logger.debug(f"working_point: {working_point}")
+    logger.debug(f"rejection_weights: {rejection_weights}")
+    logger.debug(f"optimizer_method: {optimizer_method}")
+    # Ensure Label instance
+    if isinstance(signal, str):
+        signal = Flavours[signal]
+    # Get the background classes
+    backgrounds = flavours.backgrounds(signal)
+    # Define a default fraction dict
+    def_frac_dict = {iter_bkg.frac_str: 1 / len(backgrounds) for iter_bkg in backgrounds}
+    # Define rejection weights if not set
+    if rejection_weights is None:
+        rejection_weights = {iter_bkg.name: 1 for iter_bkg in backgrounds}
+    # Get the normalisation for all bkg rejections
+    bkg_norm_dict = get_bkg_norm_dict(
+        jets=jets,
+        tagger=tagger,
+        signal=signal,
+        flavours=flavours,
+        working_point=working_point,
+    )
+    # Get the best fraction values combination
+    result = minimize(
+        fun=calculate_rejection_sum,
+        x0=convert_dict(fraction_values=def_frac_dict, backgrounds=backgrounds),
+        method=optimizer_method,
+        bounds=[(0, 1)] * len(backgrounds),
+        args=(jets, tagger, signal, flavours, working_point, bkg_norm_dict, rejection_weights),
+    )
+    # Get the final fraction dict
+    final_frac_dict = convert_dict(fraction_values=result.x, backgrounds=backgrounds)
+    logger.info(f"Minimization Success: {result.success}")
+    logger.info("The following best fraction values were found:")
+    for frac_str, frac_value in final_frac_dict.items():
+        logger.info(f"{frac_str}: {round(frac_value, ndigits=3)}")
+    return final_frac_dict

ftag/labels.py CHANGED Viewed

@@ -62,6 +62,9 @@ class LabelContainer:
         except KeyError as e:
             raise KeyError(f"Label '{key}' not found") from e
+    def __len__(self) -> int:
+        return len(self.labels.keys())
     def __getattr__(self, name) -> Label:
         return self[name]
@@ -120,8 +123,13 @@ class LabelContainer:
     def from_list(cls, labels: list[Label]) -> LabelContainer:
         return cls({f.name: f for f in labels})
-    def backgrounds(self, label: Label, only_signals: bool = True) -> LabelContainer:
-        bkg = [f for f in self if f.category == label.category and f != label]
+    def backgrounds(self, signal: Label, only_signals: bool = True) -> LabelContainer:
+        bkg = [f for f in self if f.category == signal.category and f != signal]
         if not only_signals:
             bkg = [f for f in bkg if f.name not in {"ujets", "qcd"}]
+        if len(bkg) == 0:
+            raise TypeError(
+                "No background flavour could be found in the flavours for signal "
+                f"flavour {signal.name}"
+            )
         return LabelContainer.from_list(bkg)

ftag/utils/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+from __future__ import annotations
+from ftag.utils.logging import logger, set_log_level
+from ftag.utils.metrics import (
+    calculate_efficiency,
+    calculate_efficiency_error,
+    calculate_rejection,
+    calculate_rejection_error,
+    get_discriminant,
+    save_divide,
+    weighted_percentile,
+)
+__all__ = [
+    "calculate_efficiency",
+    "calculate_efficiency_error",
+    "calculate_rejection",
+    "calculate_rejection_error",
+    "get_discriminant",
+    "logger",
+    "save_divide",
+    "set_log_level",
+    "weighted_percentile",
+]

ftag/utils/logging.py ADDED Viewed

@@ -0,0 +1,123 @@
+"""Configuration for logger of atlas-ftag-tools."""
+from __future__ import annotations
+import logging
+import os
+from typing import ClassVar
+class CustomFormatter(logging.Formatter):
+    """
+    Logging Formatter to add colours and count warning / errors using implementation
+    from
+    https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output.
+    """
+    grey = "\x1b[38;21m"
+    yellow = "\x1b[33;21m"
+    green = "\x1b[32;21m"
+    red = "\x1b[31;21m"
+    bold_red = "\x1b[31;1m"
+    reset = "\x1b[0m"
+    debugformat = "%(asctime)s - %(levelname)s:%(name)s: %(message)s (%(filename)s:%(lineno)d)"
+    date_format = "%(levelname)s:%(name)s: %(message)s"
+    formats: ClassVar = {
+        logging.DEBUG: grey + debugformat + reset,
+        logging.INFO: green + date_format + reset,
+        logging.WARNING: yellow + date_format + reset,
+        logging.ERROR: red + debugformat + reset,
+        logging.CRITICAL: bold_red + debugformat + reset,
+    }
+    def format(self, record):
+        log_fmt = self.formats.get(record.levelno)
+        formatter = logging.Formatter(log_fmt)
+        return formatter.format(record)
+def get_log_level(
+    level: str,
+):
+    """Get logging levels with string key.
+    Parameters
+    ----------
+    level : str
+        Log level as string.
+    Returns
+    -------
+    logging level
+        logging object with log level info
+    Raises
+    ------
+    ValueError
+        If non-valid option is given
+    """
+    log_levels = {
+        "CRITICAL": logging.CRITICAL,
+        "ERROR": logging.ERROR,
+        "WARNING": logging.WARNING,
+        "INFO": logging.INFO,
+        "DEBUG": logging.DEBUG,
+        "NOTSET": logging.NOTSET,
+    }
+    if level not in log_levels:
+        raise ValueError(f"The 'DebugLevel' option {level} is not valid.")
+    return log_levels[level]
+def initialise_logger(
+    log_level: str | None = None,
+):
+    """Initialise.
+    Parameters
+    ----------
+    log_level : str, optional
+        Logging level defining the verbose level. Accepted values are:
+        CRITICAL, ERROR, WARNING, INFO, DEBUG, NOTSET, by default None
+        If the log_level is not set, it will be set to info
+    Returns
+    -------
+    logger
+        logger object with new level set
+    """
+    retrieved_log_level = get_log_level(
+        os.environ.get("LOG_LEVEL", "INFO") if log_level is None else log_level
+    )
+    tools_logger = logging.getLogger("atlas-ftag-tools")
+    tools_logger.setLevel(retrieved_log_level)
+    ch_handler = logging.StreamHandler()
+    ch_handler.setLevel(retrieved_log_level)
+    ch_handler.setFormatter(CustomFormatter())
+    tools_logger.addHandler(ch_handler)
+    tools_logger.propagate = False
+    return tools_logger
+def set_log_level(
+    tools_logger,
+    log_level: str,
+):
+    """Setting log level.
+    Parameters
+    ----------
+    tools_logger : logger
+        logger object
+    log_level : str
+        Logging level corresponding CRITICAL, ERROR, WARNING, INFO, DEBUG, NOTSET
+    """
+    tools_logger.setLevel(get_log_level(log_level))
+    for handler in tools_logger.handlers:
+        handler.setLevel(get_log_level(log_level))
+logger = initialise_logger()

ftag/utils/metrics.py ADDED Viewed

@@ -0,0 +1,431 @@
+"""Tools for metrics module."""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import numpy as np
+from scipy.ndimage import gaussian_filter1d
+from ftag.utils import logger
+if TYPE_CHECKING:  # pragma: no cover
+    from ftag.labels import Label, LabelContainer
+def save_divide(
+    numerator: np.ndarray | float,
+    denominator: np.ndarray | float,
+    default: float = 1.0,
+):
+    """Save divide for denominator equal to 0.
+    Division using numpy divide function returning default value in cases where
+    denominator is 0.
+    Parameters
+    ----------
+    numerator: np.ndarray | float,
+        Numerator in the ratio calculation.
+    denominator: np.ndarray | float,
+        Denominator in the ratio calculation.
+    default: float
+        Default value which is returned if denominator is 0.
+    Returns
+    -------
+    ratio: np.ndarray | float
+        Result of the division
+    """
+    logger.debug("Calculating save division.")
+    logger.debug("numerator: %s", numerator)
+    logger.debug("denominator: %s", denominator)
+    logger.debug("default: %s", default)
+    if isinstance(numerator, (int, float, np.number)) and isinstance(
+        denominator, (int, float, np.number)
+    ):
+        output_shape = 1
+    else:
+        try:
+            output_shape = denominator.shape
+        except AttributeError:
+            output_shape = numerator.shape
+    ratio = np.divide(
+        numerator,
+        denominator,
+        out=np.ones(
+            output_shape,
+            dtype=float,
+        )
+        * default,
+        where=(denominator != 0),
+    )
+    if output_shape == 1:
+        return float(ratio)
+    return ratio
+def weighted_percentile(
+    arr: np.ndarray,
+    percentile: np.ndarray,
+    weights: np.ndarray = None,
+):
+    """Calculate weighted percentile.
+    Implementation according to https://stackoverflow.com/a/29677616/11509698
+    (https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method)
+    Parameters
+    ----------
+    arr : np.ndarray
+        Data array
+    percentile : np.ndarray
+        Percentile array
+    weights : np.ndarray
+        Weights array, by default None
+    Returns
+    -------
+    np.ndarray
+        Weighted percentile array
+    """
+    logger.debug("Calculating weighted percentile.")
+    logger.debug("arr: %s", arr)
+    logger.debug("percentile: %s", percentile)
+    logger.debug("weights: %s", weights)
+    # Set weights to one if no weights are given
+    if weights is None:
+        weights = np.ones_like(arr)
+    # Set dtype to float64 if the weights are too large
+    dtype = np.float64 if np.sum(weights) > 1000000 else np.float32
+    # Get an array sorting and sort the array and the weights
+    ix = np.argsort(arr)
+    arr = arr[ix]
+    weights = weights[ix]
+    # Return the cumulative sum
+    cdf = np.cumsum(weights, dtype=dtype) - 0.5 * weights
+    cdf -= cdf[0]
+    cdf /= cdf[-1]
+    # Return the linear interpolation
+    return np.interp(percentile, cdf, arr)
+def calculate_efficiency(
+    sig_disc: np.ndarray,
+    bkg_disc: np.ndarray,
+    target_eff: float | list | np.ndarray,
+    return_cuts: bool = False,
+    sig_weights: np.ndarray = None,
+    bkg_weights: np.ndarray = None,
+):
+    """Calculate efficiency.
+    Parameters
+    ----------
+    sig_disc : np.ndarray
+        Signal discriminant
+    bkg_disc : np.ndarray
+        Background discriminant
+    target_eff : float or list or np.ndarray
+        Working point which is used for discriminant calculation
+    return_cuts : bool
+        Specifies if the cut values corresponding to the provided WPs are returned.
+        If target_eff is a float, only one cut value will be returned. If target_eff
+        is an array, target_eff is an array as well.
+    sig_weights : np.ndarray
+        Weights for signal events
+    bkg_weights : np.ndarray
+        Weights for background events
+    Returns
+    -------
+    eff : float or np.ndarray
+        Efficiency.
+        Return float if target_eff is a float, else np.ndarray
+    cutvalue : float or np.ndarray
+        Cutvalue if return_cuts is True.
+        Return float if target_eff is a float, else np.ndarray
+    """
+    logger.debug("Calculating efficiency.")
+    logger.debug("sig_disc: %s", sig_disc)
+    logger.debug("bkg_disc: %s", bkg_disc)
+    logger.debug("target_eff: %s", target_eff)
+    logger.debug("return_cuts: %s", return_cuts)
+    logger.debug("sig_weights: %s", sig_weights)
+    logger.debug("bkg_weights: %s", bkg_weights)
+    # float | np.ndarray for both target_eff and the returned values
+    return_float = False
+    if isinstance(target_eff, float):
+        return_float = True
+    # Flatten the target efficiencies
+    target_eff = np.asarray([target_eff]).flatten()
+    # Get the cutvalue for the given target efficiency
+    cutvalue = weighted_percentile(arr=sig_disc, percentile=1.0 - target_eff, weights=sig_weights)
+    # Sort the cutvalues to get the correct order
+    sorted_args = np.argsort(1 - target_eff)
+    # Get the histogram for the backgrounds
+    hist, _ = np.histogram(bkg_disc, (-np.inf, *cutvalue[sorted_args], np.inf), weights=bkg_weights)
+    # Calculate the efficiencies for the calculated cut values
+    eff = hist[::-1].cumsum()[-2::-1] / hist.sum()
+    eff = eff[sorted_args]
+    # Ensure that a float is returned if float was given
+    if return_float:
+        eff = eff[0]
+        cutvalue = cutvalue[0]
+    # Also return the cuts if wanted
+    if return_cuts:
+        return eff, cutvalue
+    return eff
+def calculate_rejection(
+    sig_disc: np.ndarray,
+    bkg_disc: np.ndarray,
+    target_eff,
+    return_cuts: bool = False,
+    sig_weights: np.ndarray = None,
+    bkg_weights: np.ndarray = None,
+    smooth: bool = False,
+):
+    """Calculate rejection.
+    Parameters
+    ----------
+    sig_disc : np.ndarray
+        Signal discriminant
+    bkg_disc : np.ndarray
+        Background discriminant
+    target_eff : float or list
+        Working point which is used for discriminant calculation
+    return_cuts : bool
+        Specifies if the cut values corresponding to the provided WPs are returned.
+        If target_eff is a float, only one cut value will be returned. If target_eff
+        is an array, target_eff is an array as well.
+    sig_weights : np.ndarray
+        Weights for signal events, by default None
+    bkg_weights : np.ndarray
+        Weights for background events, by default None
+    Returns
+    -------
+    rej : float or np.ndarray
+        Rejection.
+        If target_eff is a float, a float is returned if it's a list a np.ndarray
+    cut_value : float or np.ndarray
+        Cutvalue if return_cuts is True.
+        If target_eff is a float, a float is returned if it's a list a np.ndarray
+    """
+    logger.debug("Calculating rejection.")
+    logger.debug("sig_disc: %s", sig_disc)
+    logger.debug("bkg_disc: %s", bkg_disc)
+    logger.debug("target_eff: %s", target_eff)
+    logger.debug("return_cuts: %s", return_cuts)
+    logger.debug("sig_weights: %s", sig_weights)
+    logger.debug("bkg_weights: %s", bkg_weights)
+    logger.debug("smooth: %s", smooth)
+    # Calculate the efficiency
+    eff = calculate_efficiency(
+        sig_disc=sig_disc,
+        bkg_disc=bkg_disc,
+        target_eff=target_eff,
+        return_cuts=return_cuts,
+        sig_weights=sig_weights,
+        bkg_weights=bkg_weights,
+    )
+    # Invert the efficiency to get a rejection
+    rej = save_divide(1, eff[0] if return_cuts else eff, np.inf)
+    # Smooth out the rejection if wanted
+    if smooth:
+        rej = gaussian_filter1d(rej, sigma=1, radius=2, mode="nearest")
+    # Return also the cut values if wanted
+    if return_cuts:
+        return rej, eff[1]
+    return rej
+def calculate_efficiency_error(
+    arr: np.ndarray,
+    n_counts: int,
+    suppress_zero_divison_error: bool = False,
+    norm: bool = False,
+) -> np.ndarray:
+    """Calculate statistical efficiency uncertainty.
+    Parameters
+    ----------
+    arr : numpy.array
+        Efficiency values
+    n_counts : int
+        Number of used statistics to calculate efficiency
+    suppress_zero_divison_error : bool
+        Not raising Error for zero division
+    norm : bool, optional
+        If True, normed (relative) error is being calculated, by default False
+    Returns
+    -------
+    numpy.array
+        Efficiency uncertainties
+    Raises
+    ------
+    ValueError
+        If n_counts <=0
+    Notes
+    -----
+    This method uses binomial errors as described in section 2.2 of
+    https://inspirehep.net/files/57287ac8e45a976ab423f3dd456af694
+    """
+    logger.debug("Calculating efficiency error.")
+    logger.debug("arr: %s", arr)
+    logger.debug("n_counts: %i", n_counts)
+    logger.debug("suppress_zero_divison_error: %s", suppress_zero_divison_error)
+    logger.debug("norm: %s", norm)
+    if np.any(n_counts <= 0) and not suppress_zero_divison_error:
+        raise ValueError(f"You passed as argument `N` {n_counts} but it has to be larger 0.")
+    if norm:
+        return np.sqrt(arr * (1 - arr) / n_counts) / arr
+    return np.sqrt(arr * (1 - arr) / n_counts)
+def calculate_rejection_error(
+    arr: np.ndarray,
+    n_counts: int,
+    norm: bool = False,
+) -> np.ndarray:
+    """Calculate the rejection uncertainties.
+    Parameters
+    ----------
+    arr : numpy.array
+        Rejection values
+    n_counts : int
+        Number of used statistics to calculate rejection
+    norm : bool, optional
+        If True, normed (relative) error is being calculated, by default False
+    Returns
+    -------
+    numpy.array
+        Rejection uncertainties
+    Raises
+    ------
+    ValueError
+        If n_counts <=0
+    ValueError
+        If any rejection value is 0
+    Notes
+    -----
+    Special case of `eff_err()`
+    """
+    logger.debug("Calculating rejection error.")
+    logger.debug("arr: %s", arr)
+    logger.debug("n_counts: %i", n_counts)
+    logger.debug("norm: %s", norm)
+    if np.any(n_counts <= 0):
+        raise ValueError(f"You passed as argument `n_counts` {n_counts} but it has to be larger 0.")
+    if np.any(arr == 0):
+        raise ValueError("One rejection value is 0, cannot calculate error.")
+    if norm:
+        return np.power(arr, 2) * calculate_efficiency_error(1 / arr, n_counts) / arr
+    return np.power(arr, 2) * calculate_efficiency_error(1 / arr, n_counts)
+def get_discriminant(
+    jets: np.ndarray,
+    tagger: str,
+    signal: Label,
+    flavours: LabelContainer,
+    fraction_values: dict[str, float],
+    epsilon: float = 1e-10,
+) -> np.ndarray:
+    """Calculate the tagging discriminant for a given tagger.
+    Calculated as the logarithm of the ratio of a specified signal probability
+    to a weighted sum ofbackground probabilities.
+    Parameters
+    ----------
+    jets : np.ndarray
+        Structured array of jets containing tagger outputs
+    tagger : str
+        Name of the tagger
+    signal : Label
+        Signal flavour (bjets/cjets or hbb/hcc)
+    fraction_values : dict
+        Dict with the fraction values for the background classes for the given tagger
+    epsilon : float, optional
+        Small number to avoid division by zero, by default 1e-10
+    Returns
+    -------
+    np.ndarray
+        Array of discriminant values.
+    Raises
+    ------
+    ValueError
+        If the signal flavour is not recognised.
+    """
+    # Init the denominator
+    denominator = 0.0
+    # Loop over background flavours
+    for flav in flavours:
+        # Skip signal flavour for denominator
+        if flav == signal:
+            continue
+        # Get the probability name of the tagger/flavour combo + fraction value
+        prob_name = f"{tagger}_{flav.px}"
+        fraction_value = fraction_values[flav.frac_str]
+        # If fraction_value for the given flavour is zero, skip it
+        if fraction_value == 0:
+            continue
+        # Check that the probability value for the flavour is available
+        if fraction_value > 0 and prob_name not in jets.dtype.names:
+            raise ValueError(
+                f"Nonzero fraction value for {flav.name}, but '{prob_name}' "
+                "not found in input array."
+            )
+        # Update denominator
+        denominator += jets[prob_name] * fraction_value if prob_name in jets.dtype.names else 0
+    # Calculate numerator
+    signal_field = f"{tagger}_{signal.px}"
+    # Check that the probability of the signal is available
+    if signal_field not in jets.dtype.names:
+        raise ValueError(
+            f"No signal probability value(s) found for tagger {tagger}. "
+            f"Missing variable: {signal_field}"
+        )
+    return np.log((jets[signal_field] + epsilon) / (denominator + epsilon))

ftag/{wps/working_points.py → working_points.py} RENAMED Viewed

@@ -14,7 +14,7 @@ from ftag import Flavours
 from ftag.cli_utils import HelpFormatter
 from ftag.cuts import Cuts
 from ftag.hdf5 import H5Reader
-from ftag.wps.discriminant import get_discriminant
+from ftag.utils import get_discriminant
 if TYPE_CHECKING:  # pragma: no cover
     from collections.abc import Sequence

ftag/wps/__init__.py DELETED Viewed

File without changes

ftag/wps/discriminant.py DELETED Viewed

@@ -1,84 +0,0 @@
-from __future__ import annotations
-from typing import TYPE_CHECKING
-import numpy as np
-if TYPE_CHECKING:  # pragma: no cover
-    from ftag.labels import Label, LabelContainer
-def get_discriminant(
-    jets: np.ndarray,
-    tagger: str,
-    signal: Label,
-    flavours: LabelContainer,
-    fraction_values: dict[str, float],
-    epsilon: float = 1e-10,
-) -> np.ndarray:
-    """Calculate the tagging discriminant for a given tagger.
-    Calculated as the logarithm of the ratio of a specified signal probability
-    to a weighted sum ofbackground probabilities.
-    Parameters
-    ----------
-    jets : np.ndarray
-        Structured array of jets containing tagger outputs
-    tagger : str
-        Name of the tagger
-    signal : Label
-        Signal flavour (bjets/cjets or hbb/hcc)
-    fraction_values : dict
-        Dict with the fraction values for the background classes for the given tagger
-    epsilon : float, optional
-        Small number to avoid division by zero, by default 1e-10
-    Returns
-    -------
-    np.ndarray
-        Array of discriminant values.
-    Raises
-    ------
-    ValueError
-        If the signal flavour is not recognised.
-    """
-    # Init the denominator
-    denominator = 0.0
-    # Loop over background flavours
-    for flav in flavours:
-        # Skip signal flavour for denominator
-        if flav == signal:
-            continue
-        # Get the probability name of the tagger/flavour combo + fraction value
-        prob_name = f"{tagger}_{flav.px}"
-        fraction_value = fraction_values[flav.frac_str]
-        # If fraction_value for the given flavour is zero, skip it
-        if fraction_value == 0:
-            continue
-        # Check that the probability value for the flavour is available
-        if fraction_value > 0 and prob_name not in jets.dtype.names:
-            raise ValueError(
-                f"Nonzero fraction value for {flav.name}, but '{prob_name}' "
-                "not found in input array."
-            )
-        # Update denominator
-        denominator += jets[prob_name] * fraction_value if prob_name in jets.dtype.names else 0
-    # Calculate numerator
-    signal_field = f"{tagger}_{signal.px}"
-    # Check that the probability of the signal is available
-    if signal_field not in jets.dtype.names:
-        raise ValueError(
-            f"No signal probability value(s) found for tagger {tagger}. "
-            f"Missing variable: {signal_field}"
-        )
-    return np.log((jets[signal_field] + epsilon) / (denominator + epsilon))

{atlas_ftag_tools-0.2.9.dist-info → atlas_ftag_tools-0.2.10.dist-info}/top_level.txt RENAMED Viewed

File without changes

atlas-ftag-tools 0.2.9__py3-none-any.whl → 0.2.10__py3-none-any.whl

atlas-ftag-tools 0.2.9__py3-none-any.whl → 0.2.10__py3-none-any.whl