PyPI - atlas-ftag-tools - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl - Mend

atlas-ftag-tools 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

{atlas_ftag_tools-0.2.2.dist-info → atlas_ftag_tools-0.2.4.dist-info}/METADATA +1 -1
{atlas_ftag_tools-0.2.2.dist-info → atlas_ftag_tools-0.2.4.dist-info}/RECORD +12 -10
{atlas_ftag_tools-0.2.2.dist-info → atlas_ftag_tools-0.2.4.dist-info}/WHEEL +1 -1
ftag/__init__.py +1 -1
ftag/cuts.py +3 -1
ftag/flavour.py +7 -1
ftag/hdf5/h5reader.py +10 -12
ftag/labeller.py +88 -0
ftag/mock.py +27 -12
ftag/track_selector.py +70 -0
{atlas_ftag_tools-0.2.2.dist-info → atlas_ftag_tools-0.2.4.dist-info}/entry_points.txt +0 -0
{atlas_ftag_tools-0.2.2.dist-info → atlas_ftag_tools-0.2.4.dist-info}/top_level.txt +0 -0

{atlas_ftag_tools-0.2.2.dist-info → atlas_ftag_tools-0.2.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: atlas-ftag-tools
-Version: 0.2.2
+Version: 0.2.4
 Summary: ATLAS Flavour Tagging Tools
 Author: Sam Van Stroud, Philipp Gadow
 License: MIT

{atlas_ftag_tools-0.2.2.dist-info → atlas_ftag_tools-0.2.4.dist-info}/RECORD RENAMED Viewed

@@ -1,26 +1,28 @@
-ftag/__init__.py,sha256=7IKOa65yKaQWsx6-s7VVQs4t1NQ9hyVAOlj-U5m-VBk,629
+ftag/__init__.py,sha256=Mx2Emsw4TM1YL0wTQMHK36EaQE_ImeV6ukiz1X5BZAU,629
 ftag/cli_utils.py,sha256=w3TtQmUHSyAKChS3ewvOtcSDAUJAZGIIomaNi8f446U,298
-ftag/cuts.py,sha256=a0BJj4cVRunc-hFLPloGvNoSFvRmZg2kVLv7sA0iAaI,2817
-ftag/flavour.py,sha256=qvgp4DarOdcQgjae_NWnd81k_YqdmFY74lOKky2lpb8,3568
+ftag/cuts.py,sha256=9_ooLZHaO3SnIQBNxwbaPZn-qptGdKnB27FdKQGTiTY,2933
+ftag/flavour.py,sha256=EMZZLyl6lSdvkfrYxHhMcSn3aqP_FU7OpCFkvZpTksU,3761
 ftag/flavours.yaml,sha256=lFnVwjh_DwLhOc3mr5n6bSIWyHgxQvAXas4lEmEDncU,7520
 ftag/git_check.py,sha256=Y-XqM80CVXZ5ZKrDdZcYOJt3X64uU6W3OP6Z0D7AZU0,1663
-ftag/mock.py,sha256=QAm0ti6FWDCRtIyay4yozbGNNATDQbq5b1uc8uVhi2s,4275
+ftag/labeller.py,sha256=uDygOhVGSNn96DWw8aErHpTtFsFX0RnxYYpy4g1FRog,2457
+ftag/mock.py,sha256=_oy-r3eLllFy33NAoZaKfAx-Rp2vrCdrGj3UsTMks94,4740
 ftag/region.py,sha256=ANv0dGI2W6NJqD9fp7EfqAUReH4FOjc1gwl_Qn8llcM,360
 ftag/sample.py,sha256=TFXMhDkbPmjkms9-b-bINJ32T3bO86JcU70C0nY7wa8,2500
 ftag/test_cli_utils.py,sha256=xa08vf6SEOow58SSFagYdAselb-dkNOVvWsWheMnW-g,1001
+ftag/track_selector.py,sha256=piSYAN_IkOsrXxKXjXbJpMSseUig5P2BJW5mCwsMUDM,2535
 ftag/transform.py,sha256=uEGGJSnqoKOzLYQv650XdK_kDNw4Aw-5dc60z9Dp_y0,3963
 ftag/vds.py,sha256=nRViQZQIORB95nC7NZsW3KsSoGkLzEdOsuCViH5h8-U,3296
 ftag/hdf5/__init__.py,sha256=LFDNxVOCp58SvLHwQhdT68Q-KBMS_i6jBrbXoRpHzbM,354
 ftag/hdf5/h5move.py,sha256=oYpRu0IDCIJIQ2ML52HBAdoyDxmKkHTeM9JdbPEgKfI,947
-ftag/hdf5/h5reader.py,sha256=H_5Aw0lOyEzK_phMRhD-jR_OSCsXnCA3qJZnRvPqaRU,13569
+ftag/hdf5/h5reader.py,sha256=i31pDAqmOSaxdeRhc4iSBlld8xJ0pmp4rNd7CugNzw0,13706
 ftag/hdf5/h5split.py,sha256=4Wy6Xc3J58MdD9aBaSZHf5ZcVFnJSkWsm42R5Pgo-R4,2448
 ftag/hdf5/h5utils.py,sha256=-4zKTMtNCrDZr_9Ww7uzfsB7M7muBKpmm_1IkKJnHOI,3222
 ftag/hdf5/h5writer.py,sha256=j3Fy8snkiVVfimiUz3rrZOhSV8OF27978Y9pk0QcTGM,5277
 ftag/wps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ftag/wps/discriminant.py,sha256=kJFekUTPNIvCabJCon6OqOAQEzz5hj3XrWFFRLOgGOs,3836
 ftag/wps/working_points.py,sha256=VTU6OD40ULAJQD0MlD1EZd33q8ociUvFX1YrhgJFvXc,9722
-atlas_ftag_tools-0.2.2.dist-info/METADATA,sha256=y2fq23cqtkaoUQxEiCrdoTVuBcG154yjo4k4cwf8P-A,5169
-atlas_ftag_tools-0.2.2.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
-atlas_ftag_tools-0.2.2.dist-info/entry_points.txt,sha256=LfVLsZHQolqbPnwPgtmc5IQTh527BKkN2v-IpXWTNHw,137
-atlas_ftag_tools-0.2.2.dist-info/top_level.txt,sha256=qiYQuKcAvMim-31FwkT3MTQu7WQm0s58tPAia5KKWqs,5
-atlas_ftag_tools-0.2.2.dist-info/RECORD,,
+atlas_ftag_tools-0.2.4.dist-info/METADATA,sha256=f4aCu6JmItUBp5EmTzbrqhC5-Wsy7uiOiBO9yufyacQ,5169
+atlas_ftag_tools-0.2.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+atlas_ftag_tools-0.2.4.dist-info/entry_points.txt,sha256=LfVLsZHQolqbPnwPgtmc5IQTh527BKkN2v-IpXWTNHw,137
+atlas_ftag_tools-0.2.4.dist-info/top_level.txt,sha256=qiYQuKcAvMim-31FwkT3MTQu7WQm0s58tPAia5KKWqs,5
+atlas_ftag_tools-0.2.4.dist-info/RECORD,,

{atlas_ftag_tools-0.2.2.dist-info → atlas_ftag_tools-0.2.4.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (74.0.0)
+Generator: setuptools (75.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

ftag/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from __future__ import annotations
-__version__ = "v0.2.2"
+__version__ = "v0.2.4"
 from ftag import hdf5

ftag/cuts.py CHANGED Viewed

@@ -79,7 +79,9 @@ class Cuts:
     def ignore(self, variables: list[str]):
         return Cuts(tuple(c for c in self if c.variable not in variables))
-    def __call__(self, array) -> CutsResult:
+    def __call__(self, array: np.ndarray) -> CutsResult:
+        if array.ndim == 2:
+            raise ValueError("This interface only supports jet selections")
         keep = np.arange(len(array))
         for cut in self.cuts:
             idx = cut(array)

ftag/flavour.py CHANGED Viewed

@@ -42,6 +42,9 @@ class Flavour:
     def __str__(self) -> str:
         return self.name
+    def __lt__(self, other) -> bool:
+        return self.name < other.name
 @dataclass
 class FlavourContainer:
@@ -81,7 +84,10 @@ class FlavourContainer:
         return list(dict.fromkeys(f.category for f in self))
     def by_category(self, category: str) -> FlavourContainer:
-        return FlavourContainer({k: v for k, v in self.flavours.items() if v.category == category})
+        f = FlavourContainer({k: v for k, v in self.flavours.items() if v.category == category})
+        if not f.flavours:
+            raise KeyError(f"No flavours with category '{category}' found")
+        return f
     def from_cuts(self, cuts: list | Cuts) -> Flavour:
         if isinstance(cuts, list):

ftag/hdf5/h5reader.py CHANGED Viewed

@@ -150,8 +150,13 @@ class H5Reader:
     transform : Transform | None, optional
         Transform to apply to data, by default None
     equal_jets : bool, optional
-        Take the same number of jets (weighted) from each sample, by default True
-        If False, use all jets in each sample.
+        Take the same number of jets (weighted) from each sample, by default True.
+        This is useful when you specify a list of DSIDs for the sample and they are
+        qualitatively different, and you want to ensure that you always return batches
+        with jets from all DSIDs. This is used for example in the QCD resampling for Xbb.
+        If False, use all jets in each sample, allowing for the full available statistics
+        to be used. Useful for example if you have multiple ttbar samples and you want to
+        use all available jets from each sample.
     """
     fname: Path | str | list[Path | str]
@@ -162,17 +167,10 @@ class H5Reader:
     weights: list[float] | None = None
     do_remove_inf: bool = False
     transform: Transform | None = None
-    equal_jets: bool = True
+    equal_jets: bool = False
     def __post_init__(self) -> None:
         self.rng = np.random.default_rng(42)
-        if not self.equal_jets:
-            log.warning(
-                "equal_jets is set to False, which will result in different number of jets taken"
-                " from each sample. Be aware that this can affect the resampling, so make sure you"
-                " know what you are doing."
-            )
         if isinstance(self.fname, (str, Path)):
             self.fname = [self.fname]
@@ -283,8 +281,8 @@ class H5Reader:
                     try:
                         samples.append(next(stream))
-                    # if equal_jets is True, we can stop when any stream is done
-                    # otherwise if sample is exhausted, mark it as done
+                    # if equal_jets is True, stop when any sample is done
+                    # otherwise if stream is exhausted, mark it as such and continue
                     except StopIteration:
                         if self.equal_jets:
                             return

ftag/labeller.py ADDED Viewed

@@ -0,0 +1,88 @@
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from ftag import Flavours
+from ftag.flavour import Flavour, FlavourContainer
+from ftag.hdf5 import join_structured_arrays, structured_from_dict
+@dataclass
+class Labeller:
+    """
+    Defines a labelling scheme.
+    Labels are [0, ..., n] and are assigned using pre-defined selections.
+    Parameters
+    ----------
+    labels : FlavourContainer | list[str | Flavour]
+        The labels to be use.
+    require_labels : bool
+        Whether to require that all objects are labelled.
+    """
+    labels: FlavourContainer | list[str | Flavour]
+    require_labels: bool = True
+    def __post_init__(self) -> None:
+        if isinstance(self.labels, FlavourContainer):
+            self.labels = list(self.labels)
+        self.labels = sorted([Flavours[label] for label in self.labels])
+    def get_labels(self, array: np.ndarray) -> np.ndarray:
+        """
+        Returns the labels for the given array.
+        Parameters
+        ----------
+        array : np.ndarray
+            The array to label.
+        Returns
+        -------
+        np.ndarray
+            The labels for the given array.
+        Raises
+        ------
+        ValueError
+            If the `require_labels` attribute is set to `True` and some objects were not labelled.
+        """
+        labels = -1 * np.ones_like(array, dtype=int)
+        for i, label in enumerate(self.labels):
+            labels[label.cuts(array).idx] = i
+        if self.require_labels and -1 in labels:
+            raise ValueError("Some objects were not labelled")
+        return labels[labels != -1]
+    def add_labels(self, array: np.ndarray, label_name: str = "labels") -> np.ndarray:
+        """
+        Adds the labels to the given array.
+        Parameters
+        ----------
+        array : np.ndarray
+            The array to label.
+        label_name : str
+            The name of the label column.
+        Returns
+        -------
+        np.ndarray
+            The array with the labels added.
+        Raises
+        ------
+        ValueError
+            If the `require_labels` attribute is set to `False`.
+        """
+        if not self.require_labels:
+            raise ValueError("Cannot add labels if require_labels is set to False")
+        labels = self.get_labels(array)
+        labels = structured_from_dict({label_name: labels})
+        return join_structured_arrays([array, labels])

ftag/mock.py CHANGED Viewed

@@ -84,12 +84,7 @@ def get_mock_scores(labels: np.ndarray, is_xbb: bool = False):
     return u2s(scores, dtype=np.dtype([(name, "f4") for name in cols]))
-def get_mock_file(
-    num_jets=1000,
-    fname: str | None = None,
-    tracks_name: str = "tracks",
-    num_tracks: int = 40,
-) -> tuple[str, h5py.File]:
+def mock_jets(num_jets=1000) -> np.ndarray:
     # setup jets
     rng = np.random.default_rng(42)
     jets_dtype = np.dtype(JET_VARS)
@@ -106,7 +101,31 @@ def get_mock_file(
     jets["R10TruthLabel_R22v1"] = rng.choice([1, 10, 11, 12], size=num_jets)
     scores = get_mock_scores(jets["HadronConeExclTruthLabelID"])
     xbb_scores = get_mock_scores(jets["R10TruthLabel_R22v1"], is_xbb=True)
-    jets = join_structured_arrays([jets, scores, xbb_scores])
+    return join_structured_arrays([jets, scores, xbb_scores])
+def mock_tracks(num_jets=1000, num_tracks=40) -> np.ndarray:
+    rng = np.random.default_rng(42)
+    tracks_dtype = np.dtype(TRACK_VARS)
+    tracks = u2s(rng.random((num_jets, num_tracks, len(TRACK_VARS))), tracks_dtype)
+    tracks["d0"] *= 5
+    # for the shared hits, add some reasonable integer values
+    tracks["numberOfPixelSharedHits"] = rng.integers(0, 3, size=(num_jets, num_tracks))
+    tracks["numberOfSCTSharedHits"] = rng.integers(0, 3, size=(num_jets, num_tracks))
+    valid = rng.choice([True, False], size=(num_jets, num_tracks))
+    valid = valid.astype(bool).view(dtype=np.dtype([("valid", bool)]))
+    return join_structured_arrays([tracks, valid])
+def get_mock_file(
+    num_jets=1000,
+    fname: str | None = None,
+    tracks_name: str = "tracks",
+    num_tracks: int = 40,
+) -> tuple[str, h5py.File]:
+    jets = mock_jets(num_jets)
     # create a tempfile in a new folder
     if fname is None:
@@ -120,11 +139,7 @@ def get_mock_file(
     # setup tracks
     if tracks_name:
-        tracks_dtype = np.dtype(TRACK_VARS)
-        tracks = u2s(rng.random((num_jets, num_tracks, len(TRACK_VARS))), tracks_dtype)
-        valid = rng.choice([True, False], size=(num_jets, num_tracks))
-        valid = valid.astype(bool).view(dtype=np.dtype([("valid", bool)]))
-        tracks = join_structured_arrays([tracks, valid])
+        tracks = mock_tracks(num_jets, num_tracks)
         f.create_dataset(tracks_name, data=tracks)
     return fname, f

ftag/track_selector.py ADDED Viewed

@@ -0,0 +1,70 @@
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+from ftag.cuts import Cut, Cuts
+@dataclass
+class TrackSelector:
+    """
+    Apply track selections to a set of tracks stored in a structured numpy array.
+    The array is assumed to have shape (n_jets, n_tracks, n_features).
+    Applying cuts will NaN out the tracks that do not pass the cuts,
+    but leave the shape of the array unchanged.
+    Parameters
+    ----------
+    cuts : Cuts
+        The cuts to apply to the tracks
+    valid_str : str
+        The name of the field in the tracks that indicates whether the track is
+    """
+    cuts: Cuts
+    valid_str: str = "valid"
+    def __call__(self, tracks: np.ndarray) -> np.ndarray:
+        # get a bool array for all tracks passing before any cuts
+        rm_idx = np.zeros_like(tracks[self.valid_str], dtype=bool)
+        # apply the cuts
+        for cut in self.cuts.cuts:
+            # remove valid track indices that do not pass the selection
+            keep_idx = self._nshared_cut(cut, tracks) if cut.variable == "NSHARED" else cut(tracks)
+            rm_idx[tracks[self.valid_str] & ~keep_idx] = True
+        # set the values of the tracks that do not pass the cuts to
+        for var in tracks.dtype.names:
+            if issubclass(tracks[var].dtype.type, np.floating):
+                tracks[var][rm_idx] = np.nan
+            elif issubclass(tracks[var].dtype.type, np.integer):
+                tracks[var][rm_idx] = -1
+            elif issubclass(tracks[var].dtype.type, np.bool_):
+                tracks[var][rm_idx] = False
+            else:
+                raise TypeError(f"Unknown dtype {tracks[var].dtype}")
+        # specifically set the valid flag to false (even though it's already false by now)
+        tracks[rm_idx][self.valid_str] = False
+        return tracks
+    def _nshared_cut(self, cut: Cut, tracks: np.ndarray) -> np.ndarray:
+        # hack to apply the FTAG shared hit cut, which requires an intermediate step
+        if cut.variable == "NSHARED" and "NSHARED" in tracks.dtype.names:
+            raise ValueError("NSHARED is a reserved variable name")
+        # compute
+        n_pix_shared = tracks["numberOfPixelSharedHits"]
+        n_sct_shared = tracks["numberOfSCTSharedHits"]
+        n_module_shared = n_pix_shared + n_sct_shared / 2
+        # convert n_module_shared to structured array
+        n_module_shared = n_module_shared.view(dtype=[(cut.variable, n_module_shared.dtype)])
+        # select
+        return cut(n_module_shared)

{atlas_ftag_tools-0.2.2.dist-info → atlas_ftag_tools-0.2.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{atlas_ftag_tools-0.2.2.dist-info → atlas_ftag_tools-0.2.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

atlas-ftag-tools 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

atlas-ftag-tools 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl