PyPI - atlas-ftag-tools - Versions diffs - 0.0.4__tar.gz → 0.0.6__tar.gz - Mend

atlas-ftag-tools 0.0.4__tar.gz → 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/PKG-INFO RENAMED Viewed

@@ -1,17 +1,18 @@
 Metadata-Version: 2.1
 Name: atlas-ftag-tools
-Version: 0.0.4
+Version: 0.0.6
 Summary: ATLAS Flavour Tagging Tools
 Author: Sam Van Stroud, Philipp Gadow
 License: MIT
 Project-URL: Homepage, https://github.com/umami-hep/atlas-ftag-tools/
-Requires-Python: >=3.10
+Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Provides-Extra: dev
 # ATLAS FTAG Python Tools
 This is a collection of Python tools for working with files produced with the FTAG [ntuple dumper](https://gitlab.cern.ch/atlas-flavor-tagging-tools/training-dataset-dumper/).
+The code is intended to be used as a [library](https://iscinumpy.dev/post/app-vs-library/) for other projects.
 Please see the [example notebook](ftag/example.ipynb) for usage.
 ## Installation

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/README.md RENAMED Viewed

@@ -1,6 +1,7 @@
 # ATLAS FTAG Python Tools
 This is a collection of Python tools for working with files produced with the FTAG [ntuple dumper](https://gitlab.cern.ch/atlas-flavor-tagging-tools/training-dataset-dumper/).
+The code is intended to be used as a [library](https://iscinumpy.dev/post/app-vs-library/) for other projects.
 Please see the [example notebook](ftag/example.ipynb) for usage.
 ## Installation

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/atlas_ftag_tools.egg-info/PKG-INFO RENAMED Viewed

@@ -1,17 +1,18 @@
 Metadata-Version: 2.1
 Name: atlas-ftag-tools
-Version: 0.0.4
+Version: 0.0.6
 Summary: ATLAS Flavour Tagging Tools
 Author: Sam Van Stroud, Philipp Gadow
 License: MIT
 Project-URL: Homepage, https://github.com/umami-hep/atlas-ftag-tools/
-Requires-Python: >=3.10
+Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Provides-Extra: dev
 # ATLAS FTAG Python Tools
 This is a collection of Python tools for working with files produced with the FTAG [ntuple dumper](https://gitlab.cern.ch/atlas-flavor-tagging-tools/training-dataset-dumper/).
+The code is intended to be used as a [library](https://iscinumpy.dev/post/app-vs-library/) for other projects.
 Please see the [example notebook](ftag/example.ipynb) for usage.
 ## Installation

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/atlas_ftag_tools.egg-info/SOURCES.txt RENAMED Viewed

@@ -10,6 +10,7 @@ ftag/__init__.py
 ftag/cuts.py
 ftag/flavour.py
 ftag/flavours.yaml
+ftag/mock.py
 ftag/region.py
 ftag/sample.py
 ftag/vds.py

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/atlas_ftag_tools.egg-info/requires.txt RENAMED Viewed

@@ -1,6 +1,6 @@
-h5py==3.8.0
-numpy==1.24.*
-PyYAML==6.0
+h5py>=3.0
+numpy
+PyYAML>=5.1
 [dev]
 black==23.1.0

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/__init__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 """atlas-ftag-tools - Common tools for ATLAS flavour tagging software."""
-__version__ = "v0.0.4"
+__version__ = "v0.0.6"
 from pathlib import Path
@@ -10,6 +10,7 @@ import yaml
 import ftag.hdf5 as hdf5
 from ftag.cuts import Cuts
 from ftag.flavour import Flavour, FlavourContainer
+from ftag.mock import get_mock_file
 from ftag.sample import Sample
 # load flavours
@@ -19,4 +20,4 @@ flavours_dict = {f["name"]: Flavour(cuts=Cuts.from_list(f.pop("cuts")), **f) for
 assert len(flavours_dict) == len(flavours_yaml), "Duplicate flavour names detected"
 Flavours = FlavourContainer(flavours_dict)
-__all__ = ["Cuts", "Flavours", "Sample", "hdf5", "__version__"]
+__all__ = ["Cuts", "Flavours", "Sample", "hdf5", "get_mock_file", "__version__"]

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/flavour.py RENAMED Viewed

@@ -16,7 +16,17 @@ class Flavour:
     @property
     def px(self) -> str:
-        return f"p{self.name.removesuffix('jets')}"
+        if self.name.endswith("jets"):
+            return f"p{self.name[: -len('jets')]}"
+        return f"p{self.name}"
+    @property
+    def eff_str(self) -> str:
+        return self.label.replace("jets", "jet") + " efficiency"
+    @property
+    def rej_str(self) -> str:
+        return self.label.replace("jets", "jet") + " rejection"
     def __str__(self) -> str:
         return self.name
@@ -38,6 +48,11 @@ class FlavourContainer:
     def __getattr__(self, name) -> Flavour:
         return self[name]
+    def __contains__(self, flavour: str | Flavour) -> bool:
+        if isinstance(flavour, Flavour):
+            flavour = flavour.name
+        return flavour in self.flavours
     def __repr__(self) -> str:
         return f"{self.__class__.__name__}({', '.join(list(f.name for f in self))})"
@@ -47,3 +62,11 @@ class FlavourContainer:
     def by_category(self, category: str) -> FlavourContainer:
         return FlavourContainer({k: v for k, v in self.flavours.items() if v.category == category})
+    def from_cuts(self, cuts: list | Cuts) -> Flavour:
+        if isinstance(cuts, list):
+            cuts = Cuts.from_list(cuts)
+        for flavour in self:
+            if flavour.cuts == cuts:
+                return flavour
+        raise KeyError(f"Flavour with {cuts} not found")

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/hdf5/__init__.py RENAMED Viewed

@@ -1,17 +1,13 @@
 from ftag.hdf5.h5reader import H5Reader
-from ftag.hdf5.h5utils import (
-    cast_dtype,
-    get_dtype,
-    get_dummy_file,
-    join_structured_arrays,
-)
+from ftag.hdf5.h5utils import cast_dtype, get_dtype, join_structured_arrays
 from ftag.hdf5.h5writer import H5Writer
+from ftag.mock import get_mock_file
 __all__ = [
     "H5Reader",
     "H5Writer",
-    "get_dummy_file",
     "get_dtype",
     "cast_dtype",
     "join_structured_arrays",
+    "get_mock_file",
 ]

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/hdf5/h5reader.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import logging as log
 import math
 from collections.abc import Generator
@@ -58,43 +60,28 @@ class H5SingleReader:
                     )
         return {name: array[keep_idx] for name, array in data.items()}
-    def stream(self, variables: dict, num_jets: int, cuts: Cuts | None = None) -> Generator:
-        """Generate batches of selected jets.
-        Parameters
-        ----------
-        variables : dict
-            Dictionary of variables to for each group.
-        num_jets : int
-            Total number of selected jets to generate.
-        cuts : Cuts | None, optional
-            Cuts to apply, by default None
-        Yields
-        ------
-        Generator
-            Generator of batches of selected jets.
-        Raises
-        ------
-        ValueError
-            If more jets are requested than available.
-        """
+    def stream(
+        self, variables: dict | None = None, num_jets: int | None = None, cuts: Cuts | None = None
+    ) -> Generator:
+        if num_jets is None:
+            num_jets = self.num_jets
         if num_jets > self.num_jets:
             raise ValueError(
                 f"{num_jets:,} jets requested but only {self.num_jets:,} available in {self.fname}"
             )
-        jet_vars = list(variables.get(self.jets_name, []))
-        variables[self.jets_name] = jet_vars + (cuts.variables if cuts else [])
+        if variables is None:
+            variables = {self.jets_name: None}
         total = 0
+        rng = np.random.default_rng(42)
         with h5py.File(self.fname) as f:
             data = {name: self.empty(f[name], var) for name, var in variables.items()}
             # get indices
             indices = list(range(0, self.num_jets, self.batch_size))
             if self.shuffle:
-                np.random.default_rng(42).shuffle(indices)
+                rng.shuffle(indices)
             # loop over batches and read file
             for low in indices:
@@ -130,7 +117,7 @@ class H5Reader:
     weights: list[float] | None = None
     def __post_init__(self) -> None:
-        if isinstance(self.fname, str | Path):
+        if isinstance(self.fname, (str, Path)):
             self.fname = [self.fname]
         # calculate batch sizes
@@ -141,7 +128,7 @@ class H5Reader:
         # create readers
         self.readers = [
             H5SingleReader(fname, batch_size, self.jets_name, self.precision, self.shuffle)
-            for fname, batch_size in zip(self.fname, self.batch_sizes, strict=True)
+            for fname, batch_size in zip(self.fname, self.batch_sizes)
         ]
     @property
@@ -158,7 +145,33 @@ class H5Reader:
         with h5py.File(self.readers[0].fname) as f:
             return f[name].dtype
-    def stream(self, variables: dict, num_jets, cuts: Cuts | None = None) -> Generator:
+    def stream(
+        self, variables: dict | None = None, num_jets: int | None = None, cuts: Cuts | None = None
+    ) -> Generator:
+        """Generate batches of selected jets.
+        Parameters
+        ----------
+        variables : dict | None, optional
+            Dictionary of variables to load for each group, by default use all jet variables.
+        num_jets : int | None, optional
+            Total number of selected jets to generate, by default all.
+        cuts : Cuts | None, optional
+            Selection cuts to apply, by default None
+        Yields
+        ------
+        Generator
+            Generator of batches of selected jets.
+        """
+        if num_jets is None:
+            num_jets = self.num_jets
+        if variables is None:
+            variables = {self.jets_name: None}
+        if self.jets_name not in variables or variables[self.jets_name] is not None:
+            jet_vars = variables.get(self.jets_name, [])
+            variables[self.jets_name] = list(jet_vars) + (cuts.variables if cuts else [])
         # get streams for selected jets from each reader
         streams = [
             r.stream(variables, int(r.num_jets / self.num_jets * num_jets), cuts)
@@ -184,11 +197,16 @@ class H5Reader:
             # select
             yield data
-    def load(self, variables: dict, num_jets: int, cuts: Cuts | None = None) -> dict:
+    def load(
+        self, variables: dict | None = None, num_jets: int | None = None, cuts: Cuts | None = None
+    ) -> dict:
+        if variables is None:
+            variables = {self.jets_name: None}
         data: dict[str, list] = {name: [] for name in variables}
         for sample in self.stream(variables, num_jets, cuts):
             for name, array in sample.items():
-                data[name].append(array)
+                if name in data:
+                    data[name].append(array)
         return {name: np.concatenate(array) for name, array in data.items()}
     def estimate_available_jets(self, cuts: Cuts, num: int = 1_000_000) -> int:

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/hdf5/h5utils.py RENAMED Viewed

@@ -1,10 +1,8 @@
-from tempfile import NamedTemporaryFile, mkdtemp
+from __future__ import annotations
-import h5py
 import numpy as np
-from numpy.lib.recfunctions import unstructured_to_structured as u2s
-__all__ = ["get_dummy_file", "join_structured_arrays"]
+__all__ = ["join_structured_arrays"]
 def get_dtype(ds, variables: list[str] | None = None, precision: str | None = None) -> np.dtype:
@@ -74,58 +72,6 @@ def cast_dtype(typestr: str, precision: str) -> np.dtype:
     raise ValueError(f"Invalid precision {precision}")
-def get_dummy_file():
-    jet_vars = [
-        "pt",
-        "eta",
-        "abs_eta",
-        "mass",
-        "HadronConeExclTruthLabelID",
-        "n_tracks",
-        "n_truth_promptLepton",
-    ]
-    track_vars = ["pt", "deta", "dphi", "dr"]
-    # settings
-    n_jets = 1000
-    n_tracks_per_jet = 40
-    # setup jets
-    shapes_jets = {
-        "inputs": [n_jets, len(jet_vars)],
-    }
-    # setup tracks
-    shapes_tracks = {
-        "inputs": [n_jets, n_tracks_per_jet, len(track_vars)],
-        "valid": [n_jets, n_tracks_per_jet],
-    }
-    # setup jets
-    rng = np.random.default_rng()
-    jets_dtype = np.dtype([(n, "f4") for n in jet_vars])
-    jets = u2s(rng.random(shapes_jets["inputs"]), jets_dtype)
-    jets["HadronConeExclTruthLabelID"] = np.random.choice([0, 4, 5], size=n_jets)
-    jets["pt"] *= 400e3
-    jets["eta"] = (jets["eta"] - 0.5) * 6.0
-    jets["abs_eta"] = np.abs(jets["eta"])
-    # setup tracks
-    tracks_dtype = np.dtype([(n, "f4") for n in track_vars])
-    tracks = u2s(rng.random(shapes_tracks["inputs"]), tracks_dtype)
-    valid = rng.random(shapes_tracks["valid"])
-    valid = valid.astype(bool).view(dtype=np.dtype([("valid", bool)]))
-    tracks = join_structured_arrays([tracks, valid])
-    fname = NamedTemporaryFile(suffix=".h5", dir=mkdtemp()).name
-    f = h5py.File(fname, "w")
-    f.create_dataset("jets", data=jets)
-    f.create_dataset("tracks", data=tracks)
-    f.create_dataset("flow", data=tracks)
-    return fname, f
 def join_structured_arrays(arrays: list):
     """Join a list of structured numpy arrays.

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/hdf5/h5writer.py RENAMED Viewed

@@ -1,5 +1,8 @@
+from __future__ import annotations
 from dataclasses import dataclass
 from pathlib import Path
+from subprocess import check_output
 import h5py
 import numpy as np
@@ -27,6 +30,8 @@ class H5Writer:
         self.dst.parent.mkdir(parents=True, exist_ok=True)
         self.file = h5py.File(self.dst, "w")
         self.add_attr("srcfile", str(self.src))
+        self.git_hash = check_output(["git", "rev-parse", "HEAD"]).decode("ascii").strip()
+        self.add_attr("git_hash", self.git_hash)
         for name, var in self.variables.items():
             self.create_ds(name, var)

atlas-ftag-tools-0.0.6/ftag/mock.py ADDED Viewed

@@ -0,0 +1,105 @@
+from __future__ import annotations
+from tempfile import NamedTemporaryFile, mkdtemp
+import h5py
+import numpy as np
+from numpy.lib.recfunctions import unstructured_to_structured as u2s
+from ftag.hdf5 import join_structured_arrays
+__all__ = ["get_mock_file"]
+JET_VARS = [
+    ("pt", "f4"),
+    ("eta", "f4"),
+    ("abs_eta", "f4"),
+    ("mass", "f4"),
+    ("pt_btagJes", "f4"),
+    ("eta_btagJes", "f4"),
+    ("n_tracks", "i4"),
+    ("HadronConeExclTruthLabelID", "i4"),
+    ("HadronConeExclTruthLabelPt", "f4"),
+    ("n_truth_promptLepton", "i4"),
+    ("flavour_label", "i4"),
+]
+TRACK_VARS = [
+    ("d0", "f4"),
+    ("z0SinTheta", "f4"),
+    ("dphi", "f4"),
+    ("deta", "f4"),
+    ("qOverP", "f4"),
+    ("IP3D_signed_d0_significance", "f4"),
+    ("IP3D_signed_z0_significance", "f4"),
+    ("phiUncertainty", "f4"),
+    ("thetaUncertainty", "f4"),
+    ("qOverPUncertainty", "f4"),
+    ("numberOfPixelHits", "i4"),
+    ("numberOfSCTHits", "i4"),
+    ("numberOfInnermostPixelLayerHits", "i4"),
+    ("numberOfNextToInnermostPixelLayerHits", "i4"),
+    ("numberOfInnermostPixelLayerSharedHits", "i4"),
+    ("numberOfInnermostPixelLayerSplitHits", "i4"),
+    ("numberOfPixelSharedHits", "i4"),
+    ("numberOfPixelSplitHits", "i4"),
+    ("numberOfSCTSharedHits", "i4"),
+    ("numberOfPixelHoles", "i4"),
+    ("numberOfSCTHoles", "i4"),
+]
+def softmax(x, axis=None):
+    """Compute softmax values for each set of scores in x."""
+    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
+    return e_x / e_x.sum(axis=axis, keepdims=True)
+def get_mock_scores(labels: np.ndarray):
+    rng = np.random.default_rng(42)
+    scores = np.zeros((len(labels), 3))
+    for label, count in zip(*np.unique(labels, return_counts=True)):
+        if label == 0:
+            scores[labels == label] = rng.normal(loc=[2, 0, 0], scale=1, size=(count, 3))
+        elif label == 4:
+            scores[labels == label] = rng.normal(loc=[0, 1, 0], scale=2.5, size=(count, 3))
+        elif label == 5:
+            scores[labels == label] = rng.normal(loc=[0, 0, 3.5], scale=5, size=(count, 3))
+    scores = softmax(scores, axis=1)
+    cols = [f"MockTagger_p{x}" for x in ["u", "c", "b"]]
+    scores = u2s(scores, dtype=np.dtype([(name, "f4") for name in cols]))
+    return scores
+def get_mock_file(num_jets=1000, tracks_name: str = "tracks", num_tracks: int = 40):
+    # setup jets
+    rng = np.random.default_rng(42)
+    jets_dtype = np.dtype(JET_VARS)
+    jets = u2s(rng.random((num_jets, len(JET_VARS))), jets_dtype)
+    jets["HadronConeExclTruthLabelID"] = rng.choice([0, 4, 5], size=num_jets)
+    jets["flavour_label"] = rng.choice([0, 4, 5], size=num_jets)
+    jets["pt"] *= 400e3
+    jets["mass"] *= 50e3
+    jets["eta"] = (jets["eta"] - 0.5) * 6.0
+    jets["abs_eta"] = np.abs(jets["eta"])
+    jets["n_truth_promptLepton"] = 0
+    # add tagger scores
+    scores = get_mock_scores(jets["HadronConeExclTruthLabelID"])
+    jets = join_structured_arrays([jets, scores])
+    # create a tempfile in a new folder
+    fname = NamedTemporaryFile(suffix=".h5", dir=mkdtemp()).name
+    f = h5py.File(fname, "w")
+    f.create_dataset("jets", data=jets)
+    # setup tracks
+    if tracks_name:
+        tracks_dtype = np.dtype(TRACK_VARS)
+        tracks = u2s(rng.random((num_jets, num_tracks, len(TRACK_VARS))), tracks_dtype)
+        valid = rng.choice([True, False], size=(num_jets, num_tracks))
+        valid = valid.astype(bool).view(dtype=np.dtype([("valid", bool)]))
+        tracks = join_structured_arrays([tracks, valid])
+        f.create_dataset(tracks_name, data=tracks)
+    return fname, f

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/sample.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 from dataclasses import dataclass
 from pathlib import Path

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/vds.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import glob
 from pathlib import Path

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/pyproject.toml RENAMED Viewed

@@ -5,11 +5,11 @@ authors = [{name="Sam Van Stroud"}, {name="Philipp Gadow"}]
 dynamic = ["version"]
 license = {text = "MIT"}
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.8"
 dependencies = [
-  "h5py==3.8.0",
-  "numpy==1.24.*",
-  "PyYAML==6.0"
+  "h5py>=3.0",  # requires numpy
+  "numpy",
+  "PyYAML>=5.1"
 ]
 [project.urls]
@@ -42,6 +42,7 @@ line-length = 100
 preview = "True"
 [tool.ruff]
+target-version = "py38"
 select = ["I", "E", "W", "F", "B", "UP", "ARG", "SIM", "TID", "RUF", "D2", "D3", "D4"]
 ignore = ["D211", "D213", "RUF005"]
 line-length = 100

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/MANIFEST.in RENAMED Viewed

File without changes

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/atlas_ftag_tools.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/atlas_ftag_tools.egg-info/top_level.txt RENAMED Viewed

File without changes

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/cuts.py RENAMED Viewed

File without changes

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/flavours.yaml RENAMED Viewed

File without changes

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/ftag/region.py RENAMED Viewed

File without changes

{atlas-ftag-tools-0.0.4 → atlas-ftag-tools-0.0.6}/setup.cfg RENAMED Viewed

File without changes

atlas-ftag-tools 0.0.4__tar.gz → 0.0.6__tar.gz

atlas-ftag-tools 0.0.4__tar.gz → 0.0.6__tar.gz