PyPI - atlas-schema - Versions diffs - 0.2.4__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

atlas-schema 0.2.4 (py3-none-any.whl) → 0.4.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

atlas_schema/_version.py +22 -4
atlas_schema/enums.py +1 -1
atlas_schema/methods.py +189 -79
atlas_schema/schema.py +234 -82
atlas_schema/utils.py +6 -7
{atlas_schema-0.2.4.dist-info → atlas_schema-0.4.0.dist-info}/METADATA +68 -18
atlas_schema-0.4.0.dist-info/RECORD +13 -0
atlas_schema-0.2.4.dist-info/RECORD +0 -13
{atlas_schema-0.2.4.dist-info → atlas_schema-0.4.0.dist-info}/WHEEL +0 -0
{atlas_schema-0.2.4.dist-info → atlas_schema-0.4.0.dist-info}/licenses/LICENSE +0 -0

atlas_schema/_version.py CHANGED Viewed

@@ -1,16 +1,34 @@
-# file generated by setuptools_scm
+# file generated by setuptools-scm
 # don't change, don't track in version control
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
 TYPE_CHECKING = False
 if TYPE_CHECKING:
-    from typing import Tuple, Union
+    from typing import Tuple
+    from typing import Union
     VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
 else:
     VERSION_TUPLE = object
+    COMMIT_ID = object
 version: str
 __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
+__version__ = version = '0.4.0'
+__version_tuple__ = version_tuple = (0, 4, 0)
-__version__ = version = '0.2.4'
-__version_tuple__ = version_tuple = (0, 2, 4)
+__commit_id__ = commit_id = None

atlas_schema/enums.py CHANGED Viewed

@@ -18,7 +18,7 @@ class MultipleEnumAccessMeta(EnumType):
     Enum Metaclass to provide a way to access multiple values all at once.
     """
-    def __getitem__(self: type[_E], key: str | tuple[str]) -> _E | list[_E]:  # type:ignore[misc,override]
+    def __getitem__(cls: type[_E], key: str | tuple[str]) -> _E | list[_E]:  # type:ignore[misc,override]
         getitem = cast(Callable[[str], _E], super().__getitem__)  # type:ignore[misc]
         if isinstance(key, tuple):
             return [getitem(name) for name in key]

atlas_schema/methods.py CHANGED Viewed

@@ -8,7 +8,6 @@ from operator import ior
 import awkward
 import particle
 from coffea.nanoevents.methods import base, candidate, vector
-from dask_awkward import dask_method
 from atlas_schema.enums import PhotonID
 from atlas_schema.typing_compat import Behavior
@@ -19,22 +18,127 @@ behavior.update(base.behavior)
 behavior.update(candidate.behavior)
+def _set_repr_name(classname):
+    def namefcn(_self):
+        return classname
+    behavior[("__typestr__", classname)] = classname[0].lower() + classname[1:]
+    behavior[classname].__repr__ = namefcn
 class NtupleEvents(behavior["NanoEvents"]):  # type: ignore[misc, valid-type, name-defined]
+    """Individual systematic variation of events."""
     def __repr__(self):
-        return f"<event {getattr(self, 'runNumber', '??')}:\
-                {getattr(self, 'eventNumber', '??')}:\
-                {getattr(self, 'mcChannelNumber', '??')}>"
+        return f"<event {getattr(self, 'runNumber', '??')}:{getattr(self, 'eventNumber', '??')}:{getattr(self, 'mcChannelNumber', '??')}>"
+    def __getitem__(self, key):
+        """Support accessing systematic variations via bracket notation.
-behavior["NanoEvents"] = NtupleEvents
+        Args:
+            key: The systematic variation name. "NOSYS" returns the nominal events.
+        Returns:
+            The requested systematic variation or nominal events for "NOSYS".
+        """
+        if key == "NOSYS":
+            return self
+        return super().__getitem__(key)
-def _set_repr_name(classname):
-    def namefcn(_self):
-        return classname
+    @property
+    def systematic(self):
+        """Get the systematic variation name for this event collection."""
+        return "nominal"
-    behavior[("__typestr__", classname)] = classname[0].lower() + classname[1:]
-    behavior[classname].__repr__ = namefcn
+    @property
+    def systematic_names(self):
+        """Get all systematic variations available in this event collection.
+        Returns a list of systematic variation names, including 'NOSYS' for nominal.
+        """
+        # Get systematics from metadata stored during schema building
+        systematics = self.metadata.get("systematics", [])
+        return ["NOSYS", *systematics]
+    @property
+    def systematics(self):
+        """Get all systematic variations available in this event collection.
+        Returns a list of systematic variation names, excluding 'nominal'.
+        """
+        # Get systematics from metadata stored during schema building
+        return [
+            getattr(self, systematic)
+            for systematic in self.systematic_names
+            if systematic != "NOSYS"
+        ]
+behavior["NtupleEvents"] = NtupleEvents
+class NtupleEventsArray(behavior[("*", "NanoEvents")]):  # type: ignore[misc, valid-type, name-defined]
+    """Collection of NtupleEvents objects, one for each systematic variation."""
+    def __getitem__(self, key):
+        """Support accessing systematic variations via bracket notation.
+        Args:
+            key: The systematic variation name. "NOSYS" returns the nominal events.
+        Returns:
+            The requested systematic variation or nominal events for "NOSYS".
+        """
+        if key == "NOSYS":
+            return self
+        return super().__getitem__(key)
+    @property
+    def systematic_names(self):
+        """Get all systematic variations available in this event collection.
+        Returns a list of systematic variation names, including 'NOSYS' for nominal.
+        """
+        # Get systematics from metadata stored during schema building
+        systematics = self.metadata.get("systematics", [])
+        return ["NOSYS", *systematics]
+    @property
+    def systematics(self):
+        """Get all systematic variations available in this event collection.
+        Returns a list of systematic variation names, excluding 'nominal'.
+        """
+        # Get systematics from metadata stored during schema building
+        return [
+            getattr(self, systematic)
+            for systematic in self.systematic_names
+            if systematic != "NOSYS"
+        ]
+behavior[("*", "NtupleEvents")] = NtupleEventsArray
+@awkward.mixin_class(behavior)
+class Systematic(base.NanoCollection, base.Systematic):
+    """Base class for systematic variations."""
+    @property
+    def metadata(self):
+        """Arbitrary metadata"""
+        return self.layout.purelist_parameter("metadata")  # pylint: disable=no-member
+    @property
+    def systematic(self):
+        """Get the systematic variation name for this event collection."""
+        return self.metadata["systematic"]
+    def __repr__(self):
+        return f"<event {self.systematic}>"
+_set_repr_name("Systematic")
 @awkward.mixin_class(behavior)
@@ -51,7 +155,7 @@ class Pass(base.NanoCollection, base.Systematic): ...
 _set_repr_name("Pass")
 behavior.update(
-    awkward._util.copy_behaviors("PtEtaPhiMLorentzVector", "Particle", behavior)
+    awkward._util.copy_behaviors("PtEtaPhiMLorentzVector", "Particle", behavior)  # pylint: disable=protected-access
 )
@@ -63,22 +167,9 @@ class Particle(vector.PtEtaPhiMLorentzVector):
     - '{obj}_select'
     """
-    @property
-    def mass(self):
-        r"""Invariant mass (+, -, -, -)
-        :math:`\sqrt{t^2-x^2-y^2-z^2}`
-        """
-        return self["mass"] / 1.0e3
-    @dask_method
     def passes(self, name):
         return self[f"select_{name}"] == 1
-    @passes.dask
-    def passes(self, dask_array, name):
-        return dask_array[f"select_{name}"] == 1
     # NB: fields with the name 'pt' take precedence over this
     # @dask_property
     # def pt(self):
@@ -102,17 +193,19 @@ class Particle(vector.PtEtaPhiMLorentzVector):
 _set_repr_name("Particle")
-ParticleArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
-ParticleArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
-ParticleArray.ProjectionClass4D = ParticleArray  # noqa: F821
-ParticleArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
+ParticleArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+ParticleArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+ParticleArray.ProjectionClass4D = ParticleArray  # noqa: F821  # pylint: disable=undefined-variable
+ParticleArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
-behavior.update(awkward._util.copy_behaviors("PolarTwoVector", "MissingET", behavior))
+behavior.update(awkward._util.copy_behaviors("PolarTwoVector", "MissingET", behavior))  # pylint: disable=protected-access
 @awkward.mixin_class(behavior)
 class MissingET(vector.PolarTwoVector, base.NanoCollection, base.Systematic):
+    """Missing transverse energy collection."""
     @property
     def r(self):
         """Distance from origin in XY plane"""
@@ -121,16 +214,18 @@ class MissingET(vector.PolarTwoVector, base.NanoCollection, base.Systematic):
 _set_repr_name("MissingET")
-MissingETArray.ProjectionClass2D = MissingETArray  # noqa: F821
-MissingETArray.ProjectionClass3D = vector.SphericalThreeVectorArray  # noqa: F821
-MissingETArray.ProjectionClass4D = vector.LorentzVectorArray  # noqa: F821
-MissingETArray.MomentumClass = MissingETArray  # noqa: F821
+MissingETArray.ProjectionClass2D = MissingETArray  # noqa: F821  # pylint: disable=undefined-variable
+MissingETArray.ProjectionClass3D = vector.SphericalThreeVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+MissingETArray.ProjectionClass4D = vector.LorentzVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+MissingETArray.MomentumClass = MissingETArray  # noqa: F821  # pylint: disable=undefined-variable
-behavior.update(awkward._util.copy_behaviors("Particle", "Photon", behavior))
+behavior.update(awkward._util.copy_behaviors("Particle", "Photon", behavior))  # pylint: disable=protected-access
 @awkward.mixin_class(behavior)
 class Photon(Particle, base.NanoCollection, base.Systematic):
+    """Photon particle collection."""
     @property
     def mass(self):
         """Return zero mass for photon."""
@@ -143,111 +238,126 @@ class Photon(Particle, base.NanoCollection, base.Systematic):
     @property
     def isEM(self):
-        return self.isEM_syst.NOSYS == 0
+        return self.isEM_syst.NOSYS == 0  # pylint: disable=no-member
     def pass_isEM(self, words: list[PhotonID]):
         # 0 is pass, 1 is fail
         return (
-            self.isEM_syst.NOSYS & reduce(ior, (1 << word.value for word in words))
+            self.isEM_syst.NOSYS & reduce(ior, (1 << word.value for word in words))  # pylint: disable=no-member
         ) == 0
 _set_repr_name("Photon")
-PhotonArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
-PhotonArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
-PhotonArray.ProjectionClass4D = PhotonArray  # noqa: F821
-PhotonArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
+PhotonArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+PhotonArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+PhotonArray.ProjectionClass4D = PhotonArray  # noqa: F821  # pylint: disable=undefined-variable
+PhotonArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
-behavior.update(awkward._util.copy_behaviors("Particle", "Electron", behavior))
+behavior.update(awkward._util.copy_behaviors("Particle", "Electron", behavior))  # pylint: disable=protected-access
 @awkward.mixin_class(behavior)
 class Electron(Particle, base.NanoCollection, base.Systematic):
+    """Electron particle collection."""
     @property
     def mass(self):
-        """Electron mass in GeV"""
-        return particle.literals.e_minus.mass / 1.0e3
+        """Electron mass in MeV"""
+        return awkward.ones_like(self.pt) * particle.literals.e_minus.mass  # pylint: disable=no-member
 _set_repr_name("Electron")
-ElectronArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
-ElectronArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
-ElectronArray.ProjectionClass4D = ElectronArray  # noqa: F821
-ElectronArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
+ElectronArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+ElectronArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+ElectronArray.ProjectionClass4D = ElectronArray  # noqa: F821  # pylint: disable=undefined-variable
+ElectronArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
-behavior.update(awkward._util.copy_behaviors("Particle", "Muon", behavior))
+behavior.update(awkward._util.copy_behaviors("Particle", "Muon", behavior))  # pylint: disable=protected-access
 @awkward.mixin_class(behavior)
 class Muon(Particle, base.NanoCollection, base.Systematic):
+    """Muon particle collection."""
     @property
     def mass(self):
-        """Muon mass in GeV"""
-        return particle.literals.mu_minus.mass / 1.0e3
+        """Muon mass in MeV"""
+        return awkward.ones_like(self.pt) * particle.literals.mu_minus.mass  # pylint: disable=no-member
 _set_repr_name("Muon")
-MuonArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
-MuonArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
-MuonArray.ProjectionClass4D = MuonArray  # noqa: F821
-MuonArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
+MuonArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+MuonArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+MuonArray.ProjectionClass4D = MuonArray  # noqa: F821  # pylint: disable=undefined-variable
+MuonArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
-behavior.update(awkward._util.copy_behaviors("Particle", "Tau", behavior))
+behavior.update(awkward._util.copy_behaviors("Particle", "Tau", behavior))  # pylint: disable=protected-access
 @awkward.mixin_class(behavior)
 class Tau(Particle, base.NanoCollection, base.Systematic):
+    """Tau particle collection."""
     @property
     def mass(self):
-        """Tau mass in GeV"""
-        return particle.literals.tau_minus.mass / 1.0e3
+        """Tau mass in MeV"""
+        return awkward.ones_like(self.pt) * particle.literals.tau_minus.mass  # pylint: disable=no-member
 _set_repr_name("Tau")
-TauArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
-TauArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
-TauArray.ProjectionClass4D = TauArray  # noqa: F821
-TauArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
+TauArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+TauArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+TauArray.ProjectionClass4D = TauArray  # noqa: F821  # pylint: disable=undefined-variable
+TauArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
-behavior.update(awkward._util.copy_behaviors("Particle", "Jet", behavior))
+behavior.update(awkward._util.copy_behaviors("Particle", "Jet", behavior))  # pylint: disable=protected-access
 @awkward.mixin_class(behavior)
-class Jet(Particle, base.NanoCollection, base.Systematic): ...
+class Jet(Particle, base.NanoCollection, base.Systematic):
+    """Jet particle collection."""
+    @property
+    def mass(self):
+        r"""Invariant mass (+, -, -, -)
+        :math:`\sqrt{t^2-x^2-y^2-z^2}`
+        """
+        return self["m"]
 _set_repr_name("Jet")
-JetArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
-JetArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
-JetArray.ProjectionClass4D = JetArray  # noqa: F821
-JetArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
+JetArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+JetArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
+JetArray.ProjectionClass4D = JetArray  # noqa: F821  # pylint: disable=undefined-variable
+JetArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821  # pylint: disable=undefined-variable,no-member
 __all__ = [
     "Electron",
-    "ElectronArray",  # noqa: F822
-    "ElectronRecord",  # noqa: F822
+    "ElectronArray",  # noqa: F822  # pylint: disable=undefined-all-variable
+    "ElectronRecord",  # noqa: F822  # pylint: disable=undefined-all-variable
     "Jet",
-    "JetArray",  # noqa: F822
-    "JetRecord",  # noqa: F822
+    "JetArray",  # noqa: F822  # pylint: disable=undefined-all-variable
+    "JetRecord",  # noqa: F822  # pylint: disable=undefined-all-variable
     "MissingET",
-    "MissingETArray",  # noqa: F822
-    "MissingETRecord",  # noqa: F822
+    "MissingETArray",  # noqa: F822  # pylint: disable=undefined-all-variable
+    "MissingETRecord",  # noqa: F822  # pylint: disable=undefined-all-variable
     "Muon",
-    "MuonArray",  # noqa: F822
-    "MuonRecord",  # noqa: F822
+    "MuonArray",  # noqa: F822  # pylint: disable=undefined-all-variable
+    "MuonRecord",  # noqa: F822  # pylint: disable=undefined-all-variable
     "NtupleEvents",
     "Particle",
-    "ParticleArray",  # noqa: F822
-    "ParticleRecord",  # noqa: F822
+    "ParticleArray",  # noqa: F822  # pylint: disable=undefined-all-variable
+    "ParticleRecord",  # noqa: F822  # pylint: disable=undefined-all-variable
     "Pass",
     "Photon",
-    "PhotonArray",  # noqa: F822
-    "PhotonRecord",  # noqa: F822
+    "PhotonArray",  # noqa: F822  # pylint: disable=undefined-all-variable
+    "PhotonRecord",  # noqa: F822  # pylint: disable=undefined-all-variable
     "Weight",
 ]

atlas_schema/schema.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import Any, ClassVar
 from coffea.nanoevents.schemas.base import BaseSchema, zip_forms
+from atlas_schema.methods import behavior as roaster
 from atlas_schema.typing_compat import Behavior, Self
@@ -170,10 +171,12 @@ class NtupleSchema(BaseSchema):  # type: ignore[misc]
             pass
         else:
             pass
-        self._form["fields"], self._form["contents"] = self._build_collections(
-            self._form["fields"], self._form["contents"]
+        self._form["fields"], self._form["contents"], discovered_systematics = (
+            self._build_collections(self._form["fields"], self._form["contents"])
         )
         self._form["parameters"]["metadata"]["version"] = self._version
+        self._form["parameters"]["metadata"]["systematics"] = discovered_systematics
+        self._form["parameters"]["__record__"] = "NtupleEvents"
     @classmethod
     def v1(cls, base_form: dict[str, Any]) -> Self:
@@ -186,11 +189,13 @@ class NtupleSchema(BaseSchema):  # type: ignore[misc]
     def _build_collections(
         self, field_names: list[str], input_contents: list[Any]
-    ) -> tuple[KeysView[str], ValuesView[dict[str, Any]]]:
+    ) -> tuple[KeysView[str], ValuesView[dict[str, Any]], list[str]]:
         branch_forms = dict(zip(field_names, input_contents))
         # parse into high-level records (collections, list collections, and singletons)
-        collections = {k.split("_")[0] for k in branch_forms}
+        collections = {
+            k.split("_")[0] for k in branch_forms if k not in self.singletons
+        }
         collections -= self.event_ids
         collections -= set(self.singletons)
@@ -227,17 +232,38 @@ class NtupleSchema(BaseSchema):  # type: ignore[misc]
             branch_forms[k.replace("_NOSYS", "") + "_NOSYS"] = branch_forms.pop(k)
         # these are collections with systematic variations
-        subcollections = {
-            k.split("__")[0].split("_", 1)[1].replace("_NOSYS", "")
-            for k in branch_forms
-            if "NOSYS" in k
-        }
+        try:
+            subcollections = {
+                k.split("__")[0].split("_", 1)[1].replace("_NOSYS", "")
+                for k in branch_forms
+                if "NOSYS" in k and k not in self.singletons
+            }
+        except IndexError as exc:
+            msg = "One of the branches does not follow the assumed pattern for this schema. [invalid-branch-name]"
+            raise RuntimeError(msg) from exc
+        all_systematics = self._discover_systematics(
+            branch_forms, collections, subcollections
+        )
+        # Pre-compute systematic branch patterns for O(1) lookups
+        # This replaces the expensive O(m*s) nested condition checks
+        systematic_branch_patterns = set()
+        for collection in collections:
+            for subcoll in subcollections:
+                for sys in all_systematics:
+                    if sys != "NOSYS":
+                        systematic_branch_patterns.add(f"{collection}_{subcoll}_{sys}")
         # Check the presence of the event_ids
         missing_event_ids = [
             event_id for event_id in self.event_ids if event_id not in branch_forms
         ]
+        missing_singletons = [
+            singleton for singleton in self.singletons if singleton not in branch_forms
+        ]
         if len(missing_event_ids) > 0:
             if self.error_missing_event_ids:
                 msg = f"There are missing event ID fields: {missing_event_ids} \n\n\
@@ -253,99 +279,227 @@ class NtupleSchema(BaseSchema):  # type: ignore[misc]
                 stacklevel=2,
             )
+        if len(missing_singletons) > 0:
+            # These singletons are simply branches we do not parse or handle
+            # explicitly in atlas-schema (e.g. they are copied directly to the
+            # output structure we provide you), however there can be false
+            # positives when you submit multiple files with different branch
+            # structures and this warning could be safely ignored.
+            warnings.warn(
+                f"Missing singletons : {missing_singletons}. [singleton-missing]",
+                RuntimeWarning,
+                stacklevel=2,
+            )
         output = {}
         # first, register singletons (event-level, others)
         for name in {*self.event_ids, *self.singletons}:
-            if name in missing_event_ids:
+            if name in [*missing_event_ids, *missing_singletons]:
                 continue
             output[name] = branch_forms[name]
-        # next, go through and start grouping up collections
-        for name in collections:
-            content = {}
+        # First, build nominal collections the traditional way
+        nominal_collections = {}
+        for collection_name in collections:
+            collection_content = {}
             used = set()
+            # Process subcollections with NOSYS variations
             for subname in subcollections:
-                prefix = f"{name}_{subname}_"
-                used.update({k for k in branch_forms if k.startswith(prefix)})
-                subcontent = {
-                    k[len(prefix) :]: branch_forms[k]
-                    for k in branch_forms
-                    if k.startswith(prefix)
-                }
-                if subcontent:
-                    # create the nominal version
-                    content[subname] = branch_forms[f"{prefix}NOSYS"]
-                    # create a collection of the systematic variations for the given variable
-                    content[f"{subname}_syst"] = zip_forms(
-                        subcontent, f"{name}_syst", record_name="NanoCollection"
+                prefix = f"{collection_name}_{subname}_"
+                nosys_branch = f"{prefix}NOSYS"
+                if nosys_branch in branch_forms:
+                    collection_content[subname] = branch_forms[nosys_branch]
+                    used.add(nosys_branch)
+            # Add non-systematic branches (like eta, phi that don't vary)
+            for k, form in branch_forms.items():
+                if (
+                    k.startswith(collection_name + "_")
+                    and k not in used
+                    and "_NOSYS" not in k
+                    and k
+                    not in systematic_branch_patterns  # O(1) lookup instead of O(m*s)
+                ):
+                    field_name = k[len(collection_name) + 1 :]
+                    if field_name not in collection_content:
+                        collection_content[field_name] = form
+            if collection_content:
+                behavior = self.mixins.get(collection_name, "")
+                if not behavior:
+                    behavior = self.suggested_behavior(collection_name)
+                    warnings.warn(
+                        f"I found a collection with no defined mixin: '{collection_name}'. I will assume behavior: '{behavior}'. To suppress this warning next time, please define mixins for your custom collections. [mixin-undefined]",
+                        RuntimeWarning,
+                        stacklevel=2,
                     )
+                nominal_collections[collection_name] = zip_forms(
+                    collection_content, collection_name, record_name=behavior
+                )
+                nominal_collections[collection_name].setdefault("parameters", {})
+                nominal_collections[collection_name]["parameters"].update(
+                    {"collection_name": collection_name}
+                )
-            content.update(
-                {
-                    k[len(name) + 1 :]: branch_forms[k]
-                    for k in branch_forms
-                    if k.startswith(name + "_") and k not in used
+        # Add nominal collections to output
+        output.update(nominal_collections)
+        # Now build systematic event structures
+        for systematic in all_systematics:
+            if systematic == "NOSYS":
+                continue
+            # Check which collections actually have this systematic variation
+            systematic_collections = {}
+            for collection_name in collections:
+                # Check if this collection has any systematic branches for this systematic
+                has_systematic_data = False
+                collection_content = {}
+                used = set()
+                # Process subcollections with systematic variations
+                for subname in subcollections:
+                    prefix = f"{collection_name}_{subname}_"
+                    target_branch = f"{prefix}{systematic}"
+                    fallback_branch = f"{prefix}NOSYS"
+                    if target_branch in branch_forms:
+                        # Use the systematic variation
+                        collection_content[subname] = branch_forms[target_branch]
+                        used.add(target_branch)
+                        has_systematic_data = True
+                    elif fallback_branch in branch_forms:
+                        # Fall back to nominal
+                        collection_content[subname] = branch_forms[fallback_branch]
+                        used.add(fallback_branch)
+                # Add non-systematic branches
+                for k, form in branch_forms.items():
+                    if (
+                        k.startswith(collection_name + "_")
+                        and k not in used
+                        and "_NOSYS" not in k
+                        and k
+                        not in systematic_branch_patterns  # O(1) lookup instead of O(m*s)
+                    ):
+                        field_name = k[len(collection_name) + 1 :]
+                        if field_name not in collection_content:
+                            collection_content[field_name] = form
+                # If this collection has systematic data or fallback data, include it
+                if collection_content:
+                    behavior = self.mixins.get(collection_name, "")
+                    if not behavior:
+                        behavior = self.suggested_behavior(collection_name)
+                        # Only warn once (for nominal collections)
+                    # If no systematic data, use the nominal collection directly
+                    if (
+                        not has_systematic_data
+                        and collection_name in nominal_collections
+                    ):
+                        systematic_collections[collection_name] = nominal_collections[
+                            collection_name
+                        ]
+                    else:
+                        # Build the systematic collection
+                        systematic_collections[collection_name] = zip_forms(
+                            collection_content, collection_name, record_name=behavior
+                        )
+                        systematic_collections[collection_name].setdefault(
+                            "parameters", {}
+                        )
+                        systematic_collections[collection_name]["parameters"].update(
+                            {"collection_name": collection_name}
+                        )
+            # Only create systematic event if there are collections for it
+            if systematic_collections:
+                output[systematic] = {
+                    "class": "RecordArray",
+                    "contents": list(systematic_collections.values()),
+                    "fields": list(systematic_collections.keys()),
+                    "form_key": f"%21invalid%2C{systematic}",
+                    "parameters": {
+                        "__record__": "Systematic",
+                        "metadata": {"systematic": systematic},
+                    },
                 }
-            )
-            if not used and not content:
+        # Handle any remaining unrecognized branches as singletons
+        processed_branches = set()
+        # Add event IDs and explicit singletons
+        processed_branches.update(self.event_ids)
+        processed_branches.update(self.singletons)
+        # Add collection-related branches
+        for collection_name in collections:
+            for branch_name in branch_forms:
+                if branch_name.startswith(collection_name + "_"):
+                    processed_branches.add(branch_name)
+        # Find unrecognized branches
+        for branch_name, form in branch_forms.items():
+            if branch_name not in processed_branches:
+                # This is an unrecognized branch - treat as singleton with warning
                 warnings.warn(
-                    f"I identified a branch that likely does not have any leaves: '{name}'. I will treat this as a 'singleton'. To suppress this warning next time, please define your singletons explicitly. [singleton-undefined]",
+                    f"I identified a branch that likely does not have any leaves: '{branch_name}'. I will treat this as a 'singleton'. To suppress this warning, add this branch to the singletons set. [singleton-undefined]",
                     RuntimeWarning,
                     stacklevel=2,
                 )
-                self.singletons.add(name)
-                output[name] = branch_forms[name]
+                output[branch_name] = form
-            else:
-                behavior = self.mixins.get(name, "")
-                if not behavior:
-                    behavior = self.suggested_behavior(name)
-                    warnings.warn(
-                        f"I found a collection with no defined mixin: '{name}'. I will assume behavior: '{behavior}'. To suppress this warning next time, please define mixins for your custom collections. [mixin-undefined]",
-                        RuntimeWarning,
-                        stacklevel=2,
-                    )
+        # Return discovered systematics (excluding NOSYS/nominal)
+        discovered_systematics = sorted([s for s in all_systematics if s != "NOSYS"])
-                output[name] = zip_forms(content, name, record_name=behavior)
-            output[name].setdefault("parameters", {})
-            output[name]["parameters"].update({"collection_name": name})
-            if output[name]["class"] == "ListOffsetArray":
-                if output[name]["class"] == "RecordArray":
-                    parameters = output[name]["content"]["fields"]
-                    contents = output[name]["content"]["contents"]
-                else:
-                    # these are also singletons of another kind that we just pass through
-                    continue
-            elif output[name]["class"] == "RecordArray":
-                parameters = output[name]["fields"]
-                contents = output[name]["contents"]
-            elif output[name]["class"] == "NumpyArray":
-                # these are singletons that we just pass through
-                continue
-            else:
-                msg = f"Unhandled class {output[name]['class']}"
-                raise RuntimeError(msg)
+        return output.keys(), output.values(), discovered_systematics
-            # update docstrings as needed
-            # NB: must be before flattening for easier logic
-            for index, parameter in enumerate(parameters):
-                if "parameters" not in contents[index]:
-                    continue
-                parsed_name = parameter.replace("_NOSYS", "")
-                contents[index]["parameters"]["__doc__"] = self.docstrings.get(
-                    parsed_name,
-                    contents[index]["parameters"].get(
-                        "__doc__", "no docstring available"
-                    ),
-                )
+    def _discover_systematics(
+        self,
+        branch_forms: dict[str, Any],
+        collections: set[str],
+        subcollections: set[str],
+    ) -> set[str]:
+        """Extract systematic variations from branch names.
+        Returns:
+            set: Set of all systematic variation names found in branches
+        """
+        # Precompute the "<subcollection>_" prefixes once, outside the branch loop.
+        # NOTE: each branch is still tested against every prefix below (O(n*m) worst case).
+        subcoll_patterns = {f"{subcoll}_" for subcoll in subcollections}
+        all_systematics = set()
+        for k in branch_forms:
+            if not ("_" in k and k not in self.singletons):
+                continue
+            # Handle the pattern: collection_subcollection_systematic
+            # where systematic can contain double underscores like "JET_EnergyResolution__1up"
+            parts = k.split("_")
+            if len(parts) < 3:
+                continue
+            # Find the collection and subcollection parts
+            collection = parts[0]
+            if collection not in collections:
+                continue
+            # Find where the subcollection ends by looking for a known pattern
+            # The systematic starts after the subcollection
+            remaining = "_".join(parts[1:])
+            # Test the remainder against each precomputed subcollection prefix
+            for pattern in subcoll_patterns:
+                if remaining.startswith(pattern):
+                    systematic = remaining[len(pattern) :]
+                    if systematic and systematic != "NOSYS":
+                        all_systematics.add(systematic)
+                    break
-        return output.keys(), output.values()
+        # Always include NOSYS as the nominal case
+        all_systematics.add("NOSYS")
+        return all_systematics
     @classmethod
     def behavior(cls) -> Behavior:
@@ -354,8 +508,6 @@ class NtupleSchema(BaseSchema):  # type: ignore[misc]
         Returns:
             dict[str | tuple['*', str], type[awkward.Record]]: an :data:`awkward.behavior` dictionary
         """
-        from atlas_schema.methods import behavior as roaster
         return roaster
     @classmethod

atlas_schema/utils.py CHANGED Viewed

@@ -1,16 +1,15 @@
 from __future__ import annotations
 from enum import Enum
-from typing import TypeVar, Union, cast
+from typing import TypeVar, cast
 import awkward as ak
-import dask_awkward as dak
-Array = TypeVar("Array", bound=Union[dak.Array, ak.Array])
+Array = TypeVar("Array", bound=ak.Array)
 _E = TypeVar("_E", bound=Enum)
-def isin(element: Array, test_elements: dak.Array | ak.Array, axis: int = -1) -> Array:
+def isin(element: Array, test_elements: ak.Array, axis: int = -1) -> Array:
     """
     Find test_elements in element. Similar in API as :func:`numpy.isin`.
@@ -21,12 +20,12 @@ def isin(element: Array, test_elements: dak.Array | ak.Array, axis: int = -1) ->
     comparison.
     Args:
-        element (dask_awkward.Array or ak.Array): input array of values.
-        test_elements (dask_awkward.Array or ak.Array): one-dimensional set of values against which to test each value of *element*.
+        element (ak.Array): input array of values.
+        test_elements (ak.Array): one-dimensional set of values against which to test each value of *element*.
         axis (int): the axis along which the comparison is performed
     Returns:
-        dask_awkward.Array or ak.Array: result of comparison for test_elements in *element*
+        ak.Array: result of comparison for test_elements in *element*
     Example:
         >>> import awkward as ak

{atlas_schema-0.2.4.dist-info → atlas_schema-0.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: atlas-schema
-Version: 0.2.4
+Version: 0.4.0
 Summary: Helper python package for ATLAS Common NTuple Analysis work.
 Project-URL: Homepage, https://github.com/scipp-atlas/atlas-schema
 Project-URL: Bug Tracker, https://github.com/scipp-atlas/atlas-schema/issues
 Project-URL: Discussions, https://github.com/scipp-atlas/atlas-schema/discussions
-Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.2.4/
+Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.4.0/
 Project-URL: Releases, https://github.com/scipp-atlas/atlas-schema/releases
 Project-URL: Release Notes, https://atlas-schema.readthedocs.io/en/latest/history.html
 Author-email: Giordon Stark <kratsg@gmail.com>
@@ -227,7 +227,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering
 Classifier: Typing :: Typed
 Requires-Python: >=3.9
-Requires-Dist: coffea[dask]>=2024.4.1
+Requires-Dist: coffea[dask]>=2025.7.0
 Requires-Dist: particle>=0.25.0
 Provides-Extra: dev
 Requires-Dist: pytest-cov>=3; extra == 'dev'
@@ -251,7 +251,7 @@ Requires-Dist: tbump>=6.7.0; extra == 'test'
 Requires-Dist: twine; extra == 'test'
 Description-Content-Type: text/markdown
-# atlas-schema v0.2.4
+# atlas-schema v0.4.0
 [![Actions Status][actions-badge]][actions-link]
 [![Documentation Status][rtd-badge]][rtd-link]
@@ -335,11 +335,9 @@ like below:
 ```python
 import awkward as ak
-import dask
-import hist.dask as had
+from hist import Hist
 import matplotlib.pyplot as plt
 from coffea import processor
-from coffea.nanoevents import NanoEventsFactory
 from distributed import Client
 from atlas_schema.schema import NtupleSchema
@@ -352,7 +350,7 @@ class MyFirstProcessor(processor.ProcessorABC):
     def process(self, events):
         dataset = events.metadata["dataset"]
         h_ph_pt = (
-            had.Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
+            Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
             .Regular(200, 0.0, 2000.0, name="pt", label="$pt_{\gamma}$ [GeV]")
             .Int64()
         )
@@ -376,17 +374,18 @@ class MyFirstProcessor(processor.ProcessorABC):
 if __name__ == "__main__":
     client = Client()
-    fname = "ntuple.root"
-    events = NanoEventsFactory.from_root(
-        {fname: "analysis"},
-        schemaclass=NtupleSchema,
-        metadata={"dataset": "700352.Zqqgamma.mc20d.v1"},
-    ).events()
+    fileset = {"700352.Zqqgamma.mc20d.v1": {"files": {"ntuple.root": "analysis"}}}
-    p = MyFirstProcessor()
-    out = p.process(events)
-    (computed,) = dask.compute(out)
-    print(computed)
+    run = processor.Runner(
+        executor=processor.IterativeExecutor(compression=None),
+        schema=NtupleSchema,
+        savemetrics=True,
+    )
+    out, metrics = run(fileset, processor_instance=MyFirstProcessor())
+    print(out)
+    print(metrics)
     fig, ax = plt.subplots()
     out["700352.Zqqgamma.mc20d.v1"]["ph_pt"].plot1d(ax=ax)
@@ -400,6 +399,57 @@ which produces
 <img src="https://raw.githubusercontent.com/scipp-atlas/atlas-schema/main/docs/_static/img/ph_pt.png" alt="three stacked histograms of photon pT, with each stack corresponding to: no selection, requiring the isEM flag, and inverting the isEM requirement" width="500" style="display: block; margin-left: auto; margin-right: auto;">
+## Processing with Systematic Variations
+For analyses requiring systematic uncertainty evaluation, you can easily iterate
+over all systematic variations using the new `events["NOSYS"]` alias and
+`systematic_names` property:
+```python
+import awkward as ak
+from hist import Hist
+from coffea import processor
+from atlas_schema.schema import NtupleSchema
+class SystematicsProcessor(processor.ProcessorABC):
+    def __init__(self):
+        self.h = (
+            Hist.new.StrCat([], name="variation", growth=True)
+            .Regular(50, 0.0, 500.0, name="jet_pt", label="Leading Jet $p_T$ [GeV]")
+            .Int64()
+        )
+    def process(self, events):
+        dsid = events.metadata["dataset"]
+        # Process all systematic variations including nominal ("NOSYS")
+        for variation in events.systematic_names:
+            event_view = events[variation]
+            # Fill histogram with leading jet pT for this systematic variation
+            leading_jet_pt = event_view.jet.pt[:, 0] / 1_000  # Convert MeV to GeV
+            weights = (
+                event_view.weight.mc
+                if hasattr(event_view, "weight")
+                else ak.ones_like(leading_jet_pt)
+            )
+            self.h.fill(variation=variation, jet_pt=leading_jet_pt, weight=weights)
+        return {
+            "hist": self.h,
+            "meta": {"sumw": {dsid: {(events.metadata["fileuuid"], ak.sum(weights))}}},
+        }
+    def postprocess(self, accumulator):
+        return accumulator
+```
+This approach allows you to seamlessly process both nominal and systematic
+variations in a single loop, eliminating the need for special-case handling of
+the nominal variation.
 <!-- SPHINX-END -->
 ## Developer Notes

atlas_schema-0.4.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
+atlas_schema/_version.py,sha256=2_0GUP7yBCXRus-qiJKxQD62z172WSs1sQ6DVpPsbmM,704
+atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
+atlas_schema/enums.py,sha256=GDDKSBZY-L8X5W41Kwi0G5Yd4Vu4Kiga-ttSbztEXEM,3687
+atlas_schema/methods.py,sha256=rQRQgD26ndCzwpxAuAeEbXIHd8v64cK2rP5A5GxvBn8,12934
+atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+atlas_schema/schema.py,sha256=TbNxekA0DRwS5Mye0frHo7p1K7LW7FBXrkEwmPUQ8MA,27884
+atlas_schema/typing_compat.py,sha256=3G8h4WfLoDmrtWZvtYKLCwEpCQ_O4Fwygb2WlDRSE4E,488
+atlas_schema/utils.py,sha256=E3jCka-pf_0h_r3OO0hMLlbF6dQKoxr2T1Gd18-aJ4U,2034
+atlas_schema-0.4.0.dist-info/METADATA,sha256=9d1QN2OaZ0i68ZDAR3GBKUNw9Lm4l1nAJycCb9jSmqg,21755
+atlas_schema-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+atlas_schema-0.4.0.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
+atlas_schema-0.4.0.dist-info/RECORD,,

atlas_schema-0.2.4.dist-info/RECORD DELETED Viewed

@@ -1,13 +0,0 @@
-atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
-atlas_schema/_version.py,sha256=4gL0W4-u58XR5lRLpeoIPrGhcewTk0-527de6uTNmkg,411
-atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
-atlas_schema/enums.py,sha256=hwgOvFBmITNxL0MQkrNpbiPv9VMezFoE-eyGgjzem8E,3688
-atlas_schema/methods.py,sha256=hFdtKXnyCcx4M05WhAM24fKwzEhh_ubA7jNa6_xv67k,7238
-atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-atlas_schema/schema.py,sha256=4OAvuPrOds-taVES32y4K8dvNDf8PKdu83DZqAlTdp8,20621
-atlas_schema/typing_compat.py,sha256=3G8h4WfLoDmrtWZvtYKLCwEpCQ_O4Fwygb2WlDRSE4E,488
-atlas_schema/utils.py,sha256=IqMbWqq0ib_kZdJCaM5ghURZatmb8pKidlewx3dpy0A,2164
-atlas_schema-0.2.4.dist-info/METADATA,sha256=KZDH5fsZon5wFXuU-iSUeqgjoplOwAoqTM1I9LgaTiM,20107
-atlas_schema-0.2.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-atlas_schema-0.2.4.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
-atlas_schema-0.2.4.dist-info/RECORD,,

{atlas_schema-0.2.4.dist-info → atlas_schema-0.4.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{atlas_schema-0.2.4.dist-info → atlas_schema-0.4.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

atlas-schema 0.2.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

atlas-schema 0.2.4py3-none-any.whl → 0.4.0py3-none-any.whl