atlas-schema 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atlas_schema/_version.py +9 -4
- atlas_schema/methods.py +26 -21
- atlas_schema/schema.py +235 -22
- atlas_schema/typing_compat.py +2 -2
- atlas_schema/utils.py +6 -7
- {atlas_schema-0.2.3.dist-info → atlas_schema-0.3.0.dist-info}/METADATA +18 -19
- atlas_schema-0.3.0.dist-info/RECORD +13 -0
- atlas_schema-0.2.3.dist-info/RECORD +0 -13
- {atlas_schema-0.2.3.dist-info → atlas_schema-0.3.0.dist-info}/WHEEL +0 -0
- {atlas_schema-0.2.3.dist-info → atlas_schema-0.3.0.dist-info}/licenses/LICENSE +0 -0
atlas_schema/_version.py
CHANGED
@@ -1,8 +1,13 @@
|
|
1
|
-
# file generated by
|
1
|
+
# file generated by setuptools-scm
|
2
2
|
# don't change, don't track in version control
|
3
|
+
|
4
|
+
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
5
|
+
|
3
6
|
TYPE_CHECKING = False
|
4
7
|
if TYPE_CHECKING:
|
5
|
-
from typing import Tuple
|
8
|
+
from typing import Tuple
|
9
|
+
from typing import Union
|
10
|
+
|
6
11
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
7
12
|
else:
|
8
13
|
VERSION_TUPLE = object
|
@@ -12,5 +17,5 @@ __version__: str
|
|
12
17
|
__version_tuple__: VERSION_TUPLE
|
13
18
|
version_tuple: VERSION_TUPLE
|
14
19
|
|
15
|
-
__version__ = version = '0.
|
16
|
-
__version_tuple__ = version_tuple = (0,
|
20
|
+
__version__ = version = '0.3.0'
|
21
|
+
__version_tuple__ = version_tuple = (0, 3, 0)
|
atlas_schema/methods.py
CHANGED
@@ -8,7 +8,6 @@ from operator import ior
|
|
8
8
|
import awkward
|
9
9
|
import particle
|
10
10
|
from coffea.nanoevents.methods import base, candidate, vector
|
11
|
-
from dask_awkward import dask_method
|
12
11
|
|
13
12
|
from atlas_schema.enums import PhotonID
|
14
13
|
from atlas_schema.typing_compat import Behavior
|
@@ -63,22 +62,9 @@ class Particle(vector.PtEtaPhiMLorentzVector):
|
|
63
62
|
- '{obj}_select'
|
64
63
|
"""
|
65
64
|
|
66
|
-
@property
|
67
|
-
def mass(self):
|
68
|
-
r"""Invariant mass (+, -, -, -)
|
69
|
-
|
70
|
-
:math:`\sqrt{t^2-x^2-y^2-z^2}`
|
71
|
-
"""
|
72
|
-
return self["mass"] / 1.0e3
|
73
|
-
|
74
|
-
@dask_method
|
75
65
|
def passes(self, name):
|
76
66
|
return self[f"select_{name}"] == 1
|
77
67
|
|
78
|
-
@passes.dask
|
79
|
-
def passes(self, dask_array, name):
|
80
|
-
return dask_array[f"select_{name}"] == 1
|
81
|
-
|
82
68
|
# NB: fields with the name 'pt' take precedence over this
|
83
69
|
# @dask_property
|
84
70
|
# def pt(self):
|
@@ -166,8 +152,8 @@ behavior.update(awkward._util.copy_behaviors("Particle", "Electron", behavior))
|
|
166
152
|
class Electron(Particle, base.NanoCollection, base.Systematic):
|
167
153
|
@property
|
168
154
|
def mass(self):
|
169
|
-
"""Electron mass in
|
170
|
-
return particle.literals.e_minus.mass
|
155
|
+
"""Electron mass in MeV"""
|
156
|
+
return awkward.ones_like(self.pt) * particle.literals.e_minus.mass
|
171
157
|
|
172
158
|
|
173
159
|
_set_repr_name("Electron")
|
@@ -184,8 +170,8 @@ behavior.update(awkward._util.copy_behaviors("Particle", "Muon", behavior))
|
|
184
170
|
class Muon(Particle, base.NanoCollection, base.Systematic):
|
185
171
|
@property
|
186
172
|
def mass(self):
|
187
|
-
"""Muon mass in
|
188
|
-
return particle.literals.mu_minus.mass
|
173
|
+
"""Muon mass in MeV"""
|
174
|
+
return awkward.ones_like(self.pt) * particle.literals.mu_minus.mass
|
189
175
|
|
190
176
|
|
191
177
|
_set_repr_name("Muon")
|
@@ -202,8 +188,8 @@ behavior.update(awkward._util.copy_behaviors("Particle", "Tau", behavior))
|
|
202
188
|
class Tau(Particle, base.NanoCollection, base.Systematic):
|
203
189
|
@property
|
204
190
|
def mass(self):
|
205
|
-
"""Tau mass in
|
206
|
-
return particle.literals.tau_minus.mass
|
191
|
+
"""Tau mass in MeV"""
|
192
|
+
return awkward.ones_like(self.pt) * particle.literals.tau_minus.mass
|
207
193
|
|
208
194
|
|
209
195
|
_set_repr_name("Tau")
|
@@ -218,7 +204,14 @@ behavior.update(awkward._util.copy_behaviors("Particle", "Jet", behavior))
|
|
218
204
|
|
219
205
|
|
220
206
|
@awkward.mixin_class(behavior)
|
221
|
-
class Jet(Particle, base.NanoCollection, base.Systematic):
|
207
|
+
class Jet(Particle, base.NanoCollection, base.Systematic):
|
208
|
+
@property
|
209
|
+
def mass(self):
|
210
|
+
r"""Invariant mass (+, -, -, -)
|
211
|
+
|
212
|
+
:math:`\sqrt{t^2-x^2-y^2-z^2}`
|
213
|
+
"""
|
214
|
+
return self["m"]
|
222
215
|
|
223
216
|
|
224
217
|
_set_repr_name("Jet")
|
@@ -230,12 +223,24 @@ JetArray.MomentumClass = vector.LorentzVectorArray # noqa: F821
|
|
230
223
|
|
231
224
|
__all__ = [
|
232
225
|
"Electron",
|
226
|
+
"ElectronArray", # noqa: F822
|
227
|
+
"ElectronRecord", # noqa: F822
|
233
228
|
"Jet",
|
229
|
+
"JetArray", # noqa: F822
|
230
|
+
"JetRecord", # noqa: F822
|
234
231
|
"MissingET",
|
232
|
+
"MissingETArray", # noqa: F822
|
233
|
+
"MissingETRecord", # noqa: F822
|
235
234
|
"Muon",
|
235
|
+
"MuonArray", # noqa: F822
|
236
|
+
"MuonRecord", # noqa: F822
|
236
237
|
"NtupleEvents",
|
237
238
|
"Particle",
|
239
|
+
"ParticleArray", # noqa: F822
|
240
|
+
"ParticleRecord", # noqa: F822
|
238
241
|
"Pass",
|
239
242
|
"Photon",
|
243
|
+
"PhotonArray", # noqa: F822
|
244
|
+
"PhotonRecord", # noqa: F822
|
240
245
|
"Weight",
|
241
246
|
]
|
atlas_schema/schema.py
CHANGED
@@ -1,42 +1,142 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import difflib
|
3
4
|
import warnings
|
4
5
|
from collections.abc import KeysView, ValuesView
|
5
6
|
from typing import Any, ClassVar
|
6
7
|
|
7
8
|
from coffea.nanoevents.schemas.base import BaseSchema, zip_forms
|
8
9
|
|
10
|
+
from atlas_schema.methods import behavior as roaster
|
9
11
|
from atlas_schema.typing_compat import Behavior, Self
|
10
12
|
|
11
13
|
|
12
14
|
class NtupleSchema(BaseSchema): # type: ignore[misc]
|
13
|
-
"""
|
15
|
+
"""The schema for building ATLAS ntuples following the typical centralized formats.
|
14
16
|
|
15
|
-
|
16
|
-
the naming pattern of the branches.
|
17
|
+
This schema is built from all branches found in a tree in the supplied
|
18
|
+
file, based on the naming pattern of the branches. This naming pattern is
|
19
|
+
typically assumed to be
|
17
20
|
|
18
|
-
-
|
21
|
+
.. code-block:: bash
|
22
|
+
|
23
|
+
{collection:str}_{subcollection:str}_{systematic:str}
|
24
|
+
|
25
|
+
where:
|
26
|
+
* ``collection`` is assumed to be a prefix with typical characters, following the regex ``[a-zA-Z][a-zA-Z0-9]*``; that is starting with a case-insensitive letter, and followed by zero or more alphanumeric characters,
|
27
|
+
* ``subcollection`` is assumed to be anything with typical characters (allowing for underscores) following the regex ``[a-zA-Z_][a-zA-Z0-9_]*``; that is starting with a case-insensitive letter or underscore, and followed by zero or more alphanumeric characters including underscores, and
|
28
|
+
* ``systematic`` is assumed to be either ``NOSYS`` to indicate a branch with potential systematic variations, or anything with typical characters (allowing for underscores) following the same regular expression as the ``subcollection``.
|
29
|
+
|
30
|
+
Here, a collection refers to the top-level entry to access an item - a collection called ``el`` will be accessible under the ``el`` attributes via ``events['el']`` or ``events.el``. A subcollection called ``pt`` will be accessible under that collection, such as ``events['el']['pt']`` or ``events.el.pt``. This is the power of the schema providing a more user-friendly (and programmatic) access to the underlying branches.
|
31
|
+
|
32
|
+
The above logic means that the following branches below will be categorized as follows:
|
33
|
+
|
34
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
35
|
+
| branch | collection | subcollection | systematic |
|
36
|
+
+===============================+===================+=======================+==================+
|
37
|
+
| ``'eventNumber'`` | ``'eventNumber'`` | ``None`` | ``None`` |
|
38
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
39
|
+
| ``'runNumber'`` | ``'runNumber'`` | ``None`` | ``None`` |
|
40
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
41
|
+
| ``'el_pt_NOSYS'`` | ``'el'`` | ``'pt'`` | ``'NOSYS'`` |
|
42
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
43
|
+
| ``'jet_cleanTightBad_NOSYS'`` | ``'jet'`` | ``'cleanTightBad'`` | ``'NOSYS'`` |
|
44
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
45
|
+
| ``'jet_select_btag_NOSYS'`` | ``'jet'`` | ``'select_btag'`` | ``'NOSYS'`` |
|
46
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
47
|
+
| ``'jet_e_NOSYS'`` | ``'jet'`` | ``'e'`` | ``'NOSYS'`` |
|
48
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
49
|
+
| ``'truthel_phi'`` | ``'truthel'`` | ``'phi'`` | ``None`` |
|
50
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
51
|
+
| ``'truthel_pt'`` | ``'truthel'`` | ``'pt'`` | ``None`` |
|
52
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
53
|
+
| ``'ph_eta'`` | ``'ph'`` | ``'eta'`` | ``None`` |
|
54
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
55
|
+
| ``'ph_phi_SCALE__1up'`` | ``'ph'`` | ``'phi'`` | ``'SCALE__1up'`` |
|
56
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
57
|
+
| ``'mu_TTVA_effSF_NOSYS'`` | ``'mu'`` | ``'TTVA_effSF'`` | ``'NOSYS'`` |
|
58
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
59
|
+
| ``'recojet_antikt4PFlow_pt'`` | ``'recojet'`` | ``'antikt4PFlow_pt'`` | ``None`` |
|
60
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
61
|
+
| ``'recojet_antikt10UFO_m'`` | ``'recojet'`` | ``'antikt10UFO_m'`` | ``None`` |
|
62
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
63
|
+
|
64
|
+
Sometimes this logic is not what you want, and there are ways to teach ``NtupleSchema`` how to group some of these better for atypical cases. We can address these case-by-case.
|
65
|
+
|
66
|
+
**Singletons**
|
67
|
+
|
68
|
+
Sometimes you have particular branches that you don't want to be treated as a collection (with subcollections). And sometimes you will see warnings about this (see :ref:`faq`). There are some pre-defined ``singletons`` stored under :attr:`event_ids`, and these will be lazily treated as a _singleton_. For other cases where you add your own branches, you can additionally extend this class to add your own :attr:`singletons`:
|
69
|
+
|
70
|
+
.. code-block:: python
|
71
|
+
|
72
|
+
from atlas_schema.schema import NtupleSchema
|
73
|
+
|
74
|
+
|
75
|
+
class MySchema(NtupleSchema):
|
76
|
+
singletons = {"RandomRunNumber"}
|
77
|
+
|
78
|
+
and use this schema in your analysis code. The rest of the logic will be handled for you, and you can access your singletons under ``events.RandomRunNumber`` as expected.
|
79
|
+
|
80
|
+
**Mixins (collections, subcollections)**
|
81
|
+
|
82
|
+
In more complicated scenarios, you might need to teach :class:`NtupleSchema` how to handle collections that end up having underscores in their name, or other characters that make the grouping non-trivial. In some other scenarios, you want to tell the schema to assign a certain set of behaviors to a collection - rather than the default :class:`atlas_schema.methods.Particle` behavior. This is where :attr:`mixins` comes in. Similar to how :attr:`singletons` are handled, you extend this schema to include your own ``mixins`` pointing them at one of the behaviors defined in :mod:`atlas_schema.methods`.
|
83
|
+
|
84
|
+
Let's demonstrate both cases. Imagine you want to have your ``truthel`` collections above treated as :class:`atlas_schema.methods.Electron`, then you would extend the existing :attr:`mixins`:
|
85
|
+
|
86
|
+
.. code-block:: python
|
87
|
+
|
88
|
+
from atlas_schema.schema import NtupleSchema
|
89
|
+
|
90
|
+
|
91
|
+
class MySchema(NtupleSchema):
|
92
|
+
mixins = {"truthel": "Electron", **NtupleSchema.mixins}
|
93
|
+
|
94
|
+
Now, ``events.truthel`` will give you arrays zipped up with :class:`atlas_schema.methods.Electron` behaviors.
|
95
|
+
|
96
|
+
If instead, you run into problems with mixing different branches in the same collection, because the default behavior of this schema described above is not smart enough to handle the atypical cases, you can explicitly fix this by defining your collections:
|
97
|
+
|
98
|
+
.. code-block:: python
|
99
|
+
|
100
|
+
from atlas_schema.schema import NtupleSchema
|
101
|
+
|
102
|
+
|
103
|
+
class MySchema(NtupleSchema):
|
104
|
+
mixins = {
|
105
|
+
"recojet_antikt4PFlow": "Jet",
|
106
|
+
"recojet_antikt10UFO": "Jet",
|
107
|
+
**NtupleSchema.mixins,
|
108
|
+
}
|
109
|
+
|
110
|
+
Now, ``events.recojet_antikt4PFlow`` and ``events.recojet_antikt10UFO`` will be separate collections, instead of a single ``events.recojet`` that incorrectly merged branches from each of these collections.
|
19
111
|
"""
|
20
112
|
|
21
|
-
__dask_capable__ = True
|
113
|
+
__dask_capable__: ClassVar[bool] = True
|
114
|
+
|
115
|
+
warn_missing_crossrefs: ClassVar[bool] = True
|
22
116
|
|
23
|
-
|
24
|
-
error_missing_event_ids = False
|
117
|
+
#: Treat missing event-level branches as error instead of warning (default is ``False``)
|
118
|
+
error_missing_event_ids: ClassVar[bool] = False
|
119
|
+
#: Determine closest behavior for a given branch or treat branch as :attr:`default_behavior` (default is ``True``)
|
120
|
+
identify_closest_behavior: ClassVar[bool] = True
|
25
121
|
|
122
|
+
#: event IDs to expect in data datasets
|
26
123
|
event_ids_data: ClassVar[set[str]] = {
|
27
124
|
"lumiBlock",
|
28
125
|
"averageInteractionsPerCrossing",
|
29
126
|
"actualInteractionsPerCrossing",
|
30
127
|
"dataTakingYear",
|
31
128
|
}
|
129
|
+
#: event IDs to expect in MC datasets
|
32
130
|
event_ids_mc: ClassVar[set[str]] = {
|
33
131
|
"mcChannelNumber",
|
34
132
|
"runNumber",
|
35
133
|
"eventNumber",
|
36
134
|
"mcEventWeights",
|
37
135
|
}
|
136
|
+
#: all event IDs to expect in the dataset
|
38
137
|
event_ids: ClassVar[set[str]] = {*event_ids_data, *event_ids_mc}
|
39
138
|
|
139
|
+
#: mixins defining the mapping from collection name to behavior to use for that collection
|
40
140
|
mixins: ClassVar[dict[str, str]] = {
|
41
141
|
"el": "Electron",
|
42
142
|
"jet": "Jet",
|
@@ -48,9 +148,10 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
48
148
|
"weight": "Weight",
|
49
149
|
}
|
50
150
|
|
51
|
-
|
151
|
+
#: additional branches to pass-through with no zipping or additional interpretation (such as those stored as length-1 vectors)
|
52
152
|
singletons: ClassVar[set[str]] = set()
|
53
153
|
|
154
|
+
#: docstrings to assign for specific subcollections across the various collections identified by this schema
|
54
155
|
docstrings: ClassVar[dict[str, str]] = {
|
55
156
|
"charge": "charge",
|
56
157
|
"eta": "pseudorapidity",
|
@@ -60,6 +161,9 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
60
161
|
"phi": "azimuthal angle",
|
61
162
|
}
|
62
163
|
|
164
|
+
#: default behavior to use for any collection (default ``"NanoCollection"``, from :class:`coffea.nanoevents.methods.base.NanoCollection`)
|
165
|
+
default_behavior: ClassVar[str] = "NanoCollection"
|
166
|
+
|
63
167
|
def __init__(self, base_form: dict[str, Any], version: str = "latest"):
|
64
168
|
super().__init__(base_form)
|
65
169
|
self._version = version
|
@@ -87,10 +191,37 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
87
191
|
branch_forms = dict(zip(field_names, input_contents))
|
88
192
|
|
89
193
|
# parse into high-level records (collections, list collections, and singletons)
|
90
|
-
collections = {
|
194
|
+
collections = {
|
195
|
+
k.split("_")[0] for k in branch_forms if k not in self.singletons
|
196
|
+
}
|
91
197
|
collections -= self.event_ids
|
92
198
|
collections -= set(self.singletons)
|
93
199
|
|
200
|
+
# now handle any collections that we identified that are substrings of the items in the mixins
|
201
|
+
# convert all valid branch_forms into strings to make the lookups a bit faster
|
202
|
+
bf_str = ",".join(branch_forms.keys())
|
203
|
+
for mixin in self.mixins:
|
204
|
+
if mixin in collections:
|
205
|
+
continue
|
206
|
+
if f",{mixin}_" not in bf_str and not bf_str.startswith(f"{mixin}_"):
|
207
|
+
continue
|
208
|
+
if "_" in mixin:
|
209
|
+
warnings.warn(
|
210
|
+
f"I identified a mixin that I did not automatically identify as a collection because it contained an underscore: '{mixin}'. I will add this to the known collections. To suppress this warning next time, please create your ntuples with collections without underscores. [mixin-underscore]",
|
211
|
+
RuntimeWarning,
|
212
|
+
stacklevel=2,
|
213
|
+
)
|
214
|
+
collections.add(mixin)
|
215
|
+
for collection in list(collections):
|
216
|
+
if mixin.startswith(f"{collection}_"):
|
217
|
+
warnings.warn(
|
218
|
+
f"I found a misidentified collection: '{collection}'. I will remove this from the known collections. To suppress this warning next time, please create your ntuples with collections that are not similarly named with underscores. [collection-subset]",
|
219
|
+
RuntimeWarning,
|
220
|
+
stacklevel=2,
|
221
|
+
)
|
222
|
+
collections.remove(collection)
|
223
|
+
break
|
224
|
+
|
94
225
|
# rename needed because easyjet breaks the AMG assumptions
|
95
226
|
# https://gitlab.cern.ch/easyjet/easyjet/-/issues/246
|
96
227
|
for k in list(branch_forms):
|
@@ -99,17 +230,25 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
99
230
|
branch_forms[k.replace("_NOSYS", "") + "_NOSYS"] = branch_forms.pop(k)
|
100
231
|
|
101
232
|
# these are collections with systematic variations
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
233
|
+
try:
|
234
|
+
subcollections = {
|
235
|
+
k.split("__")[0].split("_", 1)[1].replace("_NOSYS", "")
|
236
|
+
for k in branch_forms
|
237
|
+
if "NOSYS" in k and k not in self.singletons
|
238
|
+
}
|
239
|
+
except IndexError as exc:
|
240
|
+
msg = "One of the branches does not follow the assumed pattern for this schema. [invalid-branch-name]"
|
241
|
+
raise RuntimeError(msg) from exc
|
107
242
|
|
108
243
|
# Check the presence of the event_ids
|
109
244
|
missing_event_ids = [
|
110
245
|
event_id for event_id in self.event_ids if event_id not in branch_forms
|
111
246
|
]
|
112
247
|
|
248
|
+
missing_singletons = [
|
249
|
+
singleton for singleton in self.singletons if singleton not in branch_forms
|
250
|
+
]
|
251
|
+
|
113
252
|
if len(missing_event_ids) > 0:
|
114
253
|
if self.error_missing_event_ids:
|
115
254
|
msg = f"There are missing event ID fields: {missing_event_ids} \n\n\
|
@@ -125,17 +264,29 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
125
264
|
stacklevel=2,
|
126
265
|
)
|
127
266
|
|
267
|
+
if len(missing_singletons) > 0:
|
268
|
+
# These singletons are simply branches we do not parse or handle
|
269
|
+
# explicitly in atlas-schema (e.g. they are copied directly to the
|
270
|
+
# output structure we provide you), however there can be false
|
271
|
+
# positives when you submit multiple files with different branch
|
272
|
+
# structures and this warning could be safely ignored.
|
273
|
+
warnings.warn(
|
274
|
+
f"Missing singletons : {missing_singletons}. [singleton-missing]",
|
275
|
+
RuntimeWarning,
|
276
|
+
stacklevel=2,
|
277
|
+
)
|
278
|
+
|
128
279
|
output = {}
|
129
280
|
|
130
281
|
# first, register singletons (event-level, others)
|
131
282
|
for name in {*self.event_ids, *self.singletons}:
|
132
|
-
if name in missing_event_ids:
|
283
|
+
if name in [*missing_event_ids, *missing_singletons]:
|
133
284
|
continue
|
285
|
+
|
134
286
|
output[name] = branch_forms[name]
|
135
287
|
|
136
288
|
# next, go through and start grouping up collections
|
137
289
|
for name in collections:
|
138
|
-
mixin = self.mixins.get(name, "NanoCollection")
|
139
290
|
content = {}
|
140
291
|
used = set()
|
141
292
|
|
@@ -165,7 +316,7 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
165
316
|
|
166
317
|
if not used and not content:
|
167
318
|
warnings.warn(
|
168
|
-
f"I identified a branch that likely does not have any leaves: '{name}'. I will treat this as a 'singleton'. To suppress this warning next time, please define your singletons explicitly.",
|
319
|
+
f"I identified a branch that likely does not have any leaves: '{name}'. I will treat this as a 'singleton'. To suppress this warning next time, please define your singletons explicitly. [singleton-undefined]",
|
169
320
|
RuntimeWarning,
|
170
321
|
stacklevel=2,
|
171
322
|
)
|
@@ -173,14 +324,27 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
173
324
|
output[name] = branch_forms[name]
|
174
325
|
|
175
326
|
else:
|
176
|
-
|
327
|
+
behavior = self.mixins.get(name, "")
|
328
|
+
if not behavior:
|
329
|
+
behavior = self.suggested_behavior(name)
|
330
|
+
warnings.warn(
|
331
|
+
f"I found a collection with no defined mixin: '{name}'. I will assume behavior: '{behavior}'. To suppress this warning next time, please define mixins for your custom collections. [mixin-undefined]",
|
332
|
+
RuntimeWarning,
|
333
|
+
stacklevel=2,
|
334
|
+
)
|
335
|
+
|
336
|
+
output[name] = zip_forms(content, name, record_name=behavior)
|
177
337
|
|
178
338
|
output[name].setdefault("parameters", {})
|
179
339
|
output[name]["parameters"].update({"collection_name": name})
|
180
340
|
|
181
341
|
if output[name]["class"] == "ListOffsetArray":
|
182
|
-
|
183
|
-
|
342
|
+
if output[name]["class"] == "RecordArray":
|
343
|
+
parameters = output[name]["content"]["fields"]
|
344
|
+
contents = output[name]["content"]["contents"]
|
345
|
+
else:
|
346
|
+
# these are also singletons of another kind that we just pass through
|
347
|
+
continue
|
184
348
|
elif output[name]["class"] == "RecordArray":
|
185
349
|
parameters = output[name]["fields"]
|
186
350
|
contents = output[name]["contents"]
|
@@ -190,6 +354,7 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
190
354
|
else:
|
191
355
|
msg = f"Unhandled class {output[name]['class']}"
|
192
356
|
raise RuntimeError(msg)
|
357
|
+
|
193
358
|
# update docstrings as needed
|
194
359
|
# NB: must be before flattening for easier logic
|
195
360
|
for index, parameter in enumerate(parameters):
|
@@ -208,7 +373,55 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
208
373
|
|
209
374
|
@classmethod
|
210
375
|
def behavior(cls) -> Behavior:
|
211
|
-
"""Behaviors necessary to implement this schema
|
212
|
-
from atlas_schema.methods import behavior as roaster
|
376
|
+
"""Behaviors necessary to implement this schema
|
213
377
|
|
378
|
+
Returns:
|
379
|
+
dict[str | tuple['*', str], type[awkward.Record]]: an :data:`awkward.behavior` dictionary
|
380
|
+
"""
|
214
381
|
return roaster
|
382
|
+
|
383
|
+
@classmethod
|
384
|
+
def suggested_behavior(cls, key: str, cutoff: float = 0.4) -> str:
|
385
|
+
"""
|
386
|
+
Suggest a behavior to use for a provided collection or branch name.
|
387
|
+
|
388
|
+
Default behavior: :class:`~coffea.nanoevents.methods.base.NanoCollection`.
|
389
|
+
|
390
|
+
Note:
|
391
|
+
If :attr:`identify_closest_behavior` is ``False``, then this function will return the default behavior ``NanoCollection``.
|
392
|
+
|
393
|
+
Warning:
|
394
|
+
If no behavior is found above the *cutoff* score, then this function will return the default behavior.
|
395
|
+
|
396
|
+
Args:
|
397
|
+
key (str): collection name to suggest a matching behavior for
|
398
|
+
cutoff (float): optional argument cutoff (default ``0.4``) is a float in the range ``[0, 1]``. Possibilities that don't score at least that similar to *key* are ignored.
|
399
|
+
|
400
|
+
Returns:
|
401
|
+
str: suggested behavior to use by string
|
402
|
+
|
403
|
+
Example:
|
404
|
+
>>> from atlas_schema.schema import NtupleSchema
|
405
|
+
>>> NtupleSchema.suggested_behavior("truthjet")
|
406
|
+
'Jet'
|
407
|
+
>>> NtupleSchema.suggested_behavior("SignalElectron")
|
408
|
+
'Electron'
|
409
|
+
>>> NtupleSchema.suggested_behavior("generatorWeight")
|
410
|
+
'Weight'
|
411
|
+
>>> NtupleSchema.suggested_behavior("aVeryStrangelyNamedBranchWithNoMatch")
|
412
|
+
'NanoCollection'
|
413
|
+
"""
|
414
|
+
if cls.identify_closest_behavior:
|
415
|
+
# lowercase everything to do case-insensitive matching
|
416
|
+
behaviors = [b for b in cls.behavior() if isinstance(b, str)]
|
417
|
+
behaviors_l = [b.lower() for b in behaviors]
|
418
|
+
results = difflib.get_close_matches(
|
419
|
+
key.lower(), behaviors_l, n=1, cutoff=cutoff
|
420
|
+
)
|
421
|
+
if not results:
|
422
|
+
return cls.default_behavior
|
423
|
+
|
424
|
+
behavior = results[0]
|
425
|
+
# need to identify the index and return the unlowered version
|
426
|
+
return behaviors[behaviors_l.index(behavior)]
|
427
|
+
return cls.default_behavior
|
atlas_schema/typing_compat.py
CHANGED
@@ -5,7 +5,7 @@ Typing helpers.
|
|
5
5
|
from __future__ import annotations
|
6
6
|
|
7
7
|
import sys
|
8
|
-
from typing import Annotated
|
8
|
+
from typing import Annotated, Literal, Union
|
9
9
|
|
10
10
|
import awkward
|
11
11
|
|
@@ -19,6 +19,6 @@ if sys.version_info >= (3, 11):
|
|
19
19
|
else:
|
20
20
|
from typing_extensions import Self
|
21
21
|
|
22
|
-
Behavior: TypeAlias = dict[str, type[awkward.Record]]
|
22
|
+
Behavior: TypeAlias = dict[Union[str, tuple[Literal["*"]], str], type[awkward.Record]]
|
23
23
|
|
24
24
|
__all__ = ("Annotated", "Behavior", "Self")
|
atlas_schema/utils.py
CHANGED
@@ -1,16 +1,15 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from enum import Enum
|
4
|
-
from typing import TypeVar,
|
4
|
+
from typing import TypeVar, cast
|
5
5
|
|
6
6
|
import awkward as ak
|
7
|
-
import dask_awkward as dak
|
8
7
|
|
9
|
-
Array = TypeVar("Array", bound=
|
8
|
+
Array = TypeVar("Array", bound=ak.Array)
|
10
9
|
_E = TypeVar("_E", bound=Enum)
|
11
10
|
|
12
11
|
|
13
|
-
def isin(element: Array, test_elements:
|
12
|
+
def isin(element: Array, test_elements: ak.Array, axis: int = -1) -> Array:
|
14
13
|
"""
|
15
14
|
Find test_elements in element. Similar in API as :func:`numpy.isin`.
|
16
15
|
|
@@ -21,12 +20,12 @@ def isin(element: Array, test_elements: dak.Array | ak.Array, axis: int = -1) ->
|
|
21
20
|
comparison.
|
22
21
|
|
23
22
|
Args:
|
24
|
-
element (
|
25
|
-
test_elements (
|
23
|
+
element (ak.Array): input array of values.
|
24
|
+
test_elements (ak.Array): one-dimensional set of values against which to test each value of *element*.
|
26
25
|
axis (int): the axis along which the comparison is performed
|
27
26
|
|
28
27
|
Returns:
|
29
|
-
|
28
|
+
ak.Array: result of comparison for test_elements in *element*
|
30
29
|
|
31
30
|
Example:
|
32
31
|
>>> import awkward as ak
|
@@ -1,11 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: atlas-schema
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.3.0
|
4
4
|
Summary: Helper python package for ATLAS Common NTuple Analysis work.
|
5
5
|
Project-URL: Homepage, https://github.com/scipp-atlas/atlas-schema
|
6
6
|
Project-URL: Bug Tracker, https://github.com/scipp-atlas/atlas-schema/issues
|
7
7
|
Project-URL: Discussions, https://github.com/scipp-atlas/atlas-schema/discussions
|
8
|
-
Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.
|
8
|
+
Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.3.0/
|
9
9
|
Project-URL: Releases, https://github.com/scipp-atlas/atlas-schema/releases
|
10
10
|
Project-URL: Release Notes, https://atlas-schema.readthedocs.io/en/latest/history.html
|
11
11
|
Author-email: Giordon Stark <kratsg@gmail.com>
|
@@ -227,7 +227,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
227
227
|
Classifier: Topic :: Scientific/Engineering
|
228
228
|
Classifier: Typing :: Typed
|
229
229
|
Requires-Python: >=3.9
|
230
|
-
Requires-Dist: coffea[dask]>=
|
230
|
+
Requires-Dist: coffea[dask]>=2025.7.0
|
231
231
|
Requires-Dist: particle>=0.25.0
|
232
232
|
Provides-Extra: dev
|
233
233
|
Requires-Dist: pytest-cov>=3; extra == 'dev'
|
@@ -251,7 +251,7 @@ Requires-Dist: tbump>=6.7.0; extra == 'test'
|
|
251
251
|
Requires-Dist: twine; extra == 'test'
|
252
252
|
Description-Content-Type: text/markdown
|
253
253
|
|
254
|
-
# atlas-schema v0.
|
254
|
+
# atlas-schema v0.3.0
|
255
255
|
|
256
256
|
[![Actions Status][actions-badge]][actions-link]
|
257
257
|
[![Documentation Status][rtd-badge]][rtd-link]
|
@@ -335,11 +335,9 @@ like below:
|
|
335
335
|
|
336
336
|
```python
|
337
337
|
import awkward as ak
|
338
|
-
import
|
339
|
-
import hist.dask as had
|
338
|
+
from hist import Hist
|
340
339
|
import matplotlib.pyplot as plt
|
341
340
|
from coffea import processor
|
342
|
-
from coffea.nanoevents import NanoEventsFactory
|
343
341
|
from distributed import Client
|
344
342
|
|
345
343
|
from atlas_schema.schema import NtupleSchema
|
@@ -352,7 +350,7 @@ class MyFirstProcessor(processor.ProcessorABC):
|
|
352
350
|
def process(self, events):
|
353
351
|
dataset = events.metadata["dataset"]
|
354
352
|
h_ph_pt = (
|
355
|
-
|
353
|
+
Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
|
356
354
|
.Regular(200, 0.0, 2000.0, name="pt", label="$pt_{\gamma}$ [GeV]")
|
357
355
|
.Int64()
|
358
356
|
)
|
@@ -376,17 +374,18 @@ class MyFirstProcessor(processor.ProcessorABC):
|
|
376
374
|
if __name__ == "__main__":
|
377
375
|
client = Client()
|
378
376
|
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
out =
|
388
|
-
|
389
|
-
print(
|
377
|
+
fileset = {"700352.Zqqgamma.mc20d.v1": {"files": {"ntuple.root": "analysis"}}}
|
378
|
+
|
379
|
+
run = processor.Runner(
|
380
|
+
executor=processor.IterativeExecutor(compression=None),
|
381
|
+
schema=NtupleSchema,
|
382
|
+
savemetrics=True,
|
383
|
+
)
|
384
|
+
|
385
|
+
out, metrics = run(fileset, processor_instance=MyFirstProcessor())
|
386
|
+
|
387
|
+
print(out)
|
388
|
+
print(metrics)
|
390
389
|
|
391
390
|
fig, ax = plt.subplots()
|
392
391
|
computed["700352.Zqqgamma.mc20d.v1"]["ph_pt"].plot1d(ax=ax)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
|
2
|
+
atlas_schema/_version.py,sha256=AGmG_Lx0-9ztFw_7d9mYbaYuC-2abxE1oXOUNAY29YY,511
|
3
|
+
atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
|
4
|
+
atlas_schema/enums.py,sha256=hwgOvFBmITNxL0MQkrNpbiPv9VMezFoE-eyGgjzem8E,3688
|
5
|
+
atlas_schema/methods.py,sha256=DPeEFofeD5_bCk7V3KudJaE_sAUMpBIh-gPnM4kWDe8,7124
|
6
|
+
atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
atlas_schema/schema.py,sha256=44i7ri-7OP4SK6_D_3JAGvoiIY-IcPBp1p6MlKfWK5I,21675
|
8
|
+
atlas_schema/typing_compat.py,sha256=3G8h4WfLoDmrtWZvtYKLCwEpCQ_O4Fwygb2WlDRSE4E,488
|
9
|
+
atlas_schema/utils.py,sha256=E3jCka-pf_0h_r3OO0hMLlbF6dQKoxr2T1Gd18-aJ4U,2034
|
10
|
+
atlas_schema-0.3.0.dist-info/METADATA,sha256=NCkA4ydLhlTHYJOTipTgZlR7_yhDsDe7zpa0jbnqw00,20069
|
11
|
+
atlas_schema-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
12
|
+
atlas_schema-0.3.0.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
|
13
|
+
atlas_schema-0.3.0.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
|
2
|
-
atlas_schema/_version.py,sha256=AaQEeqeDwmZAHoPuwg2C0ulADePbIYLSFanZzt0cytQ,411
|
3
|
-
atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
|
4
|
-
atlas_schema/enums.py,sha256=hwgOvFBmITNxL0MQkrNpbiPv9VMezFoE-eyGgjzem8E,3688
|
5
|
-
atlas_schema/methods.py,sha256=K7u6HGKXrtpMg7jjCjKPwIEnknOShUH4HQ1ibKBzkZ0,6832
|
6
|
-
atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
atlas_schema/schema.py,sha256=s3bcSa5DH5iILPD_2BD3co8MSTsXNs1rYBmn44388Kc,8082
|
8
|
-
atlas_schema/typing_compat.py,sha256=RwkxiiYbXO9yxkeaL8CdRaOHH7wq6vO_epg1YD7RbRs,439
|
9
|
-
atlas_schema/utils.py,sha256=spk7KIMBbXSPpZBTltyxaHWvyitkEGSVldfuKFoyavk,2137
|
10
|
-
atlas_schema-0.2.3.dist-info/METADATA,sha256=xtXdXa-9ra8TTtZSXLycUKPvBLPzXCIPa5cueuv0w90,20107
|
11
|
-
atlas_schema-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
12
|
-
atlas_schema-0.2.3.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
|
13
|
-
atlas_schema-0.2.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|