atlas-schema 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atlas_schema/_version.py +2 -2
- atlas_schema/methods.py +12 -0
- atlas_schema/schema.py +220 -21
- atlas_schema/typing_compat.py +2 -2
- atlas_schema/utils.py +23 -13
- {atlas_schema-0.2.2.dist-info → atlas_schema-0.2.4.dist-info}/METADATA +126 -3
- atlas_schema-0.2.4.dist-info/RECORD +13 -0
- atlas_schema-0.2.2.dist-info/RECORD +0 -13
- {atlas_schema-0.2.2.dist-info → atlas_schema-0.2.4.dist-info}/WHEEL +0 -0
- {atlas_schema-0.2.2.dist-info → atlas_schema-0.2.4.dist-info}/licenses/LICENSE +0 -0
atlas_schema/_version.py
CHANGED
atlas_schema/methods.py
CHANGED
@@ -230,12 +230,24 @@ JetArray.MomentumClass = vector.LorentzVectorArray # noqa: F821
|
|
230
230
|
|
231
231
|
__all__ = [
|
232
232
|
"Electron",
|
233
|
+
"ElectronArray", # noqa: F822
|
234
|
+
"ElectronRecord", # noqa: F822
|
233
235
|
"Jet",
|
236
|
+
"JetArray", # noqa: F822
|
237
|
+
"JetRecord", # noqa: F822
|
234
238
|
"MissingET",
|
239
|
+
"MissingETArray", # noqa: F822
|
240
|
+
"MissingETRecord", # noqa: F822
|
235
241
|
"Muon",
|
242
|
+
"MuonArray", # noqa: F822
|
243
|
+
"MuonRecord", # noqa: F822
|
236
244
|
"NtupleEvents",
|
237
245
|
"Particle",
|
246
|
+
"ParticleArray", # noqa: F822
|
247
|
+
"ParticleRecord", # noqa: F822
|
238
248
|
"Pass",
|
239
249
|
"Photon",
|
250
|
+
"PhotonArray", # noqa: F822
|
251
|
+
"PhotonRecord", # noqa: F822
|
240
252
|
"Weight",
|
241
253
|
]
|
atlas_schema/schema.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import difflib
|
3
4
|
import warnings
|
4
5
|
from collections.abc import KeysView, ValuesView
|
5
6
|
from typing import Any, ClassVar
|
@@ -10,33 +11,131 @@ from atlas_schema.typing_compat import Behavior, Self
|
|
10
11
|
|
11
12
|
|
12
13
|
class NtupleSchema(BaseSchema): # type: ignore[misc]
|
13
|
-
"""
|
14
|
+
"""The schema for building ATLAS ntuples following the typical centralized formats.
|
14
15
|
|
15
|
-
|
16
|
-
the naming pattern of the branches.
|
16
|
+
This schema is built from all branches found in a tree in the supplied
|
17
|
+
file, based on the naming pattern of the branches. This naming pattern is
|
18
|
+
typically assumed to be
|
17
19
|
|
18
|
-
-
|
20
|
+
.. code-block:: bash
|
21
|
+
|
22
|
+
{collection:str}_{subcollection:str}_{systematic:str}
|
23
|
+
|
24
|
+
where:
|
25
|
+
* ``collection`` is assumed to be a prefix with typical characters, following the regex ``[a-zA-Z][a-zA-Z0-9]*``; that is, starting with a letter (upper or lower case), and followed by zero or more alphanumeric characters,
|
26
|
+
* ``subcollection`` is assumed to be anything with typical characters (allowing for underscores) following the regex ``[a-zA-Z_][a-zA-Z0-9_]*``; that is, starting with a letter (upper or lower case) or underscore, and followed by zero or more alphanumeric characters including underscores, and
|
27
|
+
* ``systematic`` is assumed to be either ``NOSYS`` to indicate a branch with potential systematic variations, or anything with typical characters (allowing for underscores) following the same regular expression as the ``subcollection``.
|
28
|
+
|
29
|
+
Here, a collection refers to the top-level entry to access an item - a collection called ``el`` will be accessible under the ``el`` attributes via ``events['el']`` or ``events.el``. A subcollection called ``pt`` will be accessible under that collection, such as ``events['el']['pt']`` or ``events.el.pt``. This is the power of the schema providing a more user-friendly (and programmatic) access to the underlying branches.
|
30
|
+
|
31
|
+
The above logic means that the following branches below will be categorized as follows:
|
32
|
+
|
33
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
34
|
+
| branch | collection | subcollection | systematic |
|
35
|
+
+===============================+===================+=======================+==================+
|
36
|
+
| ``'eventNumber'`` | ``'eventNumber'`` | ``None`` | ``None`` |
|
37
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
38
|
+
| ``'runNumber'`` | ``'runNumber'`` | ``None`` | ``None`` |
|
39
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
40
|
+
| ``'el_pt_NOSYS'`` | ``'el'`` | ``'pt'`` | ``'NOSYS'`` |
|
41
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
42
|
+
| ``'jet_cleanTightBad_NOSYS'`` | ``'jet'`` | ``'cleanTightBad'`` | ``'NOSYS'`` |
|
43
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
44
|
+
| ``'jet_select_btag_NOSYS'`` | ``'jet'`` | ``'select_btag'`` | ``'NOSYS'`` |
|
45
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
46
|
+
| ``'jet_e_NOSYS'`` | ``'jet'`` | ``'e'`` | ``'NOSYS'`` |
|
47
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
48
|
+
| ``'truthel_phi'`` | ``'truthel'`` | ``'phi'`` | ``None`` |
|
49
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
50
|
+
| ``'truthel_pt'`` | ``'truthel'`` | ``'pt'`` | ``None`` |
|
51
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
52
|
+
| ``'ph_eta'`` | ``'ph'`` | ``'eta'`` | ``None`` |
|
53
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
54
|
+
| ``'ph_phi_SCALE__1up'`` | ``'ph'`` | ``'phi'`` | ``'SCALE__1up'`` |
|
55
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
56
|
+
| ``'mu_TTVA_effSF_NOSYS'`` | ``'mu'`` | ``'TTVA_effSF'`` | ``'NOSYS'`` |
|
57
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
58
|
+
| ``'recojet_antikt4PFlow_pt'`` | ``'recojet'``     | ``'antikt4PFlow_pt'`` | ``None``         |
|
59
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
60
|
+
| ``'recojet_antikt10UFO_m'`` | ``'recojet'`` | ``'antikt10UFO_m'`` | ``None`` |
|
61
|
+
+-------------------------------+-------------------+-----------------------+------------------+
|
62
|
+
|
63
|
+
Sometimes this logic is not what you want, and there are ways to teach ``NtupleSchema`` how to group some of these better for atypical cases. We can address these case-by-case.
|
64
|
+
|
65
|
+
**Singletons**
|
66
|
+
|
67
|
+
Sometimes you have particular branches that you don't want to be treated as a collection (with subcollections). And sometimes you will see warnings about this (see :ref:`faq`). There are some pre-defined ``singletons`` stored under :attr:`event_ids`, and these will be lazily treated as a *singleton*. For other cases where you add your own branches, you can additionally extend this class to add your own :attr:`singletons`:
|
68
|
+
|
69
|
+
.. code-block:: python
|
70
|
+
|
71
|
+
from atlas_schema.schema import NtupleSchema
|
72
|
+
|
73
|
+
|
74
|
+
class MySchema(NtupleSchema):
|
75
|
+
singletons = {"RandomRunNumber"}
|
76
|
+
|
77
|
+
and use this schema in your analysis code. The rest of the logic will be handled for you, and you can access your singletons under ``events.RandomRunNumber`` as expected.
|
78
|
+
|
79
|
+
**Mixins (collections, subcollections)**
|
80
|
+
|
81
|
+
In more complicated scenarios, you might need to teach :class:`NtupleSchema` how to handle collections that end up having underscores in their name, or other characters that make the grouping non-trivial. In some other scenarios, you want to tell the schema to assign a certain set of behaviors to a collection - rather than the default :class:`atlas_schema.methods.Particle` behavior. This is where :attr:`mixins` comes in. Similar to how :attr:`singletons` are handled, you extend this schema to include your own ``mixins`` pointing them at one of the behaviors defined in :mod:`atlas_schema.methods`.
|
82
|
+
|
83
|
+
Let's demonstrate both cases. Imagine you want to have your ``truthel`` collections above treated as :class:`atlas_schema.methods.Electron`, then you would extend the existing :attr:`mixins`:
|
84
|
+
|
85
|
+
.. code-block:: python
|
86
|
+
|
87
|
+
from atlas_schema.schema import NtupleSchema
|
88
|
+
|
89
|
+
|
90
|
+
class MySchema(NtupleSchema):
|
91
|
+
mixins = {"truthel": "Electron", **NtupleSchema.mixins}
|
92
|
+
|
93
|
+
Now, ``events.truthel`` will give you arrays zipped up with :class:`atlas_schema.methods.Electron` behaviors.
|
94
|
+
|
95
|
+
If instead, you run into problems with mixing different branches in the same collection, because the default behavior of this schema described above is not smart enough to handle the atypical cases, you can explicitly fix this by defining your collections:
|
96
|
+
|
97
|
+
.. code-block:: python
|
98
|
+
|
99
|
+
from atlas_schema.schema import NtupleSchema
|
100
|
+
|
101
|
+
|
102
|
+
class MySchema(NtupleSchema):
|
103
|
+
mixins = {
|
104
|
+
"recojet_antikt4PFlow": "Jet",
|
105
|
+
"recojet_antikt10UFO": "Jet",
|
106
|
+
**NtupleSchema.mixins,
|
107
|
+
}
|
108
|
+
|
109
|
+
Now, ``events.recojet_antikt4PFlow`` and ``events.recojet_antikt10UFO`` will be separate collections, instead of a single ``events.recojet`` that incorrectly merged branches from each of these collections.
|
19
110
|
"""
|
20
111
|
|
21
|
-
__dask_capable__ = True
|
112
|
+
__dask_capable__: ClassVar[bool] = True
|
113
|
+
|
114
|
+
warn_missing_crossrefs: ClassVar[bool] = True
|
22
115
|
|
23
|
-
|
24
|
-
error_missing_event_ids = False
|
116
|
+
#: Treat missing event-level branches as error instead of warning (default is ``False``)
|
117
|
+
error_missing_event_ids: ClassVar[bool] = False
|
118
|
+
#: Determine closest behavior for a given branch or treat branch as :attr:`default_behavior` (default is ``True``)
|
119
|
+
identify_closest_behavior: ClassVar[bool] = True
|
25
120
|
|
121
|
+
#: event IDs to expect in data datasets
|
26
122
|
event_ids_data: ClassVar[set[str]] = {
|
27
123
|
"lumiBlock",
|
28
124
|
"averageInteractionsPerCrossing",
|
29
125
|
"actualInteractionsPerCrossing",
|
30
126
|
"dataTakingYear",
|
31
127
|
}
|
128
|
+
#: event IDs to expect in MC datasets
|
32
129
|
event_ids_mc: ClassVar[set[str]] = {
|
33
130
|
"mcChannelNumber",
|
34
131
|
"runNumber",
|
35
132
|
"eventNumber",
|
36
133
|
"mcEventWeights",
|
37
134
|
}
|
135
|
+
#: all event IDs to expect in the dataset
|
38
136
|
event_ids: ClassVar[set[str]] = {*event_ids_data, *event_ids_mc}
|
39
137
|
|
138
|
+
#: mixins defining the mapping from collection name to behavior to use for that collection
|
40
139
|
mixins: ClassVar[dict[str, str]] = {
|
41
140
|
"el": "Electron",
|
42
141
|
"jet": "Jet",
|
@@ -48,9 +147,10 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
48
147
|
"weight": "Weight",
|
49
148
|
}
|
50
149
|
|
51
|
-
|
52
|
-
singletons: ClassVar[
|
150
|
+
#: additional branches to pass-through with no zipping or additional interpretation (such as those stored as length-1 vectors)
|
151
|
+
singletons: ClassVar[set[str]] = set()
|
53
152
|
|
153
|
+
#: docstrings to assign for specific subcollections across the various collections identified by this schema
|
54
154
|
docstrings: ClassVar[dict[str, str]] = {
|
55
155
|
"charge": "charge",
|
56
156
|
"eta": "pseudorapidity",
|
@@ -60,6 +160,9 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
60
160
|
"phi": "azimuthal angle",
|
61
161
|
}
|
62
162
|
|
163
|
+
#: default behavior to use for any collection (default ``"NanoCollection"``, from :class:`coffea.nanoevents.methods.base.NanoCollection`)
|
164
|
+
default_behavior: ClassVar[str] = "NanoCollection"
|
165
|
+
|
63
166
|
def __init__(self, base_form: dict[str, Any], version: str = "latest"):
|
64
167
|
super().__init__(base_form)
|
65
168
|
self._version = version
|
@@ -91,6 +194,31 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
91
194
|
collections -= self.event_ids
|
92
195
|
collections -= set(self.singletons)
|
93
196
|
|
197
|
+
# now handle any collections that we identified that are substrings of the items in the mixins
|
198
|
+
# convert all valid branch_forms into strings to make the lookups a bit faster
|
199
|
+
bf_str = ",".join(branch_forms.keys())
|
200
|
+
for mixin in self.mixins:
|
201
|
+
if mixin in collections:
|
202
|
+
continue
|
203
|
+
if f",{mixin}_" not in bf_str and not bf_str.startswith(f"{mixin}_"):
|
204
|
+
continue
|
205
|
+
if "_" in mixin:
|
206
|
+
warnings.warn(
|
207
|
+
f"I identified a mixin that I did not automatically identify as a collection because it contained an underscore: '{mixin}'. I will add this to the known collections. To suppress this warning next time, please create your ntuples with collections without underscores. [mixin-underscore]",
|
208
|
+
RuntimeWarning,
|
209
|
+
stacklevel=2,
|
210
|
+
)
|
211
|
+
collections.add(mixin)
|
212
|
+
for collection in list(collections):
|
213
|
+
if mixin.startswith(f"{collection}_"):
|
214
|
+
warnings.warn(
|
215
|
+
f"I found a misidentified collection: '{collection}'. I will remove this from the known collections. To suppress this warning next time, please create your ntuples with collections that are not similarly named with underscores. [collection-subset]",
|
216
|
+
RuntimeWarning,
|
217
|
+
stacklevel=2,
|
218
|
+
)
|
219
|
+
collections.remove(collection)
|
220
|
+
break
|
221
|
+
|
94
222
|
# rename needed because easyjet breaks the AMG assumptions
|
95
223
|
# https://gitlab.cern.ch/easyjet/easyjet/-/issues/246
|
96
224
|
for k in list(branch_forms):
|
@@ -127,15 +255,14 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
127
255
|
|
128
256
|
output = {}
|
129
257
|
|
130
|
-
# first, register
|
131
|
-
for name in self.event_ids:
|
258
|
+
# first, register singletons (event-level, others)
|
259
|
+
for name in {*self.event_ids, *self.singletons}:
|
132
260
|
if name in missing_event_ids:
|
133
261
|
continue
|
134
262
|
output[name] = branch_forms[name]
|
135
263
|
|
136
264
|
# next, go through and start grouping up collections
|
137
265
|
for name in collections:
|
138
|
-
mixin = self.mixins.get(name, "NanoCollection")
|
139
266
|
content = {}
|
140
267
|
used = set()
|
141
268
|
|
@@ -163,20 +290,47 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
163
290
|
}
|
164
291
|
)
|
165
292
|
|
166
|
-
|
293
|
+
if not used and not content:
|
294
|
+
warnings.warn(
|
295
|
+
f"I identified a branch that likely does not have any leaves: '{name}'. I will treat this as a 'singleton'. To suppress this warning next time, please define your singletons explicitly. [singleton-undefined]",
|
296
|
+
RuntimeWarning,
|
297
|
+
stacklevel=2,
|
298
|
+
)
|
299
|
+
self.singletons.add(name)
|
300
|
+
output[name] = branch_forms[name]
|
301
|
+
|
302
|
+
else:
|
303
|
+
behavior = self.mixins.get(name, "")
|
304
|
+
if not behavior:
|
305
|
+
behavior = self.suggested_behavior(name)
|
306
|
+
warnings.warn(
|
307
|
+
f"I found a collection with no defined mixin: '{name}'. I will assume behavior: '{behavior}'. To suppress this warning next time, please define mixins for your custom collections. [mixin-undefined]",
|
308
|
+
RuntimeWarning,
|
309
|
+
stacklevel=2,
|
310
|
+
)
|
311
|
+
|
312
|
+
output[name] = zip_forms(content, name, record_name=behavior)
|
167
313
|
|
168
314
|
output[name].setdefault("parameters", {})
|
169
315
|
output[name]["parameters"].update({"collection_name": name})
|
170
316
|
|
171
317
|
if output[name]["class"] == "ListOffsetArray":
|
172
|
-
|
173
|
-
|
318
|
+
if output[name]["class"] == "RecordArray":
|
319
|
+
parameters = output[name]["content"]["fields"]
|
320
|
+
contents = output[name]["content"]["contents"]
|
321
|
+
else:
|
322
|
+
# these are also singletons of another kind that we just pass through
|
323
|
+
continue
|
174
324
|
elif output[name]["class"] == "RecordArray":
|
175
325
|
parameters = output[name]["fields"]
|
176
326
|
contents = output[name]["contents"]
|
327
|
+
elif output[name]["class"] == "NumpyArray":
|
328
|
+
# these are singletons that we just pass through
|
329
|
+
continue
|
177
330
|
else:
|
178
331
|
msg = f"Unhandled class {output[name]['class']}"
|
179
332
|
raise RuntimeError(msg)
|
333
|
+
|
180
334
|
# update docstrings as needed
|
181
335
|
# NB: must be before flattening for easier logic
|
182
336
|
for index, parameter in enumerate(parameters):
|
@@ -191,16 +345,61 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
|
|
191
345
|
),
|
192
346
|
)
|
193
347
|
|
194
|
-
if name in self.singletons:
|
195
|
-
# flatten! this 'promotes' the content of an inner dimension
|
196
|
-
# upwards, effectively hiding one nested dimension
|
197
|
-
output[name] = output[name]["content"]
|
198
|
-
|
199
348
|
return output.keys(), output.values()
|
200
349
|
|
201
350
|
@classmethod
|
202
351
|
def behavior(cls) -> Behavior:
|
203
|
-
"""Behaviors necessary to implement this schema
|
352
|
+
"""Behaviors necessary to implement this schema
|
353
|
+
|
354
|
+
Returns:
|
355
|
+
dict[str | tuple['*', str], type[awkward.Record]]: an :data:`awkward.behavior` dictionary
|
356
|
+
"""
|
204
357
|
from atlas_schema.methods import behavior as roaster
|
205
358
|
|
206
359
|
return roaster
|
360
|
+
|
361
|
+
@classmethod
|
362
|
+
def suggested_behavior(cls, key: str, cutoff: float = 0.4) -> str:
|
363
|
+
"""
|
364
|
+
Suggest a behavior to use for a provided collection or branch name.
|
365
|
+
|
366
|
+
Default behavior: :class:`~coffea.nanoevents.methods.base.NanoCollection`.
|
367
|
+
|
368
|
+
Note:
|
369
|
+
If :attr:`identify_closest_behavior` is ``False``, then this function will return the default behavior ``NanoCollection``.
|
370
|
+
|
371
|
+
Warning:
|
372
|
+
If no behavior is found above the *cutoff* score, then this function will return the default behavior.
|
373
|
+
|
374
|
+
Args:
|
375
|
+
key (str): collection name to suggest a matching behavior for
|
376
|
+
cutoff (float): optional argument cutoff (default ``0.4``) is a float in the range ``[0, 1]``. Possibilities that don't score at least that similar to *key* are ignored.
|
377
|
+
|
378
|
+
Returns:
|
379
|
+
str: suggested behavior to use by string
|
380
|
+
|
381
|
+
Example:
|
382
|
+
>>> from atlas_schema.schema import NtupleSchema
|
383
|
+
>>> NtupleSchema.suggested_behavior("truthjet")
|
384
|
+
'Jet'
|
385
|
+
>>> NtupleSchema.suggested_behavior("SignalElectron")
|
386
|
+
'Electron'
|
387
|
+
>>> NtupleSchema.suggested_behavior("generatorWeight")
|
388
|
+
'Weight'
|
389
|
+
>>> NtupleSchema.suggested_behavior("aVeryStrangelyNamedBranchWithNoMatch")
|
390
|
+
'NanoCollection'
|
391
|
+
"""
|
392
|
+
if cls.identify_closest_behavior:
|
393
|
+
# lowercase everything to do case-insensitive matching
|
394
|
+
behaviors = [b for b in cls.behavior() if isinstance(b, str)]
|
395
|
+
behaviors_l = [b.lower() for b in behaviors]
|
396
|
+
results = difflib.get_close_matches(
|
397
|
+
key.lower(), behaviors_l, n=1, cutoff=cutoff
|
398
|
+
)
|
399
|
+
if not results:
|
400
|
+
return cls.default_behavior
|
401
|
+
|
402
|
+
behavior = results[0]
|
403
|
+
# need to identify the index and return the unlowered version
|
404
|
+
return behaviors[behaviors_l.index(behavior)]
|
405
|
+
return cls.default_behavior
|
atlas_schema/typing_compat.py
CHANGED
@@ -5,7 +5,7 @@ Typing helpers.
|
|
5
5
|
from __future__ import annotations
|
6
6
|
|
7
7
|
import sys
|
8
|
-
from typing import Annotated
|
8
|
+
from typing import Annotated, Literal, Union
|
9
9
|
|
10
10
|
import awkward
|
11
11
|
|
@@ -19,6 +19,6 @@ if sys.version_info >= (3, 11):
|
|
19
19
|
else:
|
20
20
|
from typing_extensions import Self
|
21
21
|
|
22
|
-
Behavior: TypeAlias = dict[str, type[awkward.Record]]
|
22
|
+
Behavior: TypeAlias = dict[Union[str, tuple[Literal["*"]], str], type[awkward.Record]]
|
23
23
|
|
24
24
|
__all__ = ("Annotated", "Behavior", "Self")
|
atlas_schema/utils.py
CHANGED
@@ -10,30 +10,40 @@ Array = TypeVar("Array", bound=Union[dak.Array, ak.Array])
|
|
10
10
|
_E = TypeVar("_E", bound=Enum)
|
11
11
|
|
12
12
|
|
13
|
-
def isin(
|
13
|
+
def isin(element: Array, test_elements: dak.Array | ak.Array, axis: int = -1) -> Array:
|
14
14
|
"""
|
15
|
-
Find
|
15
|
+
Find test_elements in element. Similar in API to :func:`numpy.isin`.
|
16
16
|
|
17
|
-
|
18
|
-
|
17
|
+
Calculates `element in test_elements`, broadcasting over *element elements only*. Returns a boolean array of the same shape as *element* that is `True` where an element of *element* is in *test_elements* and `False` otherwise.
|
18
|
+
|
19
|
+
This works by first transforming *test_elements* to an array with one more
|
20
|
+
dimension than the *element*, placing the *test_elements* at *axis*, and then doing a
|
19
21
|
comparison.
|
20
22
|
|
21
23
|
Args:
|
22
|
-
|
23
|
-
|
24
|
+
element (dask_awkward.Array or ak.Array): input array of values.
|
25
|
+
test_elements (dask_awkward.Array or ak.Array): one-dimensional set of values against which to test each value of *element*.
|
24
26
|
axis (int): the axis along which the comparison is performed
|
25
27
|
|
26
28
|
Returns:
|
27
|
-
|
29
|
+
dask_awkward.Array or ak.Array: result of comparison for test_elements in *element*
|
30
|
+
|
31
|
+
Example:
|
32
|
+
>>> import awkward as ak
|
33
|
+
>>> import atlas_schema as ats
|
34
|
+
>>> truth_origins = ak.Array([[1, 2, 3], [4], [5, 6, 7], [1]])
|
35
|
+
>>> prompt_origins = ak.Array([1, 2, 7])
|
36
|
+
>>> ats.isin(truth_origins, prompt_origins).to_list()
|
37
|
+
[[True, True, False], [False], [False, False, True], [True]]
|
28
38
|
"""
|
29
|
-
assert
|
39
|
+
assert test_elements.ndim == 1, "test_elements must be one-dimensional"
|
30
40
|
assert axis >= -1, "axis must be -1 or positive-valued"
|
31
|
-
assert axis <
|
41
|
+
assert axis < element.ndim + 1, "axis too large for the element"
|
32
42
|
|
33
|
-
# First, build up the transformation, with slice(None) indicating where to stick the
|
34
|
-
reshaper: list[None | slice] = [None] *
|
35
|
-
axis =
|
43
|
+
# First, build up the transformation, with slice(None) indicating where to stick the test_elements
|
44
|
+
reshaper: list[None | slice] = [None] * element.ndim
|
45
|
+
axis = element.ndim if axis == -1 else axis
|
36
46
|
reshaper.insert(axis, slice(None))
|
37
47
|
|
38
48
|
# Note: reshaper needs to be a tuple for indexing purposes
|
39
|
-
return cast(Array, ak.any(
|
49
|
+
return cast(Array, ak.any(element == test_elements[tuple(reshaper)], axis=-1))
|
@@ -1,11 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: atlas-schema
|
3
|
-
Version: 0.2.
|
3
|
+
Version: 0.2.4
|
4
4
|
Summary: Helper python package for ATLAS Common NTuple Analysis work.
|
5
5
|
Project-URL: Homepage, https://github.com/scipp-atlas/atlas-schema
|
6
6
|
Project-URL: Bug Tracker, https://github.com/scipp-atlas/atlas-schema/issues
|
7
7
|
Project-URL: Discussions, https://github.com/scipp-atlas/atlas-schema/discussions
|
8
|
-
Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.2.
|
8
|
+
Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.2.4/
|
9
9
|
Project-URL: Releases, https://github.com/scipp-atlas/atlas-schema/releases
|
10
10
|
Project-URL: Release Notes, https://atlas-schema.readthedocs.io/en/latest/history.html
|
11
11
|
Author-email: Giordon Stark <kratsg@gmail.com>
|
@@ -251,7 +251,7 @@ Requires-Dist: tbump>=6.7.0; extra == 'test'
|
|
251
251
|
Requires-Dist: twine; extra == 'test'
|
252
252
|
Description-Content-Type: text/markdown
|
253
253
|
|
254
|
-
# atlas-schema v0.2.
|
254
|
+
# atlas-schema v0.2.4
|
255
255
|
|
256
256
|
[![Actions Status][actions-badge]][actions-link]
|
257
257
|
[![Documentation Status][rtd-badge]][rtd-link]
|
@@ -279,6 +279,129 @@ Description-Content-Type: text/markdown
|
|
279
279
|
|
280
280
|
<!-- prettier-ignore-end -->
|
281
281
|
|
282
|
+
This is the python package containing schemas and helper functions enabling
|
283
|
+
analyzers to work with ATLAS datasets (Monte Carlo and Data), using
|
284
|
+
[coffea](https://coffea-hep.readthedocs.io/en/latest/).
|
285
|
+
|
286
|
+
## Hello World
|
287
|
+
|
288
|
+
The simplest example is to just get started processing the file as expected:
|
289
|
+
|
290
|
+
```python
|
291
|
+
from atlas_schema.schema import NtupleSchema
|
292
|
+
from coffea import dataset_tools
|
293
|
+
import awkward as ak
|
294
|
+
|
295
|
+
fileset = {"ttbar": {"files": {"path/to/ttbar.root": "tree_name"}}}
|
296
|
+
samples, report = dataset_tools.preprocess(fileset)
|
297
|
+
|
298
|
+
|
299
|
+
def noop(events):
|
300
|
+
return ak.fields(events)
|
301
|
+
|
302
|
+
|
303
|
+
fields = dataset_tools.apply_to_fileset(noop, samples, schemaclass=NtupleSchema)
|
304
|
+
print(fields)
|
305
|
+
```
|
306
|
+
|
307
|
+
which produces something similar to
|
308
|
+
|
309
|
+
```python
|
310
|
+
{
|
311
|
+
"ttbar": [
|
312
|
+
"dataTakingYear",
|
313
|
+
"mcChannelNumber",
|
314
|
+
"runNumber",
|
315
|
+
"eventNumber",
|
316
|
+
"lumiBlock",
|
317
|
+
"actualInteractionsPerCrossing",
|
318
|
+
"averageInteractionsPerCrossing",
|
319
|
+
"truthjet",
|
320
|
+
"PileupWeight",
|
321
|
+
"RandomRunNumber",
|
322
|
+
"met",
|
323
|
+
"recojet",
|
324
|
+
"truth",
|
325
|
+
"generatorWeight",
|
326
|
+
"beamSpotWeight",
|
327
|
+
"trigPassed",
|
328
|
+
"jvt",
|
329
|
+
]
|
330
|
+
}
|
331
|
+
```
|
332
|
+
|
333
|
+
However, a more involved example to apply a selection and fill a histogram looks
|
334
|
+
like below:
|
335
|
+
|
336
|
+
```python
|
337
|
+
import awkward as ak
|
338
|
+
import dask
|
339
|
+
import hist.dask as had
|
340
|
+
import matplotlib.pyplot as plt
|
341
|
+
from coffea import processor
|
342
|
+
from coffea.nanoevents import NanoEventsFactory
|
343
|
+
from distributed import Client
|
344
|
+
|
345
|
+
from atlas_schema.schema import NtupleSchema
|
346
|
+
|
347
|
+
|
348
|
+
class MyFirstProcessor(processor.ProcessorABC):
|
349
|
+
def __init__(self):
|
350
|
+
pass
|
351
|
+
|
352
|
+
def process(self, events):
|
353
|
+
dataset = events.metadata["dataset"]
|
354
|
+
h_ph_pt = (
|
355
|
+
had.Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
|
356
|
+
.Regular(200, 0.0, 2000.0, name="pt", label="$pt_{\gamma}$ [GeV]")
|
357
|
+
.Int64()
|
358
|
+
)
|
359
|
+
|
360
|
+
cut = ak.all(events.ph.isEM, axis=1)
|
361
|
+
h_ph_pt.fill(isEM="all", pt=ak.firsts(events.ph.pt / 1.0e3))
|
362
|
+
h_ph_pt.fill(isEM="pass", pt=ak.firsts(events[cut].ph.pt / 1.0e3))
|
363
|
+
h_ph_pt.fill(isEM="fail", pt=ak.firsts(events[~cut].ph.pt / 1.0e3))
|
364
|
+
|
365
|
+
return {
|
366
|
+
dataset: {
|
367
|
+
"entries": ak.num(events, axis=0),
|
368
|
+
"ph_pt": h_ph_pt,
|
369
|
+
}
|
370
|
+
}
|
371
|
+
|
372
|
+
def postprocess(self, accumulator):
|
373
|
+
pass
|
374
|
+
|
375
|
+
|
376
|
+
if __name__ == "__main__":
|
377
|
+
client = Client()
|
378
|
+
|
379
|
+
fname = "ntuple.root"
|
380
|
+
events = NanoEventsFactory.from_root(
|
381
|
+
{fname: "analysis"},
|
382
|
+
schemaclass=NtupleSchema,
|
383
|
+
metadata={"dataset": "700352.Zqqgamma.mc20d.v1"},
|
384
|
+
).events()
|
385
|
+
|
386
|
+
p = MyFirstProcessor()
|
387
|
+
out = p.process(events)
|
388
|
+
(computed,) = dask.compute(out)
|
389
|
+
print(computed)
|
390
|
+
|
391
|
+
fig, ax = plt.subplots()
|
392
|
+
computed["700352.Zqqgamma.mc20d.v1"]["ph_pt"].plot1d(ax=ax)
|
393
|
+
ax.set_xscale("log")
|
394
|
+
ax.legend(title="Photon pT for Zqqgamma")
|
395
|
+
|
396
|
+
fig.savefig("ph_pt.pdf")
|
397
|
+
```
|
398
|
+
|
399
|
+
which produces
|
400
|
+
|
401
|
+
<img src="https://raw.githubusercontent.com/scipp-atlas/atlas-schema/main/docs/_static/img/ph_pt.png" alt="three stacked histograms of photon pT, with each stack corresponding to: no selection, requiring the isEM flag, and inverting the isEM requirement" width="500" style="display: block; margin-left: auto; margin-right: auto;">
|
402
|
+
|
403
|
+
<!-- SPHINX-END -->
|
404
|
+
|
282
405
|
## Developer Notes
|
283
406
|
|
284
407
|
### Converting Enums from C++ to Python
|
@@ -0,0 +1,13 @@
|
|
1
|
+
atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
|
2
|
+
atlas_schema/_version.py,sha256=4gL0W4-u58XR5lRLpeoIPrGhcewTk0-527de6uTNmkg,411
|
3
|
+
atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
|
4
|
+
atlas_schema/enums.py,sha256=hwgOvFBmITNxL0MQkrNpbiPv9VMezFoE-eyGgjzem8E,3688
|
5
|
+
atlas_schema/methods.py,sha256=hFdtKXnyCcx4M05WhAM24fKwzEhh_ubA7jNa6_xv67k,7238
|
6
|
+
atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
atlas_schema/schema.py,sha256=4OAvuPrOds-taVES32y4K8dvNDf8PKdu83DZqAlTdp8,20621
|
8
|
+
atlas_schema/typing_compat.py,sha256=3G8h4WfLoDmrtWZvtYKLCwEpCQ_O4Fwygb2WlDRSE4E,488
|
9
|
+
atlas_schema/utils.py,sha256=IqMbWqq0ib_kZdJCaM5ghURZatmb8pKidlewx3dpy0A,2164
|
10
|
+
atlas_schema-0.2.4.dist-info/METADATA,sha256=KZDH5fsZon5wFXuU-iSUeqgjoplOwAoqTM1I9LgaTiM,20107
|
11
|
+
atlas_schema-0.2.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
12
|
+
atlas_schema-0.2.4.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
|
13
|
+
atlas_schema-0.2.4.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
|
2
|
-
atlas_schema/_version.py,sha256=RrHB9KG1O3GPm--rbTedqmZbdDrbgeRLXBmT4OBUqqI,411
|
3
|
-
atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
|
4
|
-
atlas_schema/enums.py,sha256=hwgOvFBmITNxL0MQkrNpbiPv9VMezFoE-eyGgjzem8E,3688
|
5
|
-
atlas_schema/methods.py,sha256=K7u6HGKXrtpMg7jjCjKPwIEnknOShUH4HQ1ibKBzkZ0,6832
|
6
|
-
atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
atlas_schema/schema.py,sha256=YRVaiDa5Evl2HZ9CzH23d0-TLkvxqyvFQhn0ixyWCcw,7668
|
8
|
-
atlas_schema/typing_compat.py,sha256=RwkxiiYbXO9yxkeaL8CdRaOHH7wq6vO_epg1YD7RbRs,439
|
9
|
-
atlas_schema/utils.py,sha256=Oe2G3pe009Uhawsdk9e0MuqOHbAa5vZ8F2F9pOmz_Ok,1442
|
10
|
-
atlas_schema-0.2.2.dist-info/METADATA,sha256=QeHezHbhZY-hA2xdVlrQNeZN2OSCA8hn24jzoMUZDX8,16823
|
11
|
-
atlas_schema-0.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
12
|
-
atlas_schema-0.2.2.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
|
13
|
-
atlas_schema-0.2.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|