atlas-schema 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
atlas_schema/_version.py CHANGED
@@ -1,8 +1,13 @@
1
- # file generated by setuptools_scm
1
+ # file generated by setuptools-scm
2
2
  # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
3
6
  TYPE_CHECKING = False
4
7
  if TYPE_CHECKING:
5
- from typing import Tuple, Union
8
+ from typing import Tuple
9
+ from typing import Union
10
+
6
11
  VERSION_TUPLE = Tuple[Union[int, str], ...]
7
12
  else:
8
13
  VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
12
17
  __version_tuple__: VERSION_TUPLE
13
18
  version_tuple: VERSION_TUPLE
14
19
 
15
- __version__ = version = '0.2.3'
16
- __version_tuple__ = version_tuple = (0, 2, 3)
20
+ __version__ = version = '0.3.0'
21
+ __version_tuple__ = version_tuple = (0, 3, 0)
atlas_schema/methods.py CHANGED
@@ -8,7 +8,6 @@ from operator import ior
8
8
  import awkward
9
9
  import particle
10
10
  from coffea.nanoevents.methods import base, candidate, vector
11
- from dask_awkward import dask_method
12
11
 
13
12
  from atlas_schema.enums import PhotonID
14
13
  from atlas_schema.typing_compat import Behavior
@@ -63,22 +62,9 @@ class Particle(vector.PtEtaPhiMLorentzVector):
63
62
  - '{obj}_select'
64
63
  """
65
64
 
66
- @property
67
- def mass(self):
68
- r"""Invariant mass (+, -, -, -)
69
-
70
- :math:`\sqrt{t^2-x^2-y^2-z^2}`
71
- """
72
- return self["mass"] / 1.0e3
73
-
74
- @dask_method
75
65
  def passes(self, name):
76
66
  return self[f"select_{name}"] == 1
77
67
 
78
- @passes.dask
79
- def passes(self, dask_array, name):
80
- return dask_array[f"select_{name}"] == 1
81
-
82
68
  # NB: fields with the name 'pt' take precedence over this
83
69
  # @dask_property
84
70
  # def pt(self):
@@ -166,8 +152,8 @@ behavior.update(awkward._util.copy_behaviors("Particle", "Electron", behavior))
166
152
  class Electron(Particle, base.NanoCollection, base.Systematic):
167
153
  @property
168
154
  def mass(self):
169
- """Electron mass in GeV"""
170
- return particle.literals.e_minus.mass / 1.0e3
155
+ """Electron mass in MeV"""
156
+ return awkward.ones_like(self.pt) * particle.literals.e_minus.mass
171
157
 
172
158
 
173
159
  _set_repr_name("Electron")
@@ -184,8 +170,8 @@ behavior.update(awkward._util.copy_behaviors("Particle", "Muon", behavior))
184
170
  class Muon(Particle, base.NanoCollection, base.Systematic):
185
171
  @property
186
172
  def mass(self):
187
- """Muon mass in GeV"""
188
- return particle.literals.mu_minus.mass / 1.0e3
173
+ """Muon mass in MeV"""
174
+ return awkward.ones_like(self.pt) * particle.literals.mu_minus.mass
189
175
 
190
176
 
191
177
  _set_repr_name("Muon")
@@ -202,8 +188,8 @@ behavior.update(awkward._util.copy_behaviors("Particle", "Tau", behavior))
202
188
  class Tau(Particle, base.NanoCollection, base.Systematic):
203
189
  @property
204
190
  def mass(self):
205
- """Tau mass in GeV"""
206
- return particle.literals.tau_minus.mass / 1.0e3
191
+ """Tau mass in MeV"""
192
+ return awkward.ones_like(self.pt) * particle.literals.tau_minus.mass
207
193
 
208
194
 
209
195
  _set_repr_name("Tau")
@@ -218,7 +204,14 @@ behavior.update(awkward._util.copy_behaviors("Particle", "Jet", behavior))
218
204
 
219
205
 
220
206
  @awkward.mixin_class(behavior)
221
- class Jet(Particle, base.NanoCollection, base.Systematic): ...
207
+ class Jet(Particle, base.NanoCollection, base.Systematic):
208
+ @property
209
+ def mass(self):
210
+ r"""Invariant mass (+, -, -, -)
211
+
212
+ :math:`\sqrt{t^2-x^2-y^2-z^2}`
213
+ """
214
+ return self["m"]
222
215
 
223
216
 
224
217
  _set_repr_name("Jet")
@@ -230,12 +223,24 @@ JetArray.MomentumClass = vector.LorentzVectorArray # noqa: F821
230
223
 
231
224
  __all__ = [
232
225
  "Electron",
226
+ "ElectronArray", # noqa: F822
227
+ "ElectronRecord", # noqa: F822
233
228
  "Jet",
229
+ "JetArray", # noqa: F822
230
+ "JetRecord", # noqa: F822
234
231
  "MissingET",
232
+ "MissingETArray", # noqa: F822
233
+ "MissingETRecord", # noqa: F822
235
234
  "Muon",
235
+ "MuonArray", # noqa: F822
236
+ "MuonRecord", # noqa: F822
236
237
  "NtupleEvents",
237
238
  "Particle",
239
+ "ParticleArray", # noqa: F822
240
+ "ParticleRecord", # noqa: F822
238
241
  "Pass",
239
242
  "Photon",
243
+ "PhotonArray", # noqa: F822
244
+ "PhotonRecord", # noqa: F822
240
245
  "Weight",
241
246
  ]
atlas_schema/schema.py CHANGED
@@ -1,42 +1,142 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import difflib
3
4
  import warnings
4
5
  from collections.abc import KeysView, ValuesView
5
6
  from typing import Any, ClassVar
6
7
 
7
8
  from coffea.nanoevents.schemas.base import BaseSchema, zip_forms
8
9
 
10
+ from atlas_schema.methods import behavior as roaster
9
11
  from atlas_schema.typing_compat import Behavior, Self
10
12
 
11
13
 
12
14
  class NtupleSchema(BaseSchema): # type: ignore[misc]
13
- """Ntuple schema builder
15
+ """The schema for building ATLAS ntuples following the typical centralized formats.
14
16
 
15
- The Ntuple schema is built from all branches found in the supplied file, based on
16
- the naming pattern of the branches. The following additional arrays are constructed:
17
+ This schema is built from all branches found in a tree in the supplied
18
+ file, based on the naming pattern of the branches. This naming pattern is
19
+ typically assumed to be
17
20
 
18
- - n/a
21
+ .. code-block:: bash
22
+
23
+ {collection:str}_{subcollection:str}_{systematic:str}
24
+
25
+ where:
26
+ * ``collection`` is assumed to be a prefix with typical characters, following the regex ``[a-zA-Z][a-zA-Z0-9]*``; that is, starting with a letter (upper or lower case), followed by zero or more alphanumeric characters,
27
+ * ``subcollection`` is assumed to be anything with typical characters (allowing for underscores) following the regex ``[a-zA-Z_][a-zA-Z0-9_]*``; that is, starting with a letter (upper or lower case) or underscore, followed by zero or more alphanumeric characters including underscores, and
28
+ * ``systematic`` is assumed to be either ``NOSYS`` to indicate a branch with potential systematic variations, or anything with typical characters (allowing for underscores) following the same regular expression as the ``subcollection``.
29
+
30
+ Here, a collection refers to the top-level entry to access an item - a collection called ``el`` will be accessible under the ``el`` attributes via ``events['el']`` or ``events.el``. A subcollection called ``pt`` will be accessible under that collection, such as ``events['el']['pt']`` or ``events.el.pt``. This is the power of the schema providing a more user-friendly (and programmatic) access to the underlying branches.
31
+
32
+ The above logic means that the following branches below will be categorized as follows:
33
+
34
+ +-------------------------------+-------------------+-----------------------+------------------+
35
+ | branch | collection | subcollection | systematic |
36
+ +===============================+===================+=======================+==================+
37
+ | ``'eventNumber'`` | ``'eventNumber'`` | ``None`` | ``None`` |
38
+ +-------------------------------+-------------------+-----------------------+------------------+
39
+ | ``'runNumber'`` | ``'runNumber'`` | ``None`` | ``None`` |
40
+ +-------------------------------+-------------------+-----------------------+------------------+
41
+ | ``'el_pt_NOSYS'`` | ``'el'`` | ``'pt'`` | ``'NOSYS'`` |
42
+ +-------------------------------+-------------------+-----------------------+------------------+
43
+ | ``'jet_cleanTightBad_NOSYS'`` | ``'jet'`` | ``'cleanTightBad'`` | ``'NOSYS'`` |
44
+ +-------------------------------+-------------------+-----------------------+------------------+
45
+ | ``'jet_select_btag_NOSYS'`` | ``'jet'`` | ``'select_btag'`` | ``'NOSYS'`` |
46
+ +-------------------------------+-------------------+-----------------------+------------------+
47
+ | ``'jet_e_NOSYS'`` | ``'jet'`` | ``'e'`` | ``'NOSYS'`` |
48
+ +-------------------------------+-------------------+-----------------------+------------------+
49
+ | ``'truthel_phi'`` | ``'truthel'`` | ``'phi'`` | ``None`` |
50
+ +-------------------------------+-------------------+-----------------------+------------------+
51
+ | ``'truthel_pt'`` | ``'truthel'`` | ``'pt'`` | ``None`` |
52
+ +-------------------------------+-------------------+-----------------------+------------------+
53
+ | ``'ph_eta'`` | ``'ph'`` | ``'eta'`` | ``None`` |
54
+ +-------------------------------+-------------------+-----------------------+------------------+
55
+ | ``'ph_phi_SCALE__1up'`` | ``'ph'`` | ``'phi'`` | ``'SCALE__1up'`` |
56
+ +-------------------------------+-------------------+-----------------------+------------------+
57
+ | ``'mu_TTVA_effSF_NOSYS'`` | ``'mu'`` | ``'TTVA_effSF'`` | ``'NOSYS'`` |
58
+ +-------------------------------+-------------------+-----------------------+------------------+
59
+ | ``'recojet_antikt4PFlow_pt'`` | ``'recojet'`` | ``'antikt4PFlow_pt'`` | ``None`` |
60
+ +-------------------------------+-------------------+-----------------------+------------------+
61
+ | ``'recojet_antikt10UFO_m'`` | ``'recojet'`` | ``'antikt10UFO_m'`` | ``None`` |
62
+ +-------------------------------+-------------------+-----------------------+------------------+
63
+
64
+ Sometimes this logic is not what you want, and there are ways to teach ``NtupleSchema`` how to group some of these better for atypical cases. We can address these case-by-case.
65
+
66
+ **Singletons**
67
+
68
+ Sometimes you have particular branches that you don't want to be treated as a collection (with subcollections). And sometimes you will see warnings about this (see :ref:`faq`). There are some pre-defined ``singletons`` stored under :attr:`event_ids`, and these will be lazily treated as a *singleton*. For other cases where you add your own branches, you can additionally extend this class to add your own :attr:`singletons`:
69
+
70
+ .. code-block:: python
71
+
72
+ from atlas_schema.schema import NtupleSchema
73
+
74
+
75
+ class MySchema(NtupleSchema):
76
+ singletons = {"RandomRunNumber"}
77
+
78
+ and use this schema in your analysis code. The rest of the logic will be handled for you, and you can access your singletons under ``events.RandomRunNumber`` as expected.
79
+
80
+ **Mixins (collections, subcollections)**
81
+
82
+ In more complicated scenarios, you might need to teach :class:`NtupleSchema` how to handle collections that end up having underscores in their name, or other characters that make the grouping non-trivial. In some other scenarios, you want to tell the schema to assign a certain set of behaviors to a collection - rather than the default :class:`atlas_schema.methods.Particle` behavior. This is where :attr:`mixins` comes in. Similar to how :attr:`singletons` are handled, you extend this schema to include your own ``mixins`` pointing them at one of the behaviors defined in :mod:`atlas_schema.methods`.
83
+
84
+ Let's demonstrate both cases. Imagine you want to have your ``truthel`` collections above treated as :class:`atlas_schema.methods.Electron`, then you would extend the existing :attr:`mixins`:
85
+
86
+ .. code-block:: python
87
+
88
+ from atlas_schema.schema import NtupleSchema
89
+
90
+
91
+ class MySchema(NtupleSchema):
92
+ mixins = {"truthel": "Electron", **NtupleSchema.mixins}
93
+
94
+ Now, ``events.truthel`` will give you arrays zipped up with :class:`atlas_schema.methods.Electron` behaviors.
95
+
96
+ If instead, you run into problems with mixing different branches in the same collection, because the default behavior of this schema described above is not smart enough to handle the atypical cases, you can explicitly fix this by defining your collections:
97
+
98
+ .. code-block:: python
99
+
100
+ from atlas_schema.schema import NtupleSchema
101
+
102
+
103
+ class MySchema(NtupleSchema):
104
+ mixins = {
105
+ "recojet_antikt4PFlow": "Jet",
106
+ "recojet_antikt10UFO": "Jet",
107
+ **NtupleSchema.mixins,
108
+ }
109
+
110
+ Now, ``events.recojet_antikt4PFlow`` and ``events.recojet_antikt10UFO`` will be separate collections, instead of a single ``events.recojet`` that incorrectly merged branches from each of these collections.
19
111
  """
20
112
 
21
- __dask_capable__ = True
113
+ __dask_capable__: ClassVar[bool] = True
114
+
115
+ warn_missing_crossrefs: ClassVar[bool] = True
22
116
 
23
- warn_missing_crossrefs = True
24
- error_missing_event_ids = False
117
+ #: Treat missing event-level branches as error instead of warning (default is ``False``)
118
+ error_missing_event_ids: ClassVar[bool] = False
119
+ #: Determine closest behavior for a given branch or treat branch as :attr:`default_behavior` (default is ``True``)
120
+ identify_closest_behavior: ClassVar[bool] = True
25
121
 
122
+ #: event IDs to expect in data datasets
26
123
  event_ids_data: ClassVar[set[str]] = {
27
124
  "lumiBlock",
28
125
  "averageInteractionsPerCrossing",
29
126
  "actualInteractionsPerCrossing",
30
127
  "dataTakingYear",
31
128
  }
129
+ #: event IDs to expect in MC datasets
32
130
  event_ids_mc: ClassVar[set[str]] = {
33
131
  "mcChannelNumber",
34
132
  "runNumber",
35
133
  "eventNumber",
36
134
  "mcEventWeights",
37
135
  }
136
+ #: all event IDs to expect in the dataset
38
137
  event_ids: ClassVar[set[str]] = {*event_ids_data, *event_ids_mc}
39
138
 
139
+ #: mixins defining the mapping from collection name to behavior to use for that collection
40
140
  mixins: ClassVar[dict[str, str]] = {
41
141
  "el": "Electron",
42
142
  "jet": "Jet",
@@ -48,9 +148,10 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
48
148
  "weight": "Weight",
49
149
  }
50
150
 
51
- # These are stored as length-1 vectors unnecessarily
151
+ #: additional branches to pass-through with no zipping or additional interpretation (such as those stored as length-1 vectors)
52
152
  singletons: ClassVar[set[str]] = set()
53
153
 
154
+ #: docstrings to assign for specific subcollections across the various collections identified by this schema
54
155
  docstrings: ClassVar[dict[str, str]] = {
55
156
  "charge": "charge",
56
157
  "eta": "pseudorapidity",
@@ -60,6 +161,9 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
60
161
  "phi": "azimuthal angle",
61
162
  }
62
163
 
164
+ #: default behavior to use for any collection (default ``"NanoCollection"``, from :class:`coffea.nanoevents.methods.base.NanoCollection`)
165
+ default_behavior: ClassVar[str] = "NanoCollection"
166
+
63
167
  def __init__(self, base_form: dict[str, Any], version: str = "latest"):
64
168
  super().__init__(base_form)
65
169
  self._version = version
@@ -87,10 +191,37 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
87
191
  branch_forms = dict(zip(field_names, input_contents))
88
192
 
89
193
  # parse into high-level records (collections, list collections, and singletons)
90
- collections = {k.split("_")[0] for k in branch_forms}
194
+ collections = {
195
+ k.split("_")[0] for k in branch_forms if k not in self.singletons
196
+ }
91
197
  collections -= self.event_ids
92
198
  collections -= set(self.singletons)
93
199
 
200
+ # now handle any collections that we identified that are substrings of the items in the mixins
201
+ # convert all valid branch_forms into strings to make the lookups a bit faster
202
+ bf_str = ",".join(branch_forms.keys())
203
+ for mixin in self.mixins:
204
+ if mixin in collections:
205
+ continue
206
+ if f",{mixin}_" not in bf_str and not bf_str.startswith(f"{mixin}_"):
207
+ continue
208
+ if "_" in mixin:
209
+ warnings.warn(
210
+ f"I identified a mixin that I did not automatically identify as a collection because it contained an underscore: '{mixin}'. I will add this to the known collections. To suppress this warning next time, please create your ntuples with collections without underscores. [mixin-underscore]",
211
+ RuntimeWarning,
212
+ stacklevel=2,
213
+ )
214
+ collections.add(mixin)
215
+ for collection in list(collections):
216
+ if mixin.startswith(f"{collection}_"):
217
+ warnings.warn(
218
+ f"I found a misidentified collection: '{collection}'. I will remove this from the known collections. To suppress this warning next time, please create your ntuples with collections that are not similarly named with underscores. [collection-subset]",
219
+ RuntimeWarning,
220
+ stacklevel=2,
221
+ )
222
+ collections.remove(collection)
223
+ break
224
+
94
225
  # rename needed because easyjet breaks the AMG assumptions
95
226
  # https://gitlab.cern.ch/easyjet/easyjet/-/issues/246
96
227
  for k in list(branch_forms):
@@ -99,17 +230,25 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
99
230
  branch_forms[k.replace("_NOSYS", "") + "_NOSYS"] = branch_forms.pop(k)
100
231
 
101
232
  # these are collections with systematic variations
102
- subcollections = {
103
- k.split("__")[0].split("_", 1)[1].replace("_NOSYS", "")
104
- for k in branch_forms
105
- if "NOSYS" in k
106
- }
233
+ try:
234
+ subcollections = {
235
+ k.split("__")[0].split("_", 1)[1].replace("_NOSYS", "")
236
+ for k in branch_forms
237
+ if "NOSYS" in k and k not in self.singletons
238
+ }
239
+ except IndexError as exc:
240
+ msg = "One of the branches does not follow the assumed pattern for this schema. [invalid-branch-name]"
241
+ raise RuntimeError(msg) from exc
107
242
 
108
243
  # Check the presence of the event_ids
109
244
  missing_event_ids = [
110
245
  event_id for event_id in self.event_ids if event_id not in branch_forms
111
246
  ]
112
247
 
248
+ missing_singletons = [
249
+ singleton for singleton in self.singletons if singleton not in branch_forms
250
+ ]
251
+
113
252
  if len(missing_event_ids) > 0:
114
253
  if self.error_missing_event_ids:
115
254
  msg = f"There are missing event ID fields: {missing_event_ids} \n\n\
@@ -125,17 +264,29 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
125
264
  stacklevel=2,
126
265
  )
127
266
 
267
+ if len(missing_singletons) > 0:
268
+ # These singletons are simply branches we do not parse or handle
269
+ # explicitly in atlas-schema (e.g. they are copied directly to the
270
+ # output structure we provide you), however there can be false
271
+ # positives when you submit multiple files with different branch
272
+ # structures and this warning could be safely ignored.
273
+ warnings.warn(
274
+ f"Missing singletons : {missing_singletons}. [singleton-missing]",
275
+ RuntimeWarning,
276
+ stacklevel=2,
277
+ )
278
+
128
279
  output = {}
129
280
 
130
281
  # first, register singletons (event-level, others)
131
282
  for name in {*self.event_ids, *self.singletons}:
132
- if name in missing_event_ids:
283
+ if name in [*missing_event_ids, *missing_singletons]:
133
284
  continue
285
+
134
286
  output[name] = branch_forms[name]
135
287
 
136
288
  # next, go through and start grouping up collections
137
289
  for name in collections:
138
- mixin = self.mixins.get(name, "NanoCollection")
139
290
  content = {}
140
291
  used = set()
141
292
 
@@ -165,7 +316,7 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
165
316
 
166
317
  if not used and not content:
167
318
  warnings.warn(
168
- f"I identified a branch that likely does not have any leaves: '{name}'. I will treat this as a 'singleton'. To suppress this warning next time, please define your singletons explicitly.",
319
+ f"I identified a branch that likely does not have any leaves: '{name}'. I will treat this as a 'singleton'. To suppress this warning next time, please define your singletons explicitly. [singleton-undefined]",
169
320
  RuntimeWarning,
170
321
  stacklevel=2,
171
322
  )
@@ -173,14 +324,27 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
173
324
  output[name] = branch_forms[name]
174
325
 
175
326
  else:
176
- output[name] = zip_forms(content, name, record_name=mixin)
327
+ behavior = self.mixins.get(name, "")
328
+ if not behavior:
329
+ behavior = self.suggested_behavior(name)
330
+ warnings.warn(
331
+ f"I found a collection with no defined mixin: '{name}'. I will assume behavior: '{behavior}'. To suppress this warning next time, please define mixins for your custom collections. [mixin-undefined]",
332
+ RuntimeWarning,
333
+ stacklevel=2,
334
+ )
335
+
336
+ output[name] = zip_forms(content, name, record_name=behavior)
177
337
 
178
338
  output[name].setdefault("parameters", {})
179
339
  output[name]["parameters"].update({"collection_name": name})
180
340
 
181
341
  if output[name]["class"] == "ListOffsetArray":
182
- parameters = output[name]["content"]["fields"]
183
- contents = output[name]["content"]["contents"]
342
+ if output[name]["class"] == "RecordArray":
343
+ parameters = output[name]["content"]["fields"]
344
+ contents = output[name]["content"]["contents"]
345
+ else:
346
+ # these are also singletons of another kind that we just pass through
347
+ continue
184
348
  elif output[name]["class"] == "RecordArray":
185
349
  parameters = output[name]["fields"]
186
350
  contents = output[name]["contents"]
@@ -190,6 +354,7 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
190
354
  else:
191
355
  msg = f"Unhandled class {output[name]['class']}"
192
356
  raise RuntimeError(msg)
357
+
193
358
  # update docstrings as needed
194
359
  # NB: must be before flattening for easier logic
195
360
  for index, parameter in enumerate(parameters):
@@ -208,7 +373,55 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
208
373
 
209
374
  @classmethod
210
375
  def behavior(cls) -> Behavior:
211
- """Behaviors necessary to implement this schema"""
212
- from atlas_schema.methods import behavior as roaster
376
+ """Behaviors necessary to implement this schema
213
377
 
378
+ Returns:
379
+ dict[str | tuple['*', str], type[awkward.Record]]: an :data:`awkward.behavior` dictionary
380
+ """
214
381
  return roaster
382
+
383
+ @classmethod
384
+ def suggested_behavior(cls, key: str, cutoff: float = 0.4) -> str:
385
+ """
386
+ Suggest a behavior to use for a provided collection or branch name.
387
+
388
+ Default behavior: :class:`~coffea.nanoevents.methods.base.NanoCollection`.
389
+
390
+ Note:
391
+ If :attr:`identify_closest_behavior` is ``False``, then this function will return the default behavior ``NanoCollection``.
392
+
393
+ Warning:
394
+ If no behavior is found above the *cutoff* score, then this function will return the default behavior.
395
+
396
+ Args:
397
+ key (str): collection name to suggest a matching behavior for
398
+ cutoff (float): optional argument cutoff (default ``0.4``) is a float in the range ``[0, 1]``. Possibilities that don't score at least that similar to *key* are ignored.
399
+
400
+ Returns:
401
+ str: suggested behavior to use by string
402
+
403
+ Example:
404
+ >>> from atlas_schema.schema import NtupleSchema
405
+ >>> NtupleSchema.suggested_behavior("truthjet")
406
+ 'Jet'
407
+ >>> NtupleSchema.suggested_behavior("SignalElectron")
408
+ 'Electron'
409
+ >>> NtupleSchema.suggested_behavior("generatorWeight")
410
+ 'Weight'
411
+ >>> NtupleSchema.suggested_behavior("aVeryStrangelyNamedBranchWithNoMatch")
412
+ 'NanoCollection'
413
+ """
414
+ if cls.identify_closest_behavior:
415
+ # lowercase everything to do case-insensitive matching
416
+ behaviors = [b for b in cls.behavior() if isinstance(b, str)]
417
+ behaviors_l = [b.lower() for b in behaviors]
418
+ results = difflib.get_close_matches(
419
+ key.lower(), behaviors_l, n=1, cutoff=cutoff
420
+ )
421
+ if not results:
422
+ return cls.default_behavior
423
+
424
+ behavior = results[0]
425
+ # need to identify the index and return the unlowered version
426
+ return behaviors[behaviors_l.index(behavior)]
427
+ return cls.default_behavior
@@ -5,7 +5,7 @@ Typing helpers.
5
5
  from __future__ import annotations
6
6
 
7
7
  import sys
8
- from typing import Annotated
8
+ from typing import Annotated, Literal, Union
9
9
 
10
10
  import awkward
11
11
 
@@ -19,6 +19,6 @@ if sys.version_info >= (3, 11):
19
19
  else:
20
20
  from typing_extensions import Self
21
21
 
22
- Behavior: TypeAlias = dict[str, type[awkward.Record]]
22
+ Behavior: TypeAlias = dict[Union[str, tuple[Literal["*"]], str], type[awkward.Record]]
23
23
 
24
24
  __all__ = ("Annotated", "Behavior", "Self")
atlas_schema/utils.py CHANGED
@@ -1,16 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from enum import Enum
4
- from typing import TypeVar, Union, cast
4
+ from typing import TypeVar, cast
5
5
 
6
6
  import awkward as ak
7
- import dask_awkward as dak
8
7
 
9
- Array = TypeVar("Array", bound=Union[dak.Array, ak.Array])
8
+ Array = TypeVar("Array", bound=ak.Array)
10
9
  _E = TypeVar("_E", bound=Enum)
11
10
 
12
11
 
13
- def isin(element: Array, test_elements: dak.Array | ak.Array, axis: int = -1) -> Array:
12
+ def isin(element: Array, test_elements: ak.Array, axis: int = -1) -> Array:
14
13
  """
15
14
  Find test_elements in element. Similar in API as :func:`numpy.isin`.
16
15
 
@@ -21,12 +20,12 @@ def isin(element: Array, test_elements: dak.Array | ak.Array, axis: int = -1) ->
21
20
  comparison.
22
21
 
23
22
  Args:
24
- element (dak.Array or ak.Array): input array of values.
25
- test_elements (dak.Array or ak.Array): one-dimensional set of values against which to test each value of *element*.
23
+ element (ak.Array): input array of values.
24
+ test_elements (ak.Array): one-dimensional set of values against which to test each value of *element*.
26
25
  axis (int): the axis along which the comparison is performed
27
26
 
28
27
  Returns:
29
- dak.Array or ak.Array: result of comparison for test_elements in *element*
28
+ ak.Array: result of comparison for test_elements in *element*
30
29
 
31
30
  Example:
32
31
  >>> import awkward as ak
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atlas-schema
3
- Version: 0.2.3
3
+ Version: 0.3.0
4
4
  Summary: Helper python package for ATLAS Common NTuple Analysis work.
5
5
  Project-URL: Homepage, https://github.com/scipp-atlas/atlas-schema
6
6
  Project-URL: Bug Tracker, https://github.com/scipp-atlas/atlas-schema/issues
7
7
  Project-URL: Discussions, https://github.com/scipp-atlas/atlas-schema/discussions
8
- Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.2.3/
8
+ Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.3.0/
9
9
  Project-URL: Releases, https://github.com/scipp-atlas/atlas-schema/releases
10
10
  Project-URL: Release Notes, https://atlas-schema.readthedocs.io/en/latest/history.html
11
11
  Author-email: Giordon Stark <kratsg@gmail.com>
@@ -227,7 +227,7 @@ Classifier: Programming Language :: Python :: 3.12
227
227
  Classifier: Topic :: Scientific/Engineering
228
228
  Classifier: Typing :: Typed
229
229
  Requires-Python: >=3.9
230
- Requires-Dist: coffea[dask]>=2024.4.1
230
+ Requires-Dist: coffea[dask]>=2025.7.0
231
231
  Requires-Dist: particle>=0.25.0
232
232
  Provides-Extra: dev
233
233
  Requires-Dist: pytest-cov>=3; extra == 'dev'
@@ -251,7 +251,7 @@ Requires-Dist: tbump>=6.7.0; extra == 'test'
251
251
  Requires-Dist: twine; extra == 'test'
252
252
  Description-Content-Type: text/markdown
253
253
 
254
- # atlas-schema v0.2.3
254
+ # atlas-schema v0.3.0
255
255
 
256
256
  [![Actions Status][actions-badge]][actions-link]
257
257
  [![Documentation Status][rtd-badge]][rtd-link]
@@ -335,11 +335,9 @@ like below:
335
335
 
336
336
  ```python
337
337
  import awkward as ak
338
- import dask
339
- import hist.dask as had
338
+ from hist import Hist
340
339
  import matplotlib.pyplot as plt
341
340
  from coffea import processor
342
- from coffea.nanoevents import NanoEventsFactory
343
341
  from distributed import Client
344
342
 
345
343
  from atlas_schema.schema import NtupleSchema
@@ -352,7 +350,7 @@ class MyFirstProcessor(processor.ProcessorABC):
352
350
  def process(self, events):
353
351
  dataset = events.metadata["dataset"]
354
352
  h_ph_pt = (
355
- had.Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
353
+ Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
356
354
  .Regular(200, 0.0, 2000.0, name="pt", label="$pt_{\gamma}$ [GeV]")
357
355
  .Int64()
358
356
  )
@@ -376,17 +374,18 @@ class MyFirstProcessor(processor.ProcessorABC):
376
374
  if __name__ == "__main__":
377
375
  client = Client()
378
376
 
379
- fname = "ntuple.root"
380
- events = NanoEventsFactory.from_root(
381
- {fname: "analysis"},
382
- schemaclass=NtupleSchema,
383
- metadata={"dataset": "700352.Zqqgamma.mc20d.v1"},
384
- ).events()
385
-
386
- p = MyFirstProcessor()
387
- out = p.process(events)
388
- (computed,) = dask.compute(out)
389
- print(computed)
377
+ fileset = {"700352.Zqqgamma.mc20d.v1": {"files": {"ntuple.root": "analysis"}}}
378
+
379
+ run = processor.Runner(
380
+ executor=processor.IterativeExecutor(compression=None),
381
+ schema=NtupleSchema,
382
+ savemetrics=True,
383
+ )
384
+
385
+ out, metrics = run(fileset, processor_instance=MyFirstProcessor())
386
+
387
+ print(out)
388
+ print(metrics)
390
389
 
391
390
  fig, ax = plt.subplots()
392
391
  out["700352.Zqqgamma.mc20d.v1"]["ph_pt"].plot1d(ax=ax)
@@ -0,0 +1,13 @@
1
+ atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
2
+ atlas_schema/_version.py,sha256=AGmG_Lx0-9ztFw_7d9mYbaYuC-2abxE1oXOUNAY29YY,511
3
+ atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
4
+ atlas_schema/enums.py,sha256=hwgOvFBmITNxL0MQkrNpbiPv9VMezFoE-eyGgjzem8E,3688
5
+ atlas_schema/methods.py,sha256=DPeEFofeD5_bCk7V3KudJaE_sAUMpBIh-gPnM4kWDe8,7124
6
+ atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ atlas_schema/schema.py,sha256=44i7ri-7OP4SK6_D_3JAGvoiIY-IcPBp1p6MlKfWK5I,21675
8
+ atlas_schema/typing_compat.py,sha256=3G8h4WfLoDmrtWZvtYKLCwEpCQ_O4Fwygb2WlDRSE4E,488
9
+ atlas_schema/utils.py,sha256=E3jCka-pf_0h_r3OO0hMLlbF6dQKoxr2T1Gd18-aJ4U,2034
10
+ atlas_schema-0.3.0.dist-info/METADATA,sha256=NCkA4ydLhlTHYJOTipTgZlR7_yhDsDe7zpa0jbnqw00,20069
11
+ atlas_schema-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
+ atlas_schema-0.3.0.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
13
+ atlas_schema-0.3.0.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
2
- atlas_schema/_version.py,sha256=AaQEeqeDwmZAHoPuwg2C0ulADePbIYLSFanZzt0cytQ,411
3
- atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
4
- atlas_schema/enums.py,sha256=hwgOvFBmITNxL0MQkrNpbiPv9VMezFoE-eyGgjzem8E,3688
5
- atlas_schema/methods.py,sha256=K7u6HGKXrtpMg7jjCjKPwIEnknOShUH4HQ1ibKBzkZ0,6832
6
- atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- atlas_schema/schema.py,sha256=s3bcSa5DH5iILPD_2BD3co8MSTsXNs1rYBmn44388Kc,8082
8
- atlas_schema/typing_compat.py,sha256=RwkxiiYbXO9yxkeaL8CdRaOHH7wq6vO_epg1YD7RbRs,439
9
- atlas_schema/utils.py,sha256=spk7KIMBbXSPpZBTltyxaHWvyitkEGSVldfuKFoyavk,2137
10
- atlas_schema-0.2.3.dist-info/METADATA,sha256=xtXdXa-9ra8TTtZSXLycUKPvBLPzXCIPa5cueuv0w90,20107
11
- atlas_schema-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
- atlas_schema-0.2.3.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
13
- atlas_schema-0.2.3.dist-info/RECORD,,