atlas-schema 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
atlas_schema/__init__.py CHANGED
@@ -10,7 +10,8 @@ import warnings
10
10
 
11
11
  from atlas_schema._version import version as __version__
12
12
  from atlas_schema.enums import ParticleOrigin, PhotonID
13
+ from atlas_schema.utils import isin
13
14
 
14
15
  warnings.filterwarnings("ignore", module="coffea.*")
15
16
 
16
- __all__ = ["ParticleOrigin", "PhotonID", "__version__"]
17
+ __all__ = ["ParticleOrigin", "PhotonID", "__version__", "isin"]
atlas_schema/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.2.1'
16
- __version_tuple__ = version_tuple = (0, 2, 1)
15
+ __version__ = version = '0.2.3'
16
+ __version_tuple__ = version_tuple = (0, 2, 3)
atlas_schema/enums.py CHANGED
@@ -1,9 +1,31 @@
1
1
  from __future__ import annotations
2
2
 
3
- from enum import IntEnum
3
+ import sys
4
+ from enum import Enum, IntEnum
4
5
 
6
+ if sys.version_info >= (3, 11):
7
+ from enum import EnumType
8
+ else:
9
+ from enum import EnumMeta as EnumType
5
10
 
6
- class ParticleType(IntEnum):
11
+ from typing import Callable, TypeVar, cast
12
+
13
+ _E = TypeVar("_E", bound=Enum)
14
+
15
+
16
+ class MultipleEnumAccessMeta(EnumType):
17
+ """
18
+ Enum Metaclass to provide a way to access multiple values all at once.
19
+ """
20
+
21
+ def __getitem__(self: type[_E], key: str | tuple[str]) -> _E | list[_E]: # type:ignore[misc,override]
22
+ getitem = cast(Callable[[str], _E], super().__getitem__) # type:ignore[misc]
23
+ if isinstance(key, tuple):
24
+ return [getitem(name) for name in key]
25
+ return getitem(key)
26
+
27
+
28
+ class ParticleType(IntEnum, metaclass=MultipleEnumAccessMeta):
7
29
  """
8
30
  Taken from `ATLAS Truth Utilities for ParticleType <https://gitlab.cern.ch/atlas/athena/-/blob/74f43ff0910edb2a2bd3778880ccbdad648dc037/Generators/TruthUtils/TruthUtils/TruthClasses.h#L8-49>`_.
9
31
  """
@@ -50,7 +72,7 @@ class ParticleType(IntEnum):
50
72
  UnknownJet = 38
51
73
 
52
74
 
53
- class ParticleOrigin(IntEnum):
75
+ class ParticleOrigin(IntEnum, metaclass=MultipleEnumAccessMeta):
54
76
  """
55
77
  Taken from `ATLAS Truth Utilities for ParticleOrigin <https://gitlab.cern.ch/atlas/athena/-/blob/74f43ff0910edb2a2bd3778880ccbdad648dc037/Generators/TruthUtils/TruthUtils/TruthClasses.h#L51-103>`_.
56
78
  """
@@ -105,7 +127,7 @@ class ParticleOrigin(IntEnum):
105
127
  QCD = 45
106
128
 
107
129
 
108
- class PhotonID(IntEnum):
130
+ class PhotonID(IntEnum, metaclass=MultipleEnumAccessMeta):
109
131
  """
110
132
  Taken from the `EGamma Identification CP group's twiki <https://twiki.cern.ch/twiki/bin/viewauth/AtlasProtected/EGammaIdentificationRun2#Photon_isEM_word>`_.
111
133
  """
atlas_schema/schema.py CHANGED
@@ -49,7 +49,7 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
49
49
  }
50
50
 
51
51
  # These are stored as length-1 vectors unnecessarily
52
- singletons: ClassVar[list[str]] = []
52
+ singletons: ClassVar[set[str]] = set()
53
53
 
54
54
  docstrings: ClassVar[dict[str, str]] = {
55
55
  "charge": "charge",
@@ -127,8 +127,8 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
127
127
 
128
128
  output = {}
129
129
 
130
- # first, register the event-level stuff directly
131
- for name in self.event_ids:
130
+ # first, register singletons (event-level, others)
131
+ for name in {*self.event_ids, *self.singletons}:
132
132
  if name in missing_event_ids:
133
133
  continue
134
134
  output[name] = branch_forms[name]
@@ -163,7 +163,17 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
163
163
  }
164
164
  )
165
165
 
166
- output[name] = zip_forms(content, name, record_name=mixin)
166
+ if not used and not content:
167
+ warnings.warn(
168
+ f"I identified a branch that likely does not have any leaves: '{name}'. I will treat this as a 'singleton'. To suppress this warning next time, please define your singletons explicitly.",
169
+ RuntimeWarning,
170
+ stacklevel=2,
171
+ )
172
+ self.singletons.add(name)
173
+ output[name] = branch_forms[name]
174
+
175
+ else:
176
+ output[name] = zip_forms(content, name, record_name=mixin)
167
177
 
168
178
  output[name].setdefault("parameters", {})
169
179
  output[name]["parameters"].update({"collection_name": name})
@@ -174,6 +184,9 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
174
184
  elif output[name]["class"] == "RecordArray":
175
185
  parameters = output[name]["fields"]
176
186
  contents = output[name]["contents"]
187
+ elif output[name]["class"] == "NumpyArray":
188
+ # these are singletons that we just pass through
189
+ continue
177
190
  else:
178
191
  msg = f"Unhandled class {output[name]['class']}"
179
192
  raise RuntimeError(msg)
@@ -191,11 +204,6 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
191
204
  ),
192
205
  )
193
206
 
194
- if name in self.singletons:
195
- # flatten! this 'promotes' the content of an inner dimension
196
- # upwards, effectively hiding one nested dimension
197
- output[name] = output[name]["content"]
198
-
199
207
  return output.keys(), output.values()
200
208
 
201
209
  @classmethod
atlas_schema/utils.py ADDED
@@ -0,0 +1,49 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum
4
+ from typing import TypeVar, Union, cast
5
+
6
+ import awkward as ak
7
+ import dask_awkward as dak
8
+
9
+ Array = TypeVar("Array", bound=Union[dak.Array, ak.Array])
10
+ _E = TypeVar("_E", bound=Enum)
11
+
12
+
13
+ def isin(element: Array, test_elements: dak.Array | ak.Array, axis: int = -1) -> Array:
14
+ """
15
+ Find test_elements in element. Similar in API to :func:`numpy.isin`.
16
+
17
+ Calculates `element in test_elements`, broadcasting over *element* only. Returns a boolean array of the same shape as *element* that is `True` where an element of *element* is in *test_elements* and `False` otherwise.
18
+
19
+ This works by first transforming *test_elements* to an array with one more
20
+ dimension than the *element*, placing the *test_elements* at *axis*, and then doing a
21
+ comparison.
22
+
23
+ Args:
24
+ element (dak.Array or ak.Array): input array of values.
25
+ test_elements (dak.Array or ak.Array): one-dimensional set of values against which to test each value of *element*.
26
+ axis (int): the axis along which the comparison is performed
27
+
28
+ Returns:
29
+ dak.Array or ak.Array: boolean result of testing each value of *element* for membership in *test_elements*
30
+
31
+ Example:
32
+ >>> import awkward as ak
33
+ >>> import atlas_schema as ats
34
+ >>> truth_origins = ak.Array([[1, 2, 3], [4], [5, 6, 7], [1]])
35
+ >>> prompt_origins = ak.Array([1, 2, 7])
36
+ >>> ats.isin(truth_origins, prompt_origins).to_list()
37
+ [[True, True, False], [False], [False, False, True], [True]]
38
+ """
39
+ assert test_elements.ndim == 1, "test_elements must be one-dimensional"
40
+ assert axis >= -1, "axis must be -1 or positive-valued"
41
+ assert axis < element.ndim + 1, "axis too large for the element"
42
+
43
+ # First, build up the transformation, with slice(None) indicating where to stick the test_elements
44
+ reshaper: list[None | slice] = [None] * element.ndim
45
+ axis = element.ndim if axis == -1 else axis
46
+ reshaper.insert(axis, slice(None))
47
+
48
+ # Note: reshaper needs to be a tuple for indexing purposes
49
+ return cast(Array, ak.any(element == test_elements[tuple(reshaper)], axis=-1))
@@ -1,11 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atlas-schema
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Helper python package for ATLAS Common NTuple Analysis work.
5
5
  Project-URL: Homepage, https://github.com/scipp-atlas/atlas-schema
6
6
  Project-URL: Bug Tracker, https://github.com/scipp-atlas/atlas-schema/issues
7
7
  Project-URL: Discussions, https://github.com/scipp-atlas/atlas-schema/discussions
8
- Project-URL: Changelog, https://github.com/scipp-atlas/atlas-schema/releases
8
+ Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.2.3/
9
+ Project-URL: Releases, https://github.com/scipp-atlas/atlas-schema/releases
10
+ Project-URL: Release Notes, https://atlas-schema.readthedocs.io/en/latest/history.html
9
11
  Author-email: Giordon Stark <kratsg@gmail.com>
10
12
  License:
11
13
  Apache License
@@ -249,7 +251,7 @@ Requires-Dist: tbump>=6.7.0; extra == 'test'
249
251
  Requires-Dist: twine; extra == 'test'
250
252
  Description-Content-Type: text/markdown
251
253
 
252
- # atlas-schema v0.2.1
254
+ # atlas-schema v0.2.3
253
255
 
254
256
  [![Actions Status][actions-badge]][actions-link]
255
257
  [![Documentation Status][rtd-badge]][rtd-link]
@@ -277,6 +279,129 @@ Description-Content-Type: text/markdown
277
279
 
278
280
  <!-- prettier-ignore-end -->
279
281
 
282
+ This is the python package containing schemas and helper functions enabling
283
+ analyzers to work with ATLAS datasets (Monte Carlo and Data), using
284
+ [coffea](https://coffea-hep.readthedocs.io/en/latest/).
285
+
286
+ ## Hello World
287
+
288
+ The simplest way to get started is to process a file and list the fields it provides:
289
+
290
+ ```python
291
+ from atlas_schema.schema import NtupleSchema
292
+ from coffea import dataset_tools
293
+ import awkward as ak
294
+
295
+ fileset = {"ttbar": {"files": {"path/to/ttbar.root": "tree_name"}}}
296
+ samples, report = dataset_tools.preprocess(fileset)
297
+
298
+
299
+ def noop(events):
300
+ return ak.fields(events)
301
+
302
+
303
+ fields = dataset_tools.apply_to_fileset(noop, samples, schemaclass=NtupleSchema)
304
+ print(fields)
305
+ ```
306
+
307
+ which produces something similar to
308
+
309
+ ```python
310
+ {
311
+ "ttbar": [
312
+ "dataTakingYear",
313
+ "mcChannelNumber",
314
+ "runNumber",
315
+ "eventNumber",
316
+ "lumiBlock",
317
+ "actualInteractionsPerCrossing",
318
+ "averageInteractionsPerCrossing",
319
+ "truthjet",
320
+ "PileupWeight",
321
+ "RandomRunNumber",
322
+ "met",
323
+ "recojet",
324
+ "truth",
325
+ "generatorWeight",
326
+ "beamSpotWeight",
327
+ "trigPassed",
328
+ "jvt",
329
+ ]
330
+ }
331
+ ```
332
+
333
+ A more involved example, which applies a selection and fills a histogram, looks
334
+ like this:
335
+
336
+ ```python
337
+ import awkward as ak
338
+ import dask
339
+ import hist.dask as had
340
+ import matplotlib.pyplot as plt
341
+ from coffea import processor
342
+ from coffea.nanoevents import NanoEventsFactory
343
+ from distributed import Client
344
+
345
+ from atlas_schema.schema import NtupleSchema
346
+
347
+
348
+ class MyFirstProcessor(processor.ProcessorABC):
349
+ def __init__(self):
350
+ pass
351
+
352
+ def process(self, events):
353
+ dataset = events.metadata["dataset"]
354
+ h_ph_pt = (
355
+ had.Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
356
+ .Regular(200, 0.0, 2000.0, name="pt", label=r"$pt_{\gamma}$ [GeV]")
357
+ .Int64()
358
+ )
359
+
360
+ cut = ak.all(events.ph.isEM, axis=1)
361
+ h_ph_pt.fill(isEM="all", pt=ak.firsts(events.ph.pt / 1.0e3))
362
+ h_ph_pt.fill(isEM="pass", pt=ak.firsts(events[cut].ph.pt / 1.0e3))
363
+ h_ph_pt.fill(isEM="fail", pt=ak.firsts(events[~cut].ph.pt / 1.0e3))
364
+
365
+ return {
366
+ dataset: {
367
+ "entries": ak.num(events, axis=0),
368
+ "ph_pt": h_ph_pt,
369
+ }
370
+ }
371
+
372
+ def postprocess(self, accumulator):
373
+ pass
374
+
375
+
376
+ if __name__ == "__main__":
377
+ client = Client()
378
+
379
+ fname = "ntuple.root"
380
+ events = NanoEventsFactory.from_root(
381
+ {fname: "analysis"},
382
+ schemaclass=NtupleSchema,
383
+ metadata={"dataset": "700352.Zqqgamma.mc20d.v1"},
384
+ ).events()
385
+
386
+ p = MyFirstProcessor()
387
+ out = p.process(events)
388
+ (computed,) = dask.compute(out)
389
+ print(computed)
390
+
391
+ fig, ax = plt.subplots()
392
+ computed["700352.Zqqgamma.mc20d.v1"]["ph_pt"].plot1d(ax=ax)
393
+ ax.set_xscale("log")
394
+ ax.legend(title="Photon pT for Zqqgamma")
395
+
396
+ fig.savefig("ph_pt.pdf")
397
+ ```
398
+
399
+ which produces
400
+
401
+ <img src="https://raw.githubusercontent.com/scipp-atlas/atlas-schema/main/docs/_static/img/ph_pt.png" alt="three stacked histograms of photon pT, with each stack corresponding to: no selection, requiring the isEM flag, and inverting the isEM requirement" width="500" style="display: block; margin-left: auto; margin-right: auto;">
402
+
403
+ <!-- SPHINX-END -->
404
+
280
405
  ## Developer Notes
281
406
 
282
407
  ### Converting Enums from C++ to Python
@@ -0,0 +1,13 @@
1
+ atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
2
+ atlas_schema/_version.py,sha256=AaQEeqeDwmZAHoPuwg2C0ulADePbIYLSFanZzt0cytQ,411
3
+ atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
4
+ atlas_schema/enums.py,sha256=hwgOvFBmITNxL0MQkrNpbiPv9VMezFoE-eyGgjzem8E,3688
5
+ atlas_schema/methods.py,sha256=K7u6HGKXrtpMg7jjCjKPwIEnknOShUH4HQ1ibKBzkZ0,6832
6
+ atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ atlas_schema/schema.py,sha256=s3bcSa5DH5iILPD_2BD3co8MSTsXNs1rYBmn44388Kc,8082
8
+ atlas_schema/typing_compat.py,sha256=RwkxiiYbXO9yxkeaL8CdRaOHH7wq6vO_epg1YD7RbRs,439
9
+ atlas_schema/utils.py,sha256=spk7KIMBbXSPpZBTltyxaHWvyitkEGSVldfuKFoyavk,2137
10
+ atlas_schema-0.2.3.dist-info/METADATA,sha256=xtXdXa-9ra8TTtZSXLycUKPvBLPzXCIPa5cueuv0w90,20107
11
+ atlas_schema-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
+ atlas_schema-0.2.3.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
13
+ atlas_schema-0.2.3.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- atlas_schema/__init__.py,sha256=mwY_EsW32pdZxihzpAg_enYPl7S-_d27idXKIlYvVqE,425
2
- atlas_schema/_version.py,sha256=MxUhzLJIZQfEpDTTcKSxciTGrMLd5v2VmMlHa2HGeo0,411
3
- atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
4
- atlas_schema/enums.py,sha256=a44N1UbUA4K1y6wzK7zBUxbw4xT02J7vLeSqT03dFaU,2941
5
- atlas_schema/methods.py,sha256=K7u6HGKXrtpMg7jjCjKPwIEnknOShUH4HQ1ibKBzkZ0,6832
6
- atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- atlas_schema/schema.py,sha256=YRVaiDa5Evl2HZ9CzH23d0-TLkvxqyvFQhn0ixyWCcw,7668
8
- atlas_schema/typing_compat.py,sha256=RwkxiiYbXO9yxkeaL8CdRaOHH7wq6vO_epg1YD7RbRs,439
9
- atlas_schema-0.2.1.dist-info/METADATA,sha256=KFUTH5W2HUx8wJOzznaalEqLc9frqeMcbPJ1TmNyTYc,16662
10
- atlas_schema-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
- atlas_schema-0.2.1.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
12
- atlas_schema-0.2.1.dist-info/RECORD,,