atlas-schema 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
atlas_schema/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.2.2'
16
- __version_tuple__ = version_tuple = (0, 2, 2)
15
+ __version__ = version = '0.2.3'
16
+ __version_tuple__ = version_tuple = (0, 2, 3)
atlas_schema/schema.py CHANGED
@@ -49,7 +49,7 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
49
49
  }
50
50
 
51
51
  # These are stored as length-1 vectors unnecessarily
52
- singletons: ClassVar[list[str]] = []
52
+ singletons: ClassVar[set[str]] = set()
53
53
 
54
54
  docstrings: ClassVar[dict[str, str]] = {
55
55
  "charge": "charge",
@@ -127,8 +127,8 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
127
127
 
128
128
  output = {}
129
129
 
130
- # first, register the event-level stuff directly
131
- for name in self.event_ids:
130
+ # first, register singletons (event-level, others)
131
+ for name in {*self.event_ids, *self.singletons}:
132
132
  if name in missing_event_ids:
133
133
  continue
134
134
  output[name] = branch_forms[name]
@@ -163,7 +163,17 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
163
163
  }
164
164
  )
165
165
 
166
- output[name] = zip_forms(content, name, record_name=mixin)
166
+ if not used and not content:
167
+ warnings.warn(
168
+ f"I identified a branch that likely does not have any leaves: '{name}'. I will treat this as a 'singleton'. To suppress this warning next time, please define your singletons explicitly.",
169
+ RuntimeWarning,
170
+ stacklevel=2,
171
+ )
172
+ self.singletons.add(name)
173
+ output[name] = branch_forms[name]
174
+
175
+ else:
176
+ output[name] = zip_forms(content, name, record_name=mixin)
167
177
 
168
178
  output[name].setdefault("parameters", {})
169
179
  output[name]["parameters"].update({"collection_name": name})
@@ -174,6 +184,9 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
174
184
  elif output[name]["class"] == "RecordArray":
175
185
  parameters = output[name]["fields"]
176
186
  contents = output[name]["contents"]
187
+ elif output[name]["class"] == "NumpyArray":
188
+ # these are singletons that we just pass through
189
+ continue
177
190
  else:
178
191
  msg = f"Unhandled class {output[name]['class']}"
179
192
  raise RuntimeError(msg)
@@ -191,11 +204,6 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
191
204
  ),
192
205
  )
193
206
 
194
- if name in self.singletons:
195
- # flatten! this 'promotes' the content of an inner dimension
196
- # upwards, effectively hiding one nested dimension
197
- output[name] = output[name]["content"]
198
-
199
207
  return output.keys(), output.values()
200
208
 
201
209
  @classmethod
atlas_schema/utils.py CHANGED
@@ -10,30 +10,40 @@ Array = TypeVar("Array", bound=Union[dak.Array, ak.Array])
10
10
  _E = TypeVar("_E", bound=Enum)
11
11
 
12
12
 
13
- def isin(haystack: Array, needles: dak.Array | ak.Array, axis: int = -1) -> Array:
13
+ def isin(element: Array, test_elements: dak.Array | ak.Array, axis: int = -1) -> Array:
14
14
  """
15
- Find needles in haystack.
15
+ Find test_elements in element. Similar in API to :func:`numpy.isin`.
16
16
 
17
- This works by first transforming needles to an array with one more
18
- dimension than the haystack, placing the needles at axis, and then doing a
17
+ Calculates `element in test_elements`, broadcasting over *element* only. Returns a boolean array of the same shape as *element* that is `True` where an element of *element* is in *test_elements* and `False` otherwise.
18
+
19
+ This works by first transforming *test_elements* to an array with one more
20
+ dimension than the *element*, placing the *test_elements* at *axis*, and then doing a
19
21
  comparison.
20
22
 
21
23
  Args:
22
- haystack (dak.Array or ak.Array): haystack of values.
23
- needles (dak.Array or ak.Array): one-dimensional set of needles to find in haystack.
24
+ element (dak.Array or ak.Array): input array of values.
25
+ test_elements (dak.Array or ak.Array): one-dimensional set of values against which to test each value of *element*.
24
26
  axis (int): the axis along which the comparison is performed
25
27
 
26
28
  Returns:
27
- dak.Array or ak.Array: result of comparison for needles in haystack
29
+ dak.Array or ak.Array: result of comparison for test_elements in *element*
30
+
31
+ Example:
32
+ >>> import awkward as ak
33
+ >>> import atlas_schema as ats
34
+ >>> truth_origins = ak.Array([[1, 2, 3], [4], [5, 6, 7], [1]])
35
+ >>> prompt_origins = ak.Array([1, 2, 7])
36
+ >>> ats.isin(truth_origins, prompt_origins).to_list()
37
+ [[True, True, False], [False], [False, False, True], [True]]
28
38
  """
29
- assert needles.ndim == 1, "Needles must be one-dimensional"
39
+ assert test_elements.ndim == 1, "test_elements must be one-dimensional"
30
40
  assert axis >= -1, "axis must be -1 or positive-valued"
31
- assert axis < haystack.ndim + 1, "axis too large for the haystack"
41
+ assert axis < element.ndim + 1, "axis too large for the element"
32
42
 
33
- # First, build up the transformation, with slice(None) indicating where to stick the needles
34
- reshaper: list[None | slice] = [None] * haystack.ndim
35
- axis = haystack.ndim if axis == -1 else axis
43
+ # First, build up the transformation, with slice(None) indicating where to stick the test_elements
44
+ reshaper: list[None | slice] = [None] * element.ndim
45
+ axis = element.ndim if axis == -1 else axis
36
46
  reshaper.insert(axis, slice(None))
37
47
 
38
48
  # Note: reshaper needs to be a tuple for indexing purposes
39
- return cast(Array, ak.any(haystack == needles[tuple(reshaper)], axis=-1))
49
+ return cast(Array, ak.any(element == test_elements[tuple(reshaper)], axis=-1))
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atlas-schema
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Helper python package for ATLAS Common NTuple Analysis work.
5
5
  Project-URL: Homepage, https://github.com/scipp-atlas/atlas-schema
6
6
  Project-URL: Bug Tracker, https://github.com/scipp-atlas/atlas-schema/issues
7
7
  Project-URL: Discussions, https://github.com/scipp-atlas/atlas-schema/discussions
8
- Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.2.2/
8
+ Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.2.3/
9
9
  Project-URL: Releases, https://github.com/scipp-atlas/atlas-schema/releases
10
10
  Project-URL: Release Notes, https://atlas-schema.readthedocs.io/en/latest/history.html
11
11
  Author-email: Giordon Stark <kratsg@gmail.com>
@@ -251,7 +251,7 @@ Requires-Dist: tbump>=6.7.0; extra == 'test'
251
251
  Requires-Dist: twine; extra == 'test'
252
252
  Description-Content-Type: text/markdown
253
253
 
254
- # atlas-schema v0.2.2
254
+ # atlas-schema v0.2.3
255
255
 
256
256
  [![Actions Status][actions-badge]][actions-link]
257
257
  [![Documentation Status][rtd-badge]][rtd-link]
@@ -279,6 +279,129 @@ Description-Content-Type: text/markdown
279
279
 
280
280
  <!-- prettier-ignore-end -->
281
281
 
282
+ This is the Python package containing schemas and helper functions enabling
283
+ analyzers to work with ATLAS datasets (Monte Carlo and Data), using
284
+ [coffea](https://coffea-hep.readthedocs.io/en/latest/).
285
+
286
+ ## Hello World
287
+
288
+ The simplest example is to just get started processing the file as expected:
289
+
290
+ ```python
291
+ from atlas_schema.schema import NtupleSchema
292
+ from coffea import dataset_tools
293
+ import awkward as ak
294
+
295
+ fileset = {"ttbar": {"files": {"path/to/ttbar.root": "tree_name"}}}
296
+ samples, report = dataset_tools.preprocess(fileset)
297
+
298
+
299
+ def noop(events):
300
+ return ak.fields(events)
301
+
302
+
303
+ fields = dataset_tools.apply_to_fileset(noop, samples, schemaclass=NtupleSchema)
304
+ print(fields)
305
+ ```
306
+
307
+ which produces something similar to
308
+
309
+ ```python
310
+ {
311
+ "ttbar": [
312
+ "dataTakingYear",
313
+ "mcChannelNumber",
314
+ "runNumber",
315
+ "eventNumber",
316
+ "lumiBlock",
317
+ "actualInteractionsPerCrossing",
318
+ "averageInteractionsPerCrossing",
319
+ "truthjet",
320
+ "PileupWeight",
321
+ "RandomRunNumber",
322
+ "met",
323
+ "recojet",
324
+ "truth",
325
+ "generatorWeight",
326
+ "beamSpotWeight",
327
+ "trigPassed",
328
+ "jvt",
329
+ ]
330
+ }
331
+ ```
332
+
333
+ A more involved example that applies a selection and fills a histogram looks
334
+ like this:
335
+
336
+ ```python
337
+ import awkward as ak
338
+ import dask
339
+ import hist.dask as had
340
+ import matplotlib.pyplot as plt
341
+ from coffea import processor
342
+ from coffea.nanoevents import NanoEventsFactory
343
+ from distributed import Client
344
+
345
+ from atlas_schema.schema import NtupleSchema
346
+
347
+
348
+ class MyFirstProcessor(processor.ProcessorABC):
349
+ def __init__(self):
350
+ pass
351
+
352
+ def process(self, events):
353
+ dataset = events.metadata["dataset"]
354
+ h_ph_pt = (
355
+ had.Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
356
+ .Regular(200, 0.0, 2000.0, name="pt", label=r"$pt_{\gamma}$ [GeV]")
357
+ .Int64()
358
+ )
359
+
360
+ cut = ak.all(events.ph.isEM, axis=1)
361
+ h_ph_pt.fill(isEM="all", pt=ak.firsts(events.ph.pt / 1.0e3))
362
+ h_ph_pt.fill(isEM="pass", pt=ak.firsts(events[cut].ph.pt / 1.0e3))
363
+ h_ph_pt.fill(isEM="fail", pt=ak.firsts(events[~cut].ph.pt / 1.0e3))
364
+
365
+ return {
366
+ dataset: {
367
+ "entries": ak.num(events, axis=0),
368
+ "ph_pt": h_ph_pt,
369
+ }
370
+ }
371
+
372
+ def postprocess(self, accumulator):
373
+ pass
374
+
375
+
376
+ if __name__ == "__main__":
377
+ client = Client()
378
+
379
+ fname = "ntuple.root"
380
+ events = NanoEventsFactory.from_root(
381
+ {fname: "analysis"},
382
+ schemaclass=NtupleSchema,
383
+ metadata={"dataset": "700352.Zqqgamma.mc20d.v1"},
384
+ ).events()
385
+
386
+ p = MyFirstProcessor()
387
+ out = p.process(events)
388
+ (computed,) = dask.compute(out)
389
+ print(computed)
390
+
391
+ fig, ax = plt.subplots()
392
+ computed["700352.Zqqgamma.mc20d.v1"]["ph_pt"].plot1d(ax=ax)
393
+ ax.set_xscale("log")
394
+ ax.legend(title="Photon pT for Zqqgamma")
395
+
396
+ fig.savefig("ph_pt.pdf")
397
+ ```
398
+
399
+ which produces
400
+
401
+ <img src="https://raw.githubusercontent.com/scipp-atlas/atlas-schema/main/docs/_static/img/ph_pt.png" alt="three stacked histograms of photon pT, with each stack corresponding to: no selection, requiring the isEM flag, and inverting the isEM requirement" width="500" style="display: block; margin-left: auto; margin-right: auto;">
402
+
403
+ <!-- SPHINX-END -->
404
+
282
405
  ## Developer Notes
283
406
 
284
407
  ### Converting Enums from C++ to Python
@@ -1,13 +1,13 @@
1
1
  atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
2
- atlas_schema/_version.py,sha256=RrHB9KG1O3GPm--rbTedqmZbdDrbgeRLXBmT4OBUqqI,411
2
+ atlas_schema/_version.py,sha256=AaQEeqeDwmZAHoPuwg2C0ulADePbIYLSFanZzt0cytQ,411
3
3
  atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
4
4
  atlas_schema/enums.py,sha256=hwgOvFBmITNxL0MQkrNpbiPv9VMezFoE-eyGgjzem8E,3688
5
5
  atlas_schema/methods.py,sha256=K7u6HGKXrtpMg7jjCjKPwIEnknOShUH4HQ1ibKBzkZ0,6832
6
6
  atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- atlas_schema/schema.py,sha256=YRVaiDa5Evl2HZ9CzH23d0-TLkvxqyvFQhn0ixyWCcw,7668
7
+ atlas_schema/schema.py,sha256=s3bcSa5DH5iILPD_2BD3co8MSTsXNs1rYBmn44388Kc,8082
8
8
  atlas_schema/typing_compat.py,sha256=RwkxiiYbXO9yxkeaL8CdRaOHH7wq6vO_epg1YD7RbRs,439
9
- atlas_schema/utils.py,sha256=Oe2G3pe009Uhawsdk9e0MuqOHbAa5vZ8F2F9pOmz_Ok,1442
10
- atlas_schema-0.2.2.dist-info/METADATA,sha256=QeHezHbhZY-hA2xdVlrQNeZN2OSCA8hn24jzoMUZDX8,16823
11
- atlas_schema-0.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
- atlas_schema-0.2.2.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
13
- atlas_schema-0.2.2.dist-info/RECORD,,
9
+ atlas_schema/utils.py,sha256=spk7KIMBbXSPpZBTltyxaHWvyitkEGSVldfuKFoyavk,2137
10
+ atlas_schema-0.2.3.dist-info/METADATA,sha256=xtXdXa-9ra8TTtZSXLycUKPvBLPzXCIPa5cueuv0w90,20107
11
+ atlas_schema-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
+ atlas_schema-0.2.3.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
13
+ atlas_schema-0.2.3.dist-info/RECORD,,